GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/network/download.h
Date: 2025-02-09 02:34:19
Exec Total Coverage
Lines: 27 31 87.1%
Branches: 25 50 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "gtest/gtest_prod.h"
20
21 #include "compression/compression.h"
22 #include "crypto/hash.h"
23 #include "duplex_curl.h"
24 #include "network/dns.h"
25 #include "network/health_check.h"
26 #include "network/jobinfo.h"
27 #include "network/network_errors.h"
28 #include "network/sharding_policy.h"
29 #include "network/sink.h"
30 #include "ssl.h"
31 #include "statistics.h"
32 #include "util/atomic.h"
33 #include "util/pipe.h"
34 #include "util/pointer.h"
35 #include "util/prng.h"
36 #include "util/shared_ptr.h"
37
38 class InterruptCue;
39
40 namespace download {
41
42 struct Counters {
43 perf::Counter *sz_transferred_bytes;
44 perf::Counter *sz_transfer_time; // measured in milliseconds
45 perf::Counter *n_requests;
46 perf::Counter *n_retries;
47 perf::Counter *n_metalink_failover;
48 perf::Counter *n_host_failover;
49 perf::Counter *n_proxy_failover;
50
51 158 explicit Counters(perf::StatisticsTemplate statistics) {
52
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
53 "Number of transferred bytes");
54
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
55 "Transfer time (milliseconds)");
56
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 n_requests = statistics.RegisterTemplated("n_requests",
57 "Number of requests");
58
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
59
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 n_metalink_failover = statistics.RegisterTemplated("n_metalink_failover",
60 "Number of metalink failovers");
61
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 n_host_failover = statistics.RegisterTemplated("n_host_failover",
62 "Number of host failovers");
63
3/6
✓ Branch 2 taken 158 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 158 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 158 times.
✗ Branch 10 not taken.
158 n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
64 "Number of proxy failovers");
65 158 }
66 }; // Counters
67
68 /**
69 * Manages blocks of arrays of curl_slist storing header strings. In contrast
70 * to curl's slists, these ones don't take ownership of the header strings.
71 * Overall number of elements is limited as number of concurrent connections
72 * is limited.
73 *
74 * Only pass curl_slist objects to an instance of this class that were created
75 * by that same HeaderLists instance.
76 */
77 class HeaderLists {
78 FRIEND_TEST(T_HeaderLists, Intrinsics);
79 public:
80 ~HeaderLists();
81 curl_slist *GetList(const char *header);
82 curl_slist *DuplicateList(curl_slist *slist);
83 void AppendHeader(curl_slist *slist, const char *header);
84 void CutHeader(const char *header, curl_slist **slist);
85 void PutList(curl_slist *slist);
86 std::string Print(curl_slist *slist);
87
88 private:
89 static const unsigned kBlockSize = 4096/sizeof(curl_slist);
90
91 70249 bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
92 curl_slist *Get(const char *header);
93 void Put(curl_slist *slist);
94 void AddBlock();
95
96 std::vector<curl_slist *> blocks_; // List of curl_slist blocks
97 };
98
99
100 /**
101 * Provides hooks to attach per-transfer credentials to curl handles.
102 * Overridden by the AuthzX509Attachment in authz_curl.cc. Needs to be
103 * thread-safe because it can be potentially used by multiple DownloadManagers.
104 */
105 class CredentialsAttachment {
106 public:
107 80 virtual ~CredentialsAttachment() { }
108 virtual bool ConfigureCurlHandle(CURL *curl_handle,
109 pid_t pid,
110 void **info_data) = 0;
111 virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
112 };
113
114
115 /**
116 * Note when adding new fields: Clone() probably needs to be adjusted, too.
117 * TODO(jblomer): improve ordering of members
118 */
119 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
120 FRIEND_TEST(T_Download, ValidateGeoReply);
121 FRIEND_TEST(T_Download, StripDirect);
122 FRIEND_TEST(T_Download, EscapeUrl);
123
124 public:
125 // HostInfo is used for both metalink and host
126 struct HostInfo {
127 HostInfo() { }
128 316 HostInfo(
129 std::vector<std::string> *chain,
130 const int current,
131 const time_t timestamp_backup,
132 const unsigned reset_after)
133 316 : chain(chain)
134 316 , current(current)
135 316 , timestamp_backup(timestamp_backup)
136 316 , reset_after(reset_after)
137 316 { }
138 std::vector<std::string> *chain;
139 int current;
140 time_t timestamp_backup;
141 unsigned reset_after;
142 };
143
144 struct ProxyInfo {
145 ProxyInfo() { }
146
1/2
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
64 explicit ProxyInfo(const std::string &url) : url(url) { }
147 4 ProxyInfo(const dns::Host &host, const std::string &url)
148 4 : host(host)
149
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 , url(url)
150 4 { }
151 std::string Print();
152 dns::Host host;
153 std::string url;
154 };
155
156 enum ProxySetModes {
157 kSetProxyRegular = 0,
158 kSetProxyFallback,
159 kSetProxyBoth,
160 };
161
162 /**
163 * No attempt was made to order stratum 1 servers
164 */
165 static const int kProbeUnprobed;
166 /**
167 * The rtt to a stratum 1 could not be determined because the stratum 1
168 * was unreachable.
169 */
170 static const int kProbeDown;
171 /**
172 * The stratum 1 server was put in order according to a Geo-API result
173 */
174 static const int kProbeGeo;
175
176 static const unsigned kDnsDefaultRetries = 1;
177 static const unsigned kDnsDefaultTimeoutMs = 3000;
178 static const unsigned kProxyMapScale = 16;
179
180 DownloadManager(const unsigned max_pool_handles,
181 const perf::StatisticsTemplate &statistics,
182 const std::string &name = "standard");
183 ~DownloadManager();
184
185 static int ParseHttpCode(const char digits[3]);
186
187 void Spawn();
188 DownloadManager *Clone(const perf::StatisticsTemplate &statistics,
189 const std::string &cloned_name);
190 Failures Fetch(JobInfo *info);
191
192 void SetCredentialsAttachment(CredentialsAttachment *ca);
193 std::string GetDnsServer() const;
194 void SetDnsServer(const std::string &address);
195 void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
196 void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
197 void SetIpPreference(const dns::IpPreference preference);
198 void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
199 void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
200 void SetLowSpeedLimit(const unsigned low_speed_limit);
201 void SetMetalinkChain(const std::string &metalink_list);
202 void SetMetalinkChain(const std::vector<std::string> &metalink_list);
203 void GetMetalinkInfo(std::vector<std::string> *metalink_chain,
204 unsigned *current_metalink);
205 void SwitchMetalink();
206 bool CheckMetalinkChain(const time_t now);
207 void SetHostChain(const std::string &host_list);
208 void SetHostChain(const std::vector<std::string> &host_list);
209 void GetHostInfo(std::vector<std::string> *host_chain,
210 std::vector<int> *rtt, unsigned *current_host);
211 void ProbeHosts();
212 bool ProbeGeo();
213 // Sort list of servers using the Geo API. If the output_order
214 // vector is NULL, then the servers vector input is itself sorted.
215 // If it is non-NULL, then servers is left unchanged and the zero-based
216 // ordering is stored into output_order.
217 bool GeoSortServers(std::vector<std::string> *servers,
218 std::vector<uint64_t> *output_order = NULL);
219 void SwitchHost();
220 void SetProxyChain(const std::string &proxy_list,
221 const std::string &fallback_proxy_list,
222 const ProxySetModes set_mode);
223 void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
224 unsigned *current_group,
225 unsigned *fallback_group);
226 std::string GetProxyList();
227 std::string GetFallbackProxyList();
228 void ShardProxies();
229 void RebalanceProxies();
230 void SwitchProxyGroup();
231 void SetProxyGroupResetDelay(const unsigned seconds);
232 void SetMetalinkResetDelay(const unsigned seconds);
233 void SetHostResetDelay(const unsigned seconds);
234 void SetRetryParameters(const unsigned max_retries,
235 const unsigned backoff_init_ms,
236 const unsigned backoff_max_ms);
237 void SetMaxIpaddrPerProxy(unsigned limit);
238 void SetProxyTemplates(const std::string &direct, const std::string &forced);
239 void EnableInfoHeader();
240 void EnableRedirects();
241 void EnableIgnoreSignatureFailures();
242 void EnableHTTPTracing();
243 void AddHTTPTracingHeader(const std::string &header);
244 void UseSystemCertificatePath();
245
246 bool SetShardingPolicy(const ShardingPolicySelector type);
247 void SetFailoverIndefinitely();
248 void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
249
250 1 unsigned num_hosts() {
251
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (opt_host_.chain) return opt_host_.chain->size();
252 return 0;
253 }
254
255 unsigned num_metalinks() {
256 if (opt_metalink_.chain) return opt_metalink_.chain->size();
257 return 0;
258 }
259
260 dns::IpPreference opt_ip_preference() const {
261 return opt_ip_preference_;
262 }
263
264 private:
265 static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
266 void *userp, void *socketp);
267 static void *MainDownload(void *data);
268
269 bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
270 bool ValidateGeoReply(const std::string &reply_order,
271 const unsigned expected_size,
272 std::vector<uint64_t> *reply_vals);
273 void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo);
274 void SwitchMetalink(JobInfo *info);
275 void SwitchHost(JobInfo *info);
276 void SwitchProxy(JobInfo *info);
277 ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
278 void UpdateProxiesUnlocked(const std::string &reason);
279 void RebalanceProxiesUnlocked(const std::string &reason);
280 CURL *AcquireCurlHandle();
281 void ReleaseCurlHandle(CURL *handle);
282 void ReleaseCredential(JobInfo *info);
283 void InitializeRequest(JobInfo *info, CURL *handle);
284 void SetUrlOptions(JobInfo *info);
285 bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
286 void UpdateStatistics(CURL *handle);
287 bool CanRetry(const JobInfo *info);
288 void Backoff(JobInfo *info);
289 void SetNocache(JobInfo *info);
290 void SetRegularCache(JobInfo *info);
291 void ProcessLink(JobInfo *info);
292 bool VerifyAndFinalize(const int curl_error, JobInfo *info);
293 void InitHeaders();
294 void CloneProxyConfig(DownloadManager *clone);
295 void CheckHostInfoReset(const std::string &typ, HostInfo &info,
296 JobInfo *jobinfo, time_t &now);
297
298 bool EscapeUrlChar(unsigned char input, char output[3]);
299 std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url);
300 unsigned EscapeHeader(const std::string &header, char *escaped_buf,
301 size_t buf_size);
302
303 102 inline std::vector<ProxyInfo> *current_proxy_group() const {
304
1/2
✓ Branch 0 taken 102 times.
✗ Branch 1 not taken.
102 return (opt_proxy_groups_ ?
305 102 &((*opt_proxy_groups_)[opt_proxy_groups_current_]) : NULL);
306 }
307
308 Prng prng_;
309 std::set<CURL *> *pool_handles_idle_;
310 std::set<CURL *> *pool_handles_inuse_;
311 uint32_t pool_max_handles_;
312 CURLM *curl_multi_;
313 HeaderLists *header_lists_;
314 curl_slist *default_headers_;
315 char *user_agent_;
316
317 pthread_t thread_download_;
318 atomic_int32 multi_threaded_;
319 UniquePtr<Pipe<kPipeThreadTerminator> > pipe_terminate_;
320
321 UniquePtr<Pipe<kPipeDownloadJobs> > pipe_jobs_;
322 struct pollfd *watch_fds_;
323 uint32_t watch_fds_size_;
324 uint32_t watch_fds_inuse_;
325 uint32_t watch_fds_max_;
326
327 pthread_mutex_t *lock_options_;
328 pthread_mutex_t *lock_synchronous_mode_;
329 std::string opt_dns_server_;
330 unsigned opt_timeout_proxy_;
331 unsigned opt_timeout_direct_;
332 unsigned opt_low_speed_limit_;
333 unsigned opt_max_retries_;
334 unsigned opt_backoff_init_ms_;
335 unsigned opt_backoff_max_ms_;
336 bool enable_info_header_;
337 bool opt_ipv4_only_;
338 bool follow_redirects_;
339
340 /**
341 * Ignore signature failures during download.
342 * In general it is a bad idea to do this!
343 */
344 bool ignore_signature_failures_;
345
346 bool enable_http_tracing_;
347 std::vector<std::string> http_tracing_headers_;
348
349 // Metalink list
350 HostInfo opt_metalink_;
351 time_t opt_metalink_timestamp_link_;
352
353 // Host list
354 HostInfo opt_host_;
355 /**
356 * Created by SetHostChain(), filled by ProbeHosts(). Contains time to get
357 * .cvmfschecksum in ms. -1 is unprobed, -2 is error.
358 */
359 std::vector<int> *opt_host_chain_rtt_;
360
361 // Proxy list
362 std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
363 /**
364 * The current load-balancing group (first dimension in opt_proxy_groups_).
365 */
366 unsigned opt_proxy_groups_current_;
367 /**
368 * Number of proxy servers that failed within current load-balance group.
369 * Between 0 and (*opt_proxy_groups_)[opt_proxy_groups_current_].size().
370 */
371 unsigned opt_proxy_groups_current_burned_;
372 /**
373 * The index of the first fallback proxy group. If there are none,
374 * it is set to the number of regular proxy groups.
375 */
376 unsigned opt_proxy_groups_fallback_;
377 /**
378 * Overall number of proxies summed over all the groups.
379 */
380 unsigned opt_num_proxies_;
381 /**
382 * The original proxy list provided to SetProxyChain.
383 */
384 std::string opt_proxy_list_;
385 /**
386 * The original proxy fallback list provided to SetProxyChain.
387 */
388 std::string opt_proxy_fallback_list_;
389 /**
390 * Load-balancing map of currently active proxies
391 */
392 std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
393 /**
394 * Sorted list of currently active proxy URLs (for log messages)
395 */
396 std::vector<std::string> opt_proxies_;
397 /**
398 * Shard requests across multiple proxies via consistent hashing
399 */
400 bool opt_proxy_shard_;
401
402 /**
403 * Sharding policy deciding which proxy should be chosen for each download
404 * request
405 *
406 * Sharding policy is shared between all download managers. As such shared
407 * pointers are used to allow for proper clean-up afterwards in the destructor
408 * (We cannot assume the order in which the download managers are stopped)
409 */
410 SharedPtr<ShardingPolicy> sharding_policy_;
411 /**
412 * Health check for the proxies
413 *
414 * Health check is shared between all download managers. As such shared
415 * pointers are used to allow for proper clean-up afterwards in the destructor
416 * (We cannot assume the order in which the download managers are stopped)
417 */
418 SharedPtr<HealthCheck> health_check_;
419 /**
420 * Endless retries for a failed download (hard failures will result in abort)
421 */
422 bool failover_indefinitely_;
423 /**
424 * Repo name. Needed for the retry logic if a download was unsuccessful.
425 * Used in the sharding policy and in Interrupted().
426 */
427 std::string fqrn_;
428
429 /**
430 * Name of the download manager (default is "standard")
431 */
432 std::string name_;
433
434 /**
435 * Used to resolve proxy addresses (host addresses are resolved by the proxy).
436 */
437 dns::NormalResolver *resolver_;
438
439 /**
440 * If a proxy has IPv4 and IPv6 addresses, which one to prefer
441 */
442 dns::IpPreference opt_ip_preference_;
443
444 /**
445 * Used to replace @proxy@ in the Geo-API calls to order Stratum 1 servers,
446 * in case the active proxy is DIRECT (no proxy). Should be a UUID
447 * identifying the host.
448 */
449 std::string proxy_template_direct_;
450 /**
451 * Used to force a value for @proxy@ in the Geo-API calls to order Stratum 1
452 * servers. If empty, the fully qualified domain name of the active proxy
453 * server is used.
454 */
455 std::string proxy_template_forced_;
456
457 /**
458 * More than one proxy group can be considered as a group of primary proxies
459 * followed by backup proxy groups, e.g. at another site.
460 * If opt_proxy_groups_reset_after_ is > 0, cvmfs will reset its proxy group
461 * to the first one after opt_proxy_groups_reset_after_ seconds have elapsed.
462 */
463 time_t opt_timestamp_backup_proxies_;
464 time_t opt_timestamp_failover_proxies_; // failover within the same group
465 unsigned opt_proxy_groups_reset_after_;
466
467 CredentialsAttachment *credentials_attachment_;
468
469 /**
470 * Writes and reads should be atomic because reading happens in a different
471 * thread than writing.
472 */
473 Counters *counters_;
474
475 /**
476 * Carries the path settings for SSL certificates
477 */
478 SslCertificateStore ssl_certificate_store_;
479 }; // DownloadManager
480
481 } // namespace download
482
483 #endif // CVMFS_NETWORK_DOWNLOAD_H_
484