GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/download.h
Date: 2023-02-05 02:36:10
Exec Total Coverage
Lines: 123 141 87.2%
Branches: 27 50 54.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_DOWNLOAD_H_
6 #define CVMFS_DOWNLOAD_H_
7
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "gtest/gtest_prod.h"
20
21 #include "compression.h"
22 #include "crypto/hash.h"
23 #include "dns.h"
24 #include "duplex_curl.h"
25 #include "sink.h"
26 #include "ssl.h"
27 #include "statistics.h"
28 #include "util/atomic.h"
29 #include "util/prng.h"
30
31 class InterruptCue;
32
33 namespace download {
34
35 /**
36 * Possible return values. Adjust ObjectFetcher error handling if new network
37 * error conditions are added.
38 */
39 enum Failures {
40 kFailOk = 0,
41 kFailLocalIO,
42 kFailBadUrl,
43 kFailProxyResolve,
44 kFailHostResolve,
45 // artificial failure code. Try other host even though
46 // failure seems to be at the proxy
47 kFailHostAfterProxy,
48 kFailProxyConnection,
49 kFailHostConnection,
50 kFailProxyHttp,
51 kFailHostHttp,
52 kFailBadData,
53 kFailTooBig,
54 kFailOther,
55 kFailUnsupportedProtocol,
56 kFailProxyTooSlow,
57 kFailHostTooSlow,
58 kFailProxyShortTransfer,
59 kFailHostShortTransfer,
60 kFailCanceled,
61
62 kFailNumEntries
63 }; // Failures
64
65
66 108 inline bool IsHostTransferError(const Failures error) {
67
2/2
✓ Branch 0 taken 37 times.
✓ Branch 1 taken 71 times.
108 switch (error) {
68 37 case kFailHostConnection:
69 case kFailHostTooSlow:
70 case kFailHostShortTransfer:
71 37 return true;
72 71 default:
73 71 break;
74 }
75 71 return false;
76 }
77
78 109 inline bool IsProxyTransferError(const Failures error) {
79
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 107 times.
109 switch (error) {
80 2 case kFailProxyConnection:
81 case kFailProxyTooSlow:
82 case kFailProxyShortTransfer:
83 2 return true;
84 107 default:
85 107 break;
86 }
87 107 return false;
88 }
89
90 37 inline const char *Code2Ascii(const Failures error) {
91 const char *texts[kFailNumEntries + 1];
92 37 texts[0] = "OK";
93 37 texts[1] = "local I/O failure";
94 37 texts[2] = "malformed URL";
95 37 texts[3] = "failed to resolve proxy address";
96 37 texts[4] = "failed to resolve host address";
97 37 texts[5] = "all proxies failed, trying host fail-over";
98 37 texts[6] = "proxy connection problem";
99 37 texts[7] = "host connection problem";
100 37 texts[8] = "proxy returned HTTP error";
101 37 texts[9] = "host returned HTTP error";
102 37 texts[10] = "corrupted data received";
103 37 texts[11] = "resource too big to download";
104 37 texts[12] = "unknown network error";
105 37 texts[13] = "Unsupported URL in protocol";
106 37 texts[14] = "proxy serving data too slowly";
107 37 texts[15] = "host serving data too slowly";
108 37 texts[16] = "proxy data transfer cut short";
109 37 texts[17] = "host data transfer cut short";
110 37 texts[18] = "request canceled";
111 37 texts[19] = "no text";
112 37 return texts[error];
113 }
114
115 /**
116 * Where to store downloaded data.
117 */
118 enum Destination {
119 kDestinationMem = 1,
120 kDestinationFile,
121 kDestinationPath,
122 kDestinationSink,
123 kDestinationNone
124 }; // Destination
125
126
127 struct Counters {
128 perf::Counter *sz_transferred_bytes;
129 perf::Counter *sz_transfer_time; // measured in miliseconds
130 perf::Counter *n_requests;
131 perf::Counter *n_retries;
132 perf::Counter *n_proxy_failover;
133 perf::Counter *n_host_failover;
134
135 132 explicit Counters(perf::StatisticsTemplate statistics) {
136
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
137 "Number of transferred bytes");
138
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
139 "Transfer time (miliseconds)");
140
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 n_requests = statistics.RegisterTemplated("n_requests",
141 "Number of requests");
142
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
143
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
144 "Number of proxy failovers");
145
3/6
✓ Branch 2 taken 132 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 132 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 132 times.
✗ Branch 10 not taken.
132 n_host_failover = statistics.RegisterTemplated("n_host_failover",
146 "Number of host failovers");
147 132 }
148 }; // Counters
149
150
151 /**
152 * Contains all the information to specify a download job.
153 */
154 struct JobInfo {
155 const std::string *url;
156 bool compressed;
157 bool probe_hosts;
158 bool head_request;
159 bool follow_redirects;
160 bool force_nocache;
161 pid_t pid;
162 uid_t uid;
163 gid_t gid;
164 void *cred_data; // Per-transfer credential data
165 InterruptCue *interrupt_cue;
166 Destination destination;
167 struct {
168 size_t size;
169 size_t pos;
170 char *data;
171 } destination_mem;
172 FILE *destination_file;
173 const std::string *destination_path;
174 cvmfs::Sink *destination_sink;
175 const shash::Any *expected_hash;
176 const std::string *extra_info;
177
178 // Allow byte ranges to be specified.
179 off_t range_offset;
180 off_t range_size;
181
182 // Default initialization of fields
183 181 void Init() {
184 181 url = NULL;
185 181 compressed = false;
186 181 probe_hosts = false;
187 181 head_request = false;
188 181 follow_redirects = false;
189 181 force_nocache = false;
190 181 pid = -1;
191 181 uid = -1;
192 181 gid = -1;
193 181 cred_data = NULL;
194 181 interrupt_cue = NULL;
195 181 destination = kDestinationNone;
196 181 destination_mem.size = destination_mem.pos = 0;
197 181 destination_mem.data = NULL;
198 181 destination_file = NULL;
199 181 destination_path = NULL;
200 181 destination_sink = NULL;
201 181 expected_hash = NULL;
202 181 extra_info = NULL;
203
204 181 curl_handle = NULL;
205 181 headers = NULL;
206 181 memset(&zstream, 0, sizeof(zstream));
207 181 info_header = NULL;
208 181 wait_at[0] = wait_at[1] = -1;
209 181 nocache = false;
210 181 error_code = kFailOther;
211 181 num_used_proxies = num_used_hosts = num_retries = 0;
212 181 backoff_ms = 0;
213 181 current_host_chain_index = 0;
214
215 181 range_offset = -1;
216 181 range_size = -1;
217 181 http_code = -1;
218 181 }
219
220 // One constructor per destination + head request
221 33 JobInfo() { Init(); }
222 JobInfo(const std::string *u, const bool c, const bool ph,
223 const std::string *p, const shash::Any *h)
224 {
225 Init();
226 url = u;
227 compressed = c;
228 probe_hosts = ph;
229 destination = kDestinationPath;
230 destination_path = p;
231 expected_hash = h;
232 }
233 67 JobInfo(const std::string *u, const bool c, const bool ph, FILE *f,
234 const shash::Any *h)
235 67 {
236 67 Init();
237 67 url = u;
238 67 compressed = c;
239 67 probe_hosts = ph;
240 67 destination = kDestinationFile;
241 67 destination_file = f;
242 67 expected_hash = h;
243 67 }
244 79 JobInfo(const std::string *u, const bool c, const bool ph,
245 const shash::Any *h)
246 79 {
247 79 Init();
248 79 url = u;
249 79 compressed = c;
250 79 probe_hosts = ph;
251 79 destination = kDestinationMem;
252 79 expected_hash = h;
253 79 }
254 2 JobInfo(const std::string *u, const bool c, const bool ph,
255 cvmfs::Sink *s, const shash::Any *h)
256 2 {
257 2 Init();
258 2 url = u;
259 2 compressed = c;
260 2 probe_hosts = ph;
261 2 destination = kDestinationSink;
262 2 destination_sink = s;
263 2 expected_hash = h;
264 2 }
265 JobInfo(const std::string *u, const bool ph) {
266 Init();
267 url = u;
268 probe_hosts = ph;
269 head_request = true;
270 }
271
272 181 ~JobInfo() {
273
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 181 times.
181 if (wait_at[0] >= 0) {
274 close(wait_at[0]);
275 close(wait_at[1]);
276 }
277 181 }
278
279 /**
280 * Tells whether the error is because of a non-existing file. Should only
281 * be called if error_code is not kFailOk
282 */
283 bool IsFileNotFound();
284
285 // Internal state, don't touch
286 CURL *curl_handle;
287 curl_slist *headers;
288 char *info_header;
289 z_stream zstream;
290 shash::ContextPtr hash_context;
291 int wait_at[2]; /**< Pipe used for the return value */
292 std::string proxy;
293 bool nocache;
294 Failures error_code;
295 int http_code;
296 unsigned char num_used_proxies;
297 unsigned char num_used_hosts;
298 unsigned char num_retries;
299 unsigned backoff_ms;
300 unsigned int current_host_chain_index;
301 }; // JobInfo
302
303
304 /**
305 * Manages blocks of arrays of curl_slist storing header strings. In contrast
306 * to curl's slists, these ones don't take ownership of the header strings.
307 * Overall number of elements is limited as number of concurrent connections
308 * is limited.
309 *
310 * Only use curl_slist objects created in the same HeaderLists instance in this
311 * class
312 */
313 class HeaderLists {
314 FRIEND_TEST(T_HeaderLists, Intrinsics);
315 public:
316 ~HeaderLists();
317 curl_slist *GetList(const char *header);
318 curl_slist *DuplicateList(curl_slist *slist);
319 void AppendHeader(curl_slist *slist, const char *header);
320 void CutHeader(const char *header, curl_slist **slist);
321 void PutList(curl_slist *slist);
322 std::string Print(curl_slist *slist);
323
324 private:
325 static const unsigned kBlockSize = 4096/sizeof(curl_slist);
326
327 69804 bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
328 curl_slist *Get(const char *header);
329 void Put(curl_slist *slist);
330 void AddBlock();
331
332 std::vector<curl_slist *> blocks_; // List of curl_slist blocks
333 };
334
335
336 /**
337 * Provides hooks to attach per-transfer credentials to curl handles.
338 * Overwritten by the AuthzX509Attachment in authz_curl.cc. Needs to be
339 * thread-safe because it can be potentially used by multiple DownloadManagers.
340 */
341 class CredentialsAttachment {
342 public:
343 68 virtual ~CredentialsAttachment() { }
344 virtual bool ConfigureCurlHandle(CURL *curl_handle,
345 pid_t pid,
346 void **info_data) = 0;
347 virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
348 };
349
350
351 /**
352 * Note when adding new fields: Clone() probably needs to be adjusted, too.
353 * TODO(jblomer): improve ordering of members
354 */
355 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
356 FRIEND_TEST(T_Download, ValidateGeoReply);
357 FRIEND_TEST(T_Download, StripDirect);
358
359 public:
360 struct ProxyInfo {
361 ProxyInfo() { }
362
1/2
✓ Branch 2 taken 58 times.
✗ Branch 3 not taken.
58 explicit ProxyInfo(const std::string &url) : url(url) { }
363 4 ProxyInfo(const dns::Host &host, const std::string &url)
364 4 : host(host)
365
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 , url(url)
366 4 { }
367 std::string Print();
368 dns::Host host;
369 std::string url;
370 };
371
372 enum ProxySetModes {
373 kSetProxyRegular = 0,
374 kSetProxyFallback,
375 kSetProxyBoth,
376 };
377
378 /**
379 * No attempt was made to order stratum 1 servers
380 */
381 static const int kProbeUnprobed;
382 /**
383 * The rtt to a stratum 1 could not be determined because the stratum 1
384 * was unreachable.
385 */
386 static const int kProbeDown;
387 /**
388 * The stratum 1 server was put in order according to a Geo-API result
389 */
390 static const int kProbeGeo;
391
392 /**
393 * Do not download files larger than 1M into memory.
394 */
395 static const unsigned kMaxMemSize;
396
397 static const unsigned kDnsDefaultRetries = 1;
398 static const unsigned kDnsDefaultTimeoutMs = 3000;
399 static const unsigned kProxyMapScale = 16;
400
401 DownloadManager();
402 ~DownloadManager();
403
404 static int ParseHttpCode(const char digits[3]);
405
406 void Init(const unsigned max_pool_handles,
407 const perf::StatisticsTemplate &statistics);
408 void Fini();
409 void Spawn();
410 DownloadManager *Clone(const perf::StatisticsTemplate &statistics);
411 Failures Fetch(JobInfo *info);
412
413 void SetCredentialsAttachment(CredentialsAttachment *ca);
414 std::string GetDnsServer() const;
415 void SetDnsServer(const std::string &address);
416 void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
417 void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
418 void SetIpPreference(const dns::IpPreference preference);
419 void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
420 void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
421 void SetLowSpeedLimit(const unsigned low_speed_limit);
422 void SetHostChain(const std::string &host_list);
423 void SetHostChain(const std::vector<std::string> &host_list);
424 void GetHostInfo(std::vector<std::string> *host_chain,
425 std::vector<int> *rtt, unsigned *current_host);
426 void ProbeHosts();
427 bool ProbeGeo();
428 // Sort list of servers using the Geo API. If the output_order
429 // vector is NULL, then the servers vector input is itself sorted.
430 // If it is non-NULL, then servers is left unchanged and the zero-based
431 // ordering is stored into output_order.
432 bool GeoSortServers(std::vector<std::string> *servers,
433 std::vector<uint64_t> *output_order = NULL);
434 void SwitchHost();
435 void SetProxyChain(const std::string &proxy_list,
436 const std::string &fallback_proxy_list,
437 const ProxySetModes set_mode);
438 void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
439 unsigned *current_group,
440 unsigned *fallback_group);
441 std::string GetProxyList();
442 std::string GetFallbackProxyList();
443 void ShardProxies();
444 void RebalanceProxies();
445 void SwitchProxyGroup();
446 void SetProxyGroupResetDelay(const unsigned seconds);
447 void SetHostResetDelay(const unsigned seconds);
448 void SetRetryParameters(const unsigned max_retries,
449 const unsigned backoff_init_ms,
450 const unsigned backoff_max_ms);
451 void SetMaxIpaddrPerProxy(unsigned limit);
452 void SetProxyTemplates(const std::string &direct, const std::string &forced);
453 void EnableInfoHeader();
454 void EnableRedirects();
455 void UseSystemCertificatePath();
456
457 3 unsigned num_hosts() {
458
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (opt_host_chain_) return opt_host_chain_->size();
459 return 0;
460 }
461
462 dns::IpPreference opt_ip_preference() const {
463 return opt_ip_preference_;
464 }
465
466 private:
467 static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
468 void *userp, void *socketp);
469 static void *MainDownload(void *data);
470
471 bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
472 bool ValidateGeoReply(const std::string &reply_order,
473 const unsigned expected_size,
474 std::vector<uint64_t> *reply_vals);
475 void SwitchHost(JobInfo *info);
476 void SwitchProxy(JobInfo *info);
477 ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
478 void UpdateProxiesUnlocked(const std::string &reason);
479 void RebalanceProxiesUnlocked(const std::string &reason);
480 CURL *AcquireCurlHandle();
481 void ReleaseCurlHandle(CURL *handle);
482 void ReleaseCredential(JobInfo *info);
483 void InitializeRequest(JobInfo *info, CURL *handle);
484 void SetUrlOptions(JobInfo *info);
485 bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
486 void UpdateStatistics(CURL *handle);
487 bool CanRetry(const JobInfo *info);
488 void Backoff(JobInfo *info);
489 void SetNocache(JobInfo *info);
490 void SetRegularCache(JobInfo *info);
491 bool VerifyAndFinalize(const int curl_error, JobInfo *info);
492 void InitHeaders();
493 void FiniHeaders();
494 void CloneProxyConfig(DownloadManager *clone);
495
496 93 inline std::vector<ProxyInfo> *current_proxy_group() const {
497
1/2
✓ Branch 0 taken 93 times.
✗ Branch 1 not taken.
93 return (opt_proxy_groups_ ?
498 93 &((*opt_proxy_groups_)[opt_proxy_groups_current_]) : NULL);
499 }
500
501 Prng prng_;
502 std::set<CURL *> *pool_handles_idle_;
503 std::set<CURL *> *pool_handles_inuse_;
504 uint32_t pool_max_handles_;
505 CURLM *curl_multi_;
506 HeaderLists *header_lists_;
507 curl_slist *default_headers_;
508 char *user_agent_;
509
510 pthread_t thread_download_;
511 atomic_int32 multi_threaded_;
512 int pipe_terminate_[2];
513
514 int pipe_jobs_[2];
515 struct pollfd *watch_fds_;
516 uint32_t watch_fds_size_;
517 uint32_t watch_fds_inuse_;
518 uint32_t watch_fds_max_;
519
520 pthread_mutex_t *lock_options_;
521 pthread_mutex_t *lock_synchronous_mode_;
522 std::string opt_dns_server_;
523 unsigned opt_timeout_proxy_;
524 unsigned opt_timeout_direct_;
525 unsigned opt_low_speed_limit_;
526 unsigned opt_max_retries_;
527 unsigned opt_backoff_init_ms_;
528 unsigned opt_backoff_max_ms_;
529 bool enable_info_header_;
530 bool opt_ipv4_only_;
531 bool follow_redirects_;
532
533 // Host list
534 std::vector<std::string> *opt_host_chain_;
535 /**
536 * Created by SetHostChain(), filled by probe_hosts. Contains time to get
537 * .cvmfschecksum in ms. -1 is unprobed, -2 is error.
538 */
539 std::vector<int> *opt_host_chain_rtt_;
540 unsigned opt_host_chain_current_;
541
542 // Proxy list
543 std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
544 /**
545 * The current load-balancing group (first dimension in opt_proxy_groups_).
546 */
547 unsigned opt_proxy_groups_current_;
548 /**
549 * Number of proxy servers that failed within current load-balance group.
550 * Between 0 and (*opt_proxy_groups_)[opt_proxy_groups_current_].size().
551 */
552 unsigned opt_proxy_groups_current_burned_;
553 /**
554 * The index of the first fallback proxy group. If there are none,
555 * it is set to the number of regular proxy groups.
556 */
557 unsigned opt_proxy_groups_fallback_;
558 /**
559 * Overall number of proxies summed over all the groups.
560 */
561 unsigned opt_num_proxies_;
562 /**
563 * The original proxy list provided to SetProxyChain.
564 */
565 std::string opt_proxy_list_;
566 /**
567 * The original proxy fallback list provided to SetProxyChain.
568 */
569 std::string opt_proxy_fallback_list_;
570 /**
571 * Load-balancing map of currently active proxies
572 */
573 std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
574 /**
575 * Sorted list of currently active proxy URLs (for log messages)
576 */
577 std::vector<std::string> opt_proxy_urls_;
578 /**
579 * Shard requests across multiple proxies via consistent hashing
580 */
581 bool opt_proxy_shard_;
582
583 /**
584 * Used to resolve proxy addresses (host addresses are resolved by the proxy).
585 */
586 dns::NormalResolver *resolver_;
587
588 /**
589 * If a proxy has IPv4 and IPv6 addresses, which one to prefer
590 */
591 dns::IpPreference opt_ip_preference_;
592
593 /**
594 * Used to replace @proxy@ in the Geo-API calls to order Stratum 1 servers,
595 * in case the active proxy is DIRECT (no proxy). Should be a UUID
596 * identifying the host.
597 */
598 std::string proxy_template_direct_;
599 /**
600 * Used to force a value for @proxy@ in the Geo-API calls to order Stratum 1
601 * servers. If empty, the fully qualified domain name of the active proxy
602 * server is used.
603 */
604 std::string proxy_template_forced_;
605
606 /**
607 * More than one proxy group can be considered as group of primary proxies
608 * followed by backup proxy groups, e.g. at another site.
609 * If opt_proxy_groups_reset_after_ is > 0, cvmfs will reset its proxy group
610 * to the first one after opt_proxy_groups_reset_after_ seconds are elapsed.
611 */
612 time_t opt_timestamp_backup_proxies_;
613 time_t opt_timestamp_failover_proxies_; // failover within the same group
614 unsigned opt_proxy_groups_reset_after_;
615
616 /**
617 * Similarly to proxy group reset, we'd also like to reset the host after a
618 * failover. Host outages can last longer and might come with a separate
619 * reset delay.
620 */
621 time_t opt_timestamp_backup_host_;
622 unsigned opt_host_reset_after_;
623
624 CredentialsAttachment *credentials_attachment_;
625
626 /**
627 * Writes and reads should be atomic because reading happens in a different
628 * thread than writing.
629 */
630 Counters *counters_;
631
632 /**
633 * Carries the path settings for SSL certificates
634 */
635 SslCertificateStore ssl_certificate_store_;
636 }; // DownloadManager
637
638 } // namespace download
639
640 #endif // CVMFS_DOWNLOAD_H_
641