GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/download.h Lines: 108 130 83.1 %
Date: 2019-02-03 02:48:13 Branches: 6 10 60.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#ifndef CVMFS_DOWNLOAD_H_
6
#define CVMFS_DOWNLOAD_H_
7
8
#include <poll.h>
9
#include <pthread.h>
10
#include <stdint.h>
11
#include <unistd.h>
12
13
#include <cstdio>
14
#include <set>
15
#include <string>
16
#include <vector>
17
18
#include "gtest/gtest_prod.h"
19
20
#include "atomic.h"
21
#include "compression.h"
22
#include "dns.h"
23
#include "duplex_curl.h"
24
#include "hash.h"
25
#include "prng.h"
26
#include "sink.h"
27
#include "statistics.h"
28
29
30
namespace download {
31
32
/**
 * Result codes of a download.  The numeric values double as indices into the
 * text table of Code2Ascii(), so the two must be kept in sync.  Adjust
 * ObjectFetcher error handling if new network error conditions are added.
 */
enum Failures {
  kFailOk                  = 0,
  kFailLocalIO             = 1,
  kFailBadUrl              = 2,
  kFailProxyResolve        = 3,
  kFailHostResolve         = 4,
  // Artificial failure code: try another host even though the failure seems
  // to be at the proxy
  kFailHostAfterProxy      = 5,
  kFailProxyConnection     = 6,
  kFailHostConnection      = 7,
  kFailProxyHttp           = 8,
  kFailHostHttp            = 9,
  kFailBadData             = 10,
  kFailTooBig              = 11,
  kFailOther               = 12,
  kFailUnsupportedProtocol = 13,
  kFailProxyTooSlow        = 14,
  kFailHostTooSlow         = 15,
  kFailProxyShortTransfer  = 16,
  kFailHostShortTransfer   = 17,

  // Not a failure code: the number of codes listed above
  kFailNumEntries          = 18
};  // Failures
60
61
62
75
inline bool IsHostTransferError(const Failures error) {
63
75
  switch (error) {
64
    case kFailHostConnection:
65
    case kFailHostTooSlow:
66
    case kFailHostShortTransfer:
67
44
      return true;
68
    default:
69
      break;
70
  }
71
31
  return false;
72
}
73
74
75
inline bool IsProxyTransferError(const Failures error) {
75
75
  switch (error) {
76
    case kFailProxyConnection:
77
    case kFailProxyTooSlow:
78
    case kFailProxyShortTransfer:
79
      return true;
80
    default:
81
      break;
82
  }
83
75
  return false;
84
}
85
86
70
inline const char *Code2Ascii(const Failures error) {
87
  const char *texts[kFailNumEntries + 1];
88
70
  texts[0] = "OK";
89
70
  texts[1] = "local I/O failure";
90
70
  texts[2] = "malformed URL";
91
70
  texts[3] = "failed to resolve proxy address";
92
70
  texts[4] = "failed to resolve host address";
93
70
  texts[5] = "all proxies failed, trying host fail-over";
94
70
  texts[6] = "proxy connection problem";
95
70
  texts[7] = "host connection problem";
96
70
  texts[8] = "proxy returned HTTP error";
97
70
  texts[9] = "host returned HTTP error";
98
70
  texts[10] = "corrupted data received";
99
70
  texts[11] = "resource too big to download";
100
70
  texts[12] = "unknown network error";
101
70
  texts[13] = "Unsupported URL in protocol";
102
70
  texts[14] = "proxy serving data too slowly";
103
70
  texts[15] = "host serving data too slowly";
104
70
  texts[16] = "proxy data transfer cut short";
105
70
  texts[17] = "host data transfer cut short";
106
70
  texts[18] = "no text";
107
70
  return texts[error];
108
}
109
110
/**
 * Selects where the downloaded data is stored.  Each value except
 * kDestinationNone has a matching destination_* field in JobInfo.
 */
enum Destination {
  kDestinationMem  = 1,
  kDestinationFile = 2,
  kDestinationPath = 3,
  kDestinationSink = 4,
  kDestinationNone = 5
};  // Destination
120
121
122
struct Counters {
123
  perf::Counter *sz_transferred_bytes;
124
  perf::Counter *sz_transfer_time;  // measured in miliseconds
125
  perf::Counter *n_requests;
126
  perf::Counter *n_retries;
127
  perf::Counter *n_proxy_failover;
128
  perf::Counter *n_host_failover;
129
130
200
  explicit Counters(perf::StatisticsTemplate statistics) {
131
    sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
132
200
        "Number of transferred bytes");
133
    sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
134
200
        "Transfer time (miliseconds)");
135
    n_requests = statistics.RegisterTemplated("n_requests",
136
200
        "Number of requests");
137
200
    n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
138
    n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
139
200
        "Number of proxy failovers");
140
    n_host_failover = statistics.RegisterTemplated("n_host_failover",
141
200
        "Number of host failovers");
142
200
  }
143
};  // Counters
144
145
146
/**
147
 * Contains all the information to specify a download job.
148
 */
149
struct JobInfo {
150
  const std::string *url;
151
  bool compressed;
152
  bool probe_hosts;
153
  bool head_request;
154
  bool follow_redirects;
155
  bool force_nocache;
156
  pid_t pid;
157
  uid_t uid;
158
  gid_t gid;
159
  void *cred_data;  // Per-transfer credential data
160
  Destination destination;
161
  struct {
162
    size_t size;
163
    size_t pos;
164
    char *data;
165
  } destination_mem;
166
  FILE *destination_file;
167
  const std::string *destination_path;
168
  cvmfs::Sink *destination_sink;
169
  const shash::Any *expected_hash;
170
  const std::string *extra_info;
171
172
  // Allow byte ranges to be specified.
173
  off_t range_offset;
174
  off_t range_size;
175
176
  // Default initialization of fields
177
237
  void Init() {
178
237
    url = NULL;
179
237
    compressed = false;
180
237
    probe_hosts = false;
181
237
    head_request = false;
182
237
    follow_redirects = false;
183
237
    force_nocache = false;
184
237
    pid = -1;
185
237
    uid = -1;
186
237
    gid = -1;
187
237
    cred_data = NULL;
188
237
    destination = kDestinationNone;
189
237
    destination_mem.size = destination_mem.pos = 0;
190
237
    destination_mem.data = NULL;
191
237
    destination_file = NULL;
192
237
    destination_path = NULL;
193
237
    destination_sink = NULL;
194
237
    expected_hash = NULL;
195
237
    extra_info = NULL;
196
197
237
    curl_handle = NULL;
198
237
    headers = NULL;
199
237
    memset(&zstream, 0, sizeof(zstream));
200
237
    info_header = NULL;
201
237
    wait_at[0] = wait_at[1] = -1;
202
237
    nocache = false;
203
237
    error_code = kFailOther;
204
237
    num_used_proxies = num_used_hosts = num_retries = 0;
205
237
    backoff_ms = 0;
206
207
237
    range_offset = -1;
208
237
    range_size = -1;
209
237
    http_code = -1;
210
237
  }
211
212
  // One constructor per destination + head request
213
86
  JobInfo() { Init(); }
214
  JobInfo(const std::string *u, const bool c, const bool ph,
215
          const std::string *p, const shash::Any *h)
216
  {
217
    Init();
218
    url = u;
219
    compressed = c;
220
    probe_hosts = ph;
221
    destination = kDestinationPath;
222
    destination_path = p;
223
    expected_hash = h;
224
  }
225
37
  JobInfo(const std::string *u, const bool c, const bool ph, FILE *f,
226
          const shash::Any *h)
227
37
  {
228
37
    Init();
229
37
    url = u;
230
37
    compressed = c;
231
37
    probe_hosts = ph;
232
37
    destination = kDestinationFile;
233
37
    destination_file = f;
234
37
    expected_hash = h;
235
37
  }
236
112
  JobInfo(const std::string *u, const bool c, const bool ph,
237
          const shash::Any *h)
238
112
  {
239
112
    Init();
240
112
    url = u;
241
112
    compressed = c;
242
112
    probe_hosts = ph;
243
112
    destination = kDestinationMem;
244
112
    expected_hash = h;
245
112
  }
246
2
  JobInfo(const std::string *u, const bool c, const bool ph,
247
          cvmfs::Sink *s, const shash::Any *h)
248
2
  {
249
2
    Init();
250
2
    url = u;
251
2
    compressed = c;
252
2
    probe_hosts = ph;
253
2
    destination = kDestinationSink;
254
2
    destination_sink = s;
255
2
    expected_hash = h;
256
2
  }
257
  JobInfo(const std::string *u, const bool ph) {
258
    Init();
259
    url = u;
260
    probe_hosts = ph;
261
    head_request = true;
262
  }
263
264
237
  ~JobInfo() {
265
237
    if (wait_at[0] >= 0) {
266
      close(wait_at[0]);
267
      close(wait_at[1]);
268
    }
269
  }
270
271
  // Internal state, don't touch
272
  CURL *curl_handle;
273
  curl_slist *headers;
274
  char *info_header;
275
  z_stream zstream;
276
  shash::ContextPtr hash_context;
277
  int wait_at[2];  /**< Pipe used for the return value */
278
  std::string proxy;
279
  bool nocache;
280
  Failures error_code;
281
  int http_code;
282
  unsigned char num_used_proxies;
283
  unsigned char num_used_hosts;
284
  unsigned char num_retries;
285
  unsigned backoff_ms;
286
};  // JobInfo
287
288
289
/**
 * Manages blocks of arrays of curl_slist storing header strings.  In contrast
 * to curl's slists, these ones don't take ownership of the header strings.
 * Overall number of elements is limited as number of concurrent connections
 * is limited.
 *
 * Only use curl_slist objects created in the same HeaderLists instance in this
 * class.
 *
 * NOTE(review): the class declares a destructor but no copy constructor or
 * assignment operator; whether copying an instance is safe depends on how
 * blocks_ is released in the destructor, which is not visible in this header.
 */
class HeaderLists {
  FRIEND_TEST(T_HeaderLists, Intrinsics);
 public:
  ~HeaderLists();
  // The member functions below are only declared here.  Judging from their
  // names they create, copy, extend, shrink, return and print header lists;
  // see the implementation file for the exact contracts.
  curl_slist *GetList(const char *header);
  curl_slist *DuplicateList(curl_slist *slist);
  void AppendHeader(curl_slist *slist, const char *header);
  void CutHeader(const char *header, curl_slist **slist);
  void PutList(curl_slist *slist);
  std::string Print(curl_slist *slist);

 private:
  // Number of curl_slist elements that fit into 4096 bytes
  static const unsigned kBlockSize = 4096/sizeof(curl_slist);

  // An element counts as used as soon as it points to a header string
  bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
  curl_slist *Get(const char *header);
  void Put(curl_slist *slist);
  void AddBlock();

  std::vector<curl_slist *> blocks_;  // List of curl_slist blocks
};
319
320
321
/**
 * Provides hooks to attach per-transfer credentials to curl handles.
 * Overwritten by the AuthzX509Attachment in authz_curl.cc.  Needs to be
 * thread-safe because it can be potentially used by multiple DownloadManagers.
 *
 * Pure interface: both hooks have to be implemented by the derived class.
 */
class CredentialsAttachment {
 public:
  // Virtual so that implementations can be destroyed through a base pointer
  virtual ~CredentialsAttachment() { }
  // Hook receiving the curl handle, a process id and an output slot for
  // implementation-defined data.  NOTE(review): presumably returns true on
  // success and *info_data is later handed back to ReleaseCurlHandle();
  // confirm against the implementation in authz_curl.cc.
  virtual bool ConfigureCurlHandle(CURL *curl_handle,
                                   pid_t pid,
                                   void **info_data) = 0;
  // Counterpart of ConfigureCurlHandle(); receives the curl handle and an
  // info_data pointer.
  virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
};
334
335
336
/**
 * Front end of the download code: owns the curl handle pool, the host and
 * proxy configuration (the opt_* members) and the download statistics
 * counters.  Jobs are described by JobInfo and passed to Fetch().
 *
 * Note when adding new fields: Clone() probably needs to be adjusted, too.
 */
class DownloadManager {
  FRIEND_TEST(T_Download, ValidateGeoReply);
  FRIEND_TEST(T_Download, StripDirect);

 public:
  /**
   * A proxy url together with a dns::Host record.  Both are default
   * constructed unless passed to the constructor.
   */
  struct ProxyInfo {
    ProxyInfo() { }
    explicit ProxyInfo(const std::string &url) : url(url) { }
    ProxyInfo(const dns::Host &host, const std::string &url)
      : host(host)
      , url(url)
    { }
    std::string Print();
    dns::Host host;
    std::string url;
  };

  /**
   * Mode parameter of SetProxyChain().  NOTE(review): presumably selects
   * whether the regular list, the fallback list, or both get replaced;
   * confirm against the implementation.
   */
  enum ProxySetModes {
    kSetProxyRegular = 0,
    kSetProxyFallback,
    kSetProxyBoth,
  };

  /**
   * No attempt was made to order stratum 1 servers
   */
  static const int kProbeUnprobed;
  /**
   * The rtt to a stratum 1 could not be determined because the stratum 1
   * was unreachable.
   */
  static const int kProbeDown;
  /**
   * The stratum 1 server was put in order according to a Geo-API result
   */
  static const int kProbeGeo;

  /**
   * Do not download files larger than 1M into memory.
   */
  static const unsigned kMaxMemSize;

  // Defaults for the DNS parameters, cf. SetDnsParameters()
  static const unsigned kDnsDefaultRetries = 1;
  static const unsigned kDnsDefaultTimeoutMs = 3000;

  DownloadManager();
  ~DownloadManager();

  static int ParseHttpCode(const char digits[3]);

  void Init(const unsigned max_pool_handles,
            const bool use_system_proxy,
            perf::StatisticsTemplate statistics);
  void Fini();
  void Spawn();
  DownloadManager *Clone(perf::StatisticsTemplate statistics);
  Failures Fetch(JobInfo *info);

  void SetCredentialsAttachment(CredentialsAttachment *ca);
  std::string GetDnsServer() const;
  void SetDnsServer(const std::string &address);
  void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
  void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
  void SetIpPreference(const dns::IpPreference preference);
  void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
  void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
  void SetLowSpeedLimit(const unsigned low_speed_limit);
  void SetHostChain(const std::string &host_list);
  void SetHostChain(const std::vector<std::string> &host_list);
  void GetHostInfo(std::vector<std::string> *host_chain,
                   std::vector<int> *rtt, unsigned *current_host);
  void ProbeHosts();
  bool ProbeGeo();
  /**
   * Sort list of servers using the Geo API.  If the output_order
   * vector is NULL, then the servers vector input is itself sorted.
   * If it is non-NULL, then servers is left unchanged and the zero-based
   * ordering is stored into output_order.
   */
  bool GeoSortServers(std::vector<std::string> *servers,
                      std::vector<uint64_t>    *output_order = NULL);
  void SwitchHost();
  void SetProxyChain(const std::string &proxy_list,
                     const std::string &fallback_proxy_list,
                     const ProxySetModes set_mode);
  void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
                    unsigned *current_group,
                    unsigned *fallback_group);
  std::string GetProxyList();
  std::string GetFallbackProxyList();
  void RebalanceProxies();
  void SwitchProxyGroup();
  void SetProxyGroupResetDelay(const unsigned seconds);
  void SetHostResetDelay(const unsigned seconds);
  void SetRetryParameters(const unsigned max_retries,
                          const unsigned backoff_init_ms,
                          const unsigned backoff_max_ms);
  void SetMaxIpaddrPerProxy(unsigned limit);
  void SetProxyTemplates(const std::string &direct, const std::string &forced);
  void EnableInfoHeader();
  void EnableRedirects();

  /**
   * Number of entries in the host chain; 0 as long as no host chain exists
   * (opt_host_chain_ is NULL).
   */
  unsigned num_hosts() {
    if (opt_host_chain_) return opt_host_chain_->size();
    return 0;
  }

  dns::IpPreference opt_ip_preference() const {
    return opt_ip_preference_;
  }

 private:
  static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
                                void *userp, void *socketp);
  static void *MainDownload(void *data);

  bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
  bool ValidateGeoReply(const std::string &reply_order,
                        const unsigned expected_size,
                        std::vector<uint64_t> *reply_vals);
  void SwitchHost(JobInfo *info);
  void SwitchProxy(JobInfo *info);
  void RebalanceProxiesUnlocked();
  CURL *AcquireCurlHandle();
  void ReleaseCurlHandle(CURL *handle);
  void ReleaseCredential(JobInfo *info);
  void InitializeRequest(JobInfo *info, CURL *handle);
  void SetUrlOptions(JobInfo *info);
  void ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
  void UpdateStatistics(CURL *handle);
  bool CanRetry(const JobInfo *info);
  void Backoff(JobInfo *info);
  void SetNocache(JobInfo *info);
  void SetRegularCache(JobInfo *info);
  bool VerifyAndFinalize(const int curl_error, JobInfo *info);
  void InitHeaders();
  void FiniHeaders();
  void CloneProxyConfig(DownloadManager *clone);

  // Curl handle pool, multi handle and shared header data
  Prng prng_;
  std::set<CURL *> *pool_handles_idle_;
  std::set<CURL *> *pool_handles_inuse_;
  uint32_t pool_max_handles_;
  CURLM *curl_multi_;
  HeaderLists *header_lists_;
  curl_slist *default_headers_;
  char *user_agent_;

  // Download thread (cf. MainDownload) and the pipe to terminate it
  pthread_t thread_download_;
  atomic_int32 multi_threaded_;
  int pipe_terminate_[2];

  // Job pipe and the poll() descriptor array
  int pipe_jobs_[2];
  struct pollfd *watch_fds_;
  uint32_t watch_fds_size_;
  uint32_t watch_fds_inuse_;
  uint32_t watch_fds_max_;

  // Locks and the plain option values
  pthread_mutex_t *lock_options_;
  pthread_mutex_t *lock_synchronous_mode_;
  std::string opt_dns_server_;
  unsigned opt_timeout_proxy_;
  unsigned opt_timeout_direct_;
  unsigned opt_low_speed_limit_;
  unsigned opt_max_retries_;
  unsigned opt_backoff_init_ms_;
  unsigned opt_backoff_max_ms_;
  bool enable_info_header_;
  bool opt_ipv4_only_;
  bool follow_redirects_;
  bool use_system_proxy_;

  // Host list
  std::vector<std::string> *opt_host_chain_;
  /**
   * Created by SetHostChain(), filled by probe_hosts.  Contains time to get
   * .cvmfschecksum in ms. -1 is unprobed, -2 is error.
   */
  std::vector<int> *opt_host_chain_rtt_;
  unsigned opt_host_chain_current_;

  // Proxy list
  std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
  /**
   * The current load-balancing group (first dimension in opt_proxy_groups_).
   */
  unsigned opt_proxy_groups_current_;
  /**
   * Number of proxy servers that failed within current load-balance group.
   * Between 0 and (*opt_proxy_groups_)[opt_proxy_groups_current_].size().
   */
  unsigned opt_proxy_groups_current_burned_;
  /**
   * The index of the first fallback proxy group.  If there are none,
   *  it is set to the number of regular proxy groups.
   */
  unsigned opt_proxy_groups_fallback_;
  /**
   * Overall number of proxies summed over all the groups.
   */
  unsigned opt_num_proxies_;
  /**
   * The original proxy list provided to SetProxyChain.
   */
  std::string opt_proxy_list_;
  /**
   * The original proxy fallback list provided to SetProxyChain.
   */
  std::string opt_proxy_fallback_list_;

  /**
   * Used to resolve proxy addresses (host addresses are resolved by the proxy).
   */
  dns::NormalResolver *resolver_;

  /**
   * If a proxy has IPv4 and IPv6 addresses, which one to prefer
   */
  dns::IpPreference opt_ip_preference_;

  /**
   * Used to replace @proxy@ in the Geo-API calls to order Stratum 1 servers,
   * in case the active proxy is DIRECT (no proxy).  Should be a UUID
   * identifying the host.
   */
  std::string proxy_template_direct_;
  /**
   * Used to force a value for @proxy@ in the Geo-API calls to order Stratum 1
   * servers.  If empty, the fully qualified domain name of the active proxy
   * server is used.
   */
  std::string proxy_template_forced_;

  /**
   * More than one proxy group can be considered as group of primary proxies
   * followed by backup proxy groups, e.g. at another site.
   * If opt_proxy_groups_reset_after_ is > 0, cvmfs will reset its proxy group
   * to the first one after opt_proxy_groups_reset_after_ seconds are elapsed.
   */
  time_t opt_timestamp_backup_proxies_;
  time_t opt_timestamp_failover_proxies_;  // failover within the same group
  unsigned opt_proxy_groups_reset_after_;

  /**
   * Similarly to proxy group reset, we'd also like to reset the host after a
   * failover.  Host outages can last longer and might come with a separate
   * reset delay.
   */
  time_t opt_timestamp_backup_host_;
  unsigned opt_host_reset_after_;

  CredentialsAttachment *credentials_attachment_;

  // Writes and reads should be atomic because reading happens in a different
  // thread than writing.
  Counters *counters_;
};  // DownloadManager
594
595
}  // namespace download
596
597
#endif  // CVMFS_DOWNLOAD_H_