CernVM-FS  2.9.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
download.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_DOWNLOAD_H_
6 #define CVMFS_DOWNLOAD_H_
7 
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12 
13 #include <cstdio>
14 #include <set>
15 #include <string>
16 #include <vector>
17 
18 #include "gtest/gtest_prod.h"
19 
20 #include "atomic.h"
21 #include "compression.h"
22 #include "dns.h"
23 #include "duplex_curl.h"
24 #include "hash.h"
25 #include "prng.h"
26 #include "sink.h"
27 #include "statistics.h"
28 
29 
30 namespace download {
31 
36 enum Failures {
37  kFailOk = 0,
42  // artificial failure code. Try other host even though
43  // failure seems to be at the proxy
57 
59 }; // Failures
60 
61 
// Tells whether a failure code counts as a host-side transfer error.
// Returns true when `error` matches one of the case labels below and false
// for every other code (the default branch falls through to `return false`).
// NOTE(review): this listing omits its own lines 64 and 66, so additional
// case labels probably exist in the real header -- confirm before relying on
// kFailHostTooSlow being the only match.
62 inline bool IsHostTransferError(const Failures error) {
63  switch (error) {
65  case kFailHostTooSlow:
67  return true;
68  default:
69  break;
70  }
71  return false;
72 }
73 
// Tells whether a failure code counts as a proxy-side transfer error.
// Returns true when `error` matches one of the case labels below and false
// for every other code (the default branch falls through to `return false`).
// NOTE(review): this listing omits its own lines 76 and 78, so additional
// case labels probably exist in the real header -- confirm before relying on
// kFailProxyTooSlow being the only match.
74 inline bool IsProxyTransferError(const Failures error) {
75  switch (error) {
77  case kFailProxyTooSlow:
79  return true;
80  default:
81  break;
82  }
83  return false;
84 }
85 
86 inline const char *Code2Ascii(const Failures error) {
87  const char *texts[kFailNumEntries + 1];
88  texts[0] = "OK";
89  texts[1] = "local I/O failure";
90  texts[2] = "malformed URL";
91  texts[3] = "failed to resolve proxy address";
92  texts[4] = "failed to resolve host address";
93  texts[5] = "all proxies failed, trying host fail-over";
94  texts[6] = "proxy connection problem";
95  texts[7] = "host connection problem";
96  texts[8] = "proxy returned HTTP error";
97  texts[9] = "host returned HTTP error";
98  texts[10] = "corrupted data received";
99  texts[11] = "resource too big to download";
100  texts[12] = "unknown network error";
101  texts[13] = "Unsupported URL in protocol";
102  texts[14] = "proxy serving data too slowly";
103  texts[15] = "host serving data too slowly";
104  texts[16] = "proxy data transfer cut short";
105  texts[17] = "host data transfer cut short";
106  texts[18] = "no text";
107  return texts[error];
108 }
109 
119 }; // Destination
120 
121 
// Bundle of performance counters used by the download code.  Every pointer is
// obtained from the statistics template handed to the constructor.
// NOTE(review): sz_transferred_bytes, n_requests, n_retries, n_proxy_failover
// and n_host_failover are assigned below, but their declarations are not part
// of this listing (its lines 123 and 125-128 are missing).
122 struct Counters {
124  perf::Counter *sz_transfer_time; // measured in milliseconds
129
// Registers the six counters, each under its name with a one-line description.
// NOTE(review): the description registered for sz_transfer_time spells
// "miliseconds"; it is a runtime string and is intentionally left as is here.
130  explicit Counters(perf::StatisticsTemplate statistics) {
131  sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
132  "Number of transferred bytes");
133  sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
134  "Transfer time (miliseconds)");
135  n_requests = statistics.RegisterTemplated("n_requests",
136  "Number of requests");
137  n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
138  n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
139  "Number of proxy failovers");
140  n_host_failover = statistics.RegisterTemplated("n_host_failover",
141  "Number of host failovers");
142  }
143 }; // Counters
144 
145 
149 struct JobInfo {
150  const std::string *url;
156  pid_t pid;
157  uid_t uid;
158  gid_t gid;
159  void *cred_data; // Per-transfer credential data
161  struct {
162  size_t size;
163  size_t pos;
164  char *data;
165  } destination_mem;
167  const std::string *destination_path;
170  const std::string *extra_info;
171 
172  // Allow byte ranges to be specified.
174  off_t range_size;
175 
176  // Default initialization of fields.
// Puts the job into its "nothing set" state: pointers become NULL, flags
// become false, pid/uid/gid, the byte range and the HTTP code become -1, and
// sizes and back-off become 0.  Every constructor of this struct calls Init()
// before filling in its own arguments.
// NOTE(review): this listing omits its own lines 188, 203-204 and 206, so a
// few more fields are presumably reset in the real header -- confirm.
177  void Init() {
// Request description as supplied by the caller
178  url = NULL;
179  compressed = false;
180  probe_hosts = false;
181  head_request = false;
182  follow_redirects = false;
183  force_nocache = false;
184  pid = -1;
185  uid = -1;
186  gid = -1;
187  cred_data = NULL;
// No destination of any kind is attached yet
189  destination_mem.size = destination_mem.pos = 0;
190  destination_mem.data = NULL;
191  destination_file = NULL;
192  destination_path = NULL;
193  destination_sink = NULL;
194  expected_hash = NULL;
195  extra_info = NULL;
196 
// Internal transfer state
197  curl_handle = NULL;
198  headers = NULL;
// NOTE(review): memset is declared in <cstring>/<string.h>, which is not among
// the includes visible at the top of this listing; presumably it is pulled in
// through another header -- confirm.
199  memset(&zstream, 0, sizeof(zstream));
200  info_header = NULL;
// -1 marks both wait_at descriptors as not open; the clean-up code further
// down only closes them when wait_at[0] >= 0.
201  wait_at[0] = wait_at[1] = -1;
202  nocache = false;
205  backoff_ms = 0;
207 
// -1 stands for "not set" for the byte range and the HTTP status code
208  range_offset = -1;
209  range_size = -1;
210  http_code = -1;
211  }
212 
213  // One constructor per destination + head request
// Default constructor: only applies the defaults from Init().
214  JobInfo() { Init(); }
// Job that takes a destination path.
//   u  - URL to fetch (pointer is stored, not copied)
//   c  - value for the `compressed` flag
//   ph - value for the `probe_hosts` flag
//   p  - destination path (pointer is stored, not copied)
//   h  - expected content hash (pointer is stored, not copied)
// NOTE(review): listing line 222 is missing here; presumably it selects the
// destination kind -- confirm against the real header.
215  JobInfo(const std::string *u, const bool c, const bool ph,
216  const std::string *p, const shash::Any *h)
217  {
218  Init();
219  url = u;
220  compressed = c;
221  probe_hosts = ph;
223  destination_path = p;
224  expected_hash = h;
225  }
// Job that takes a FILE stream as destination.
//   u  - URL to fetch (pointer is stored, not copied)
//   c  - value for the `compressed` flag
//   ph - value for the `probe_hosts` flag
//   f  - destination stream, stored in destination_file
//   h  - expected content hash (pointer is stored, not copied)
// NOTE(review): listing line 233 is missing here; presumably it selects the
// destination kind -- confirm against the real header.
226  JobInfo(const std::string *u, const bool c, const bool ph, FILE *f,
227  const shash::Any *h)
228  {
229  Init();
230  url = u;
231  compressed = c;
232  probe_hosts = ph;
234  destination_file = f;
235  expected_hash = h;
236  }
// Job without an explicit destination argument.
//   u  - URL to fetch (pointer is stored, not copied)
//   c  - value for the `compressed` flag
//   ph - value for the `probe_hosts` flag
//   h  - expected content hash (pointer is stored, not copied)
// NOTE(review): listing line 244 is missing here; presumably it selects the
// in-memory destination (destination_mem) -- confirm against the real header.
237  JobInfo(const std::string *u, const bool c, const bool ph,
238  const shash::Any *h)
239  {
240  Init();
241  url = u;
242  compressed = c;
243  probe_hosts = ph;
245  expected_hash = h;
246  }
// Job that takes a cvmfs::Sink as destination.
//   u  - URL to fetch (pointer is stored, not copied)
//   c  - value for the `compressed` flag
//   ph - value for the `probe_hosts` flag
//   s  - destination sink, stored in destination_sink
//   h  - expected content hash (pointer is stored, not copied)
// NOTE(review): listing line 254 is missing here; presumably it selects the
// destination kind -- confirm against the real header.
247  JobInfo(const std::string *u, const bool c, const bool ph,
248  cvmfs::Sink *s, const shash::Any *h)
249  {
250  Init();
251  url = u;
252  compressed = c;
253  probe_hosts = ph;
255  destination_sink = s;
256  expected_hash = h;
257  }
// Head request: stores the URL and the probe_hosts flag and sets
// head_request to true.  No destination and no expected hash are set, they
// keep the defaults from Init().
//   u  - URL to query (pointer is stored, not copied)
//   ph - value for the `probe_hosts` flag
258  JobInfo(const std::string *u, const bool ph) {
259  Init();
260  url = u;
261  probe_hosts = ph;
262  head_request = true;
263  }
264 
266  if (wait_at[0] >= 0) {
267  close(wait_at[0]);
268  close(wait_at[1]);
269  }
270  }
271 
276  bool IsFileNotFound();
277 
278  // Internal state, don't touch
279  CURL *curl_handle;
280  curl_slist *headers;
281  char *info_header;
282  z_stream zstream;
284  int wait_at[2];
285  std::string proxy;
286  bool nocache;
289  unsigned char num_used_proxies;
290  unsigned char num_used_hosts;
291  unsigned char num_retries;
292  unsigned backoff_ms;
294 }; // JobInfo
295 
296 
// Helper class around curl_slist header lists.  The only data member is
// blocks_, a vector of pointers to curl_slist blocks.  All member functions
// except IsUsed() are defined outside this header.
// NOTE(review): the one-line summaries on the declarations below are derived
// from the names and signatures only -- confirm against download.cc.
306 class HeaderLists {
// Gives the T_HeaderLists.Intrinsics unit test access to the private part
307  FRIEND_TEST(T_HeaderLists, Intrinsics);
308  public:
309  ~HeaderLists();
// Presumably: start a new list whose first entry is `header`
310  curl_slist *GetList(const char *header);
// Presumably: return a copy of `slist`
311  curl_slist *DuplicateList(curl_slist *slist);
// Presumably: add `header` to the end of `slist`
312  void AppendHeader(curl_slist *slist, const char *header);
// Presumably: remove `header` from `*slist`; takes a pointer to the list
// head, so the head itself can be replaced
313  void CutHeader(const char *header, curl_slist **slist);
// Presumably: hand a list obtained from GetList()/DuplicateList() back
314  void PutList(curl_slist *slist);
// Presumably: render `slist` as text
315  std::string Print(curl_slist *slist);
316 
317  private:
// Number of curl_slist structs that fit into 4096 bytes (integer division)
318  static const unsigned kBlockSize = 4096/sizeof(curl_slist);
319 
// An entry counts as used when its data pointer is non-NULL.
// `slist` itself must not be NULL, it is dereferenced without a check.
320  bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
321  curl_slist *Get(const char *header);
322  void Put(curl_slist *slist);
323  void AddBlock();
324 
325  std::vector<curl_slist *> blocks_; // List of curl_slist blocks
326 };
327 
328 
335  public:
337  virtual bool ConfigureCurlHandle(CURL *curl_handle,
338  pid_t pid,
339  void **info_data) = 0;
340  virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
341 };
342 
343 
348  FRIEND_TEST(T_Download, ValidateGeoReply);
349  FRIEND_TEST(T_Download, StripDirect);
350 
351  public:
// Description of a single proxy: its URL plus a dns::Host record.
// NOTE(review): the declaration of the `host` member (initialized by the
// two-argument constructor) is not part of this listing, its line 360 is
// missing.
352  struct ProxyInfo {
// Leaves all members default-constructed
353  ProxyInfo() { }
// Sets only the URL; explicit, so a std::string does not convert silently
354  explicit ProxyInfo(const std::string &url) : url(url) { }
// Copies both the host record and the URL
355  ProxyInfo(const dns::Host &host, const std::string &url)
356  : host(host)
357  , url(url)
358  { }
// Defined outside this header; presumably returns a printable description
359  std::string Print();
361  std::string url;
362  };
363 
368  };
369 
373  static const int kProbeUnprobed;
378  static const int kProbeDown;
382  static const int kProbeGeo;
383 
387  static const unsigned kMaxMemSize;
388 
389  static const unsigned kDnsDefaultRetries = 1;
390  static const unsigned kDnsDefaultTimeoutMs = 3000;
391 
392  DownloadManager();
394 
395  static int ParseHttpCode(const char digits[3]);
396 
397  void Init(const unsigned max_pool_handles,
398  const bool use_system_proxy,
399  perf::StatisticsTemplate statistics);
400  void Fini();
401  void Spawn();
403  Failures Fetch(JobInfo *info);
404 
406  std::string GetDnsServer() const;
407  void SetDnsServer(const std::string &address);
408  void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
409  void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
410  void SetIpPreference(const dns::IpPreference preference);
411  void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
412  void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
413  void SetLowSpeedLimit(const unsigned low_speed_limit);
414  void SetHostChain(const std::string &host_list);
415  void SetHostChain(const std::vector<std::string> &host_list);
416  void GetHostInfo(std::vector<std::string> *host_chain,
417  std::vector<int> *rtt, unsigned *current_host);
418  void ProbeHosts();
419  bool ProbeGeo();
420  // Sort list of servers using the Geo API. If the output_order
421  // vector is NULL, then the servers vector input is itself sorted.
422  // If it is non-NULL, then servers is left unchanged and the zero-based
423  // ordering is stored into output_order.
424  bool GeoSortServers(std::vector<std::string> *servers,
425  std::vector<uint64_t> *output_order = NULL);
426  void SwitchHost();
427  void SetProxyChain(const std::string &proxy_list,
428  const std::string &fallback_proxy_list,
429  const ProxySetModes set_mode);
430  void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
431  unsigned *current_group,
432  unsigned *fallback_group);
433  std::string GetProxyList();
434  std::string GetFallbackProxyList();
435  void RebalanceProxies();
436  void SwitchProxyGroup();
437  void SetProxyGroupResetDelay(const unsigned seconds);
438  void SetHostResetDelay(const unsigned seconds);
439  void SetRetryParameters(const unsigned max_retries,
440  const unsigned backoff_init_ms,
441  const unsigned backoff_max_ms);
442  void SetMaxIpaddrPerProxy(unsigned limit);
443  void SetProxyTemplates(const std::string &direct, const std::string &forced);
444  void EnableInfoHeader();
445  void EnableRedirects();
446 
447  unsigned num_hosts() {
448  if (opt_host_chain_) return opt_host_chain_->size();
449  return 0;
450  }
451 
453  return opt_ip_preference_;
454  }
455 
456  private:
457  static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
458  void *userp, void *socketp);
459  static void *MainDownload(void *data);
460 
461  bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
462  bool ValidateGeoReply(const std::string &reply_order,
463  const unsigned expected_size,
464  std::vector<uint64_t> *reply_vals);
465  void SwitchHost(JobInfo *info);
466  void SwitchProxy(JobInfo *info);
468  CURL *AcquireCurlHandle();
469  void ReleaseCurlHandle(CURL *handle);
470  void ReleaseCredential(JobInfo *info);
471  void InitializeRequest(JobInfo *info, CURL *handle);
472  void SetUrlOptions(JobInfo *info);
473  void ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
474  void UpdateStatistics(CURL *handle);
475  bool CanRetry(const JobInfo *info);
476  void Backoff(JobInfo *info);
477  void SetNocache(JobInfo *info);
478  void SetRegularCache(JobInfo *info);
479  bool VerifyAndFinalize(const int curl_error, JobInfo *info);
480  void InitHeaders();
481  void FiniHeaders();
482  void CloneProxyConfig(DownloadManager *clone);
483 
485  std::set<CURL *> *pool_handles_idle_;
486  std::set<CURL *> *pool_handles_inuse_;
488  CURLM *curl_multi_;
490  curl_slist *default_headers_;
491  char *user_agent_;
492 
493  pthread_t thread_download_;
496 
497  int pipe_jobs_[2];
498  struct pollfd *watch_fds_;
499  uint32_t watch_fds_size_;
501  uint32_t watch_fds_max_;
502 
503  pthread_mutex_t *lock_options_;
504  pthread_mutex_t *lock_synchronous_mode_;
505  std::string opt_dns_server_;
516 
517  // Host list
518  std::vector<std::string> *opt_host_chain_;
523  std::vector<int> *opt_host_chain_rtt_;
525 
526  // Proxy list
527  std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
549  std::string opt_proxy_list_;
554 
559 
564 
577 
585  time_t opt_timestamp_failover_proxies_; // failover within the same group
587 
595 
597 
598  // Writes and reads should be atomic because reading happens in a different
599  // thread than writing.
601 }; // DownloadManager
602 
603 } // namespace download
604 
605 #endif // CVMFS_DOWNLOAD_H_
unsigned opt_timeout_direct_
Definition: download.h:507
bool StripDirect(const std::string &proxy_list, std::string *cleaned_list)
Definition: download.cc:2390
unsigned opt_low_speed_limit_
Definition: download.h:508
Destination destination
Definition: download.h:160
Definition: prng.h:25
static const unsigned kDnsDefaultTimeoutMs
Definition: download.h:390
unsigned opt_backoff_init_ms_
Definition: download.h:510
unsigned char num_used_hosts
Definition: download.h:290
curl_slist * Get(const char *header)
Definition: download.cc:709
std::string Print(curl_slist *slist)
Definition: download.cc:699
unsigned opt_proxy_groups_current_burned_
Definition: download.h:536
void ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host)
Definition: download.cc:1052
unsigned opt_proxy_groups_reset_after_
Definition: download.h:586
void SetUrlOptions(JobInfo *info)
Definition: download.cc:878
void ReleaseCredential(JobInfo *info)
Definition: download.cc:1198
unsigned backoff_ms
Definition: download.h:292
bool IsUsed(curl_slist *slist)
Definition: download.h:320
std::string opt_proxy_fallback_list_
Definition: download.h:553
void SetHostChain(const std::string &host_list)
void SetNocache(JobInfo *info)
Definition: download.cc:1171
unsigned opt_host_reset_after_
Definition: download.h:594
void SetLowSpeedLimit(const unsigned low_speed_limit)
Definition: download.cc:1886
std::string proxy_template_direct_
Definition: download.h:570
static int ParseHttpCode(const char digits[3])
Definition: download.cc:384
FRIEND_TEST(T_Download, ValidateGeoReply)
static const int kProbeGeo
Definition: download.h:382
static const unsigned kBlockSize
Definition: download.h:318
unsigned opt_proxy_groups_current_
Definition: download.h:531
off_t range_offset
Definition: download.h:173
shash::ContextPtr hash_context
Definition: download.h:283
bool ValidateGeoReply(const std::string &reply_order, const unsigned expected_size, std::vector< uint64_t > *reply_vals)
Definition: download.cc:2350
bool IsFileNotFound()
Definition: download.cc:364
time_t opt_timestamp_backup_proxies_
Definition: download.h:584
void SetProxyChain(const std::string &proxy_list, const std::string &fallback_proxy_list, const ProxySetModes set_mode)
Definition: download.cc:2430
std::string GetProxyList()
Definition: download.cc:2608
unsigned int current_host_chain_index
Definition: download.h:293
std::set< CURL * > * pool_handles_inuse_
Definition: download.h:486
void * cred_data
Definition: download.h:159
pthread_mutex_t * lock_options_
Definition: download.h:503
pthread_t thread_download_
Definition: download.h:493
ProxyInfo(const dns::Host &host, const std::string &url)
Definition: download.h:355
std::string opt_proxy_list_
Definition: download.h:549
perf::Counter * sz_transfer_time
Definition: download.h:124
std::vector< std::vector< ProxyInfo > > * opt_proxy_groups_
Definition: download.h:527
unsigned opt_proxy_groups_fallback_
Definition: download.h:541
curl_slist * default_headers_
Definition: download.h:490
curl_slist * GetList(const char *header)
Definition: download.cc:633
void ReleaseCurlHandle(CURL *handle)
Definition: download.cc:795
z_stream zstream
Definition: download.h:282
void SetDnsServer(const std::string &address)
Definition: download.cc:1815
JobInfo(const std::string *u, const bool c, const bool ph, const shash::Any *h)
Definition: download.h:237
FRIEND_TEST(T_HeaderLists, Intrinsics)
static void * MainDownload(void *data)
Definition: download.cc:475
void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct)
Definition: download.cc:1872
perf::Counter * n_retries
Definition: download.h:126
std::string opt_dns_server_
Definition: download.h:505
void Backoff(JobInfo *info)
Definition: download.cc:1149
perf::Counter * sz_transferred_bytes
Definition: download.h:123
int32_t atomic_int32
Definition: atomic.h:17
Counter * RegisterTemplated(const std::string &name_minor, const std::string &desc)
Definition: statistics.h:109
void UpdateStatistics(CURL *handle)
Definition: download.cc:1115
void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds)
Definition: download.cc:1851
const char * Code2Ascii(const Failures error)
Definition: download.h:86
unsigned char num_retries
Definition: download.h:291
void GetProxyInfo(std::vector< std::vector< ProxyInfo > > *proxy_chain, unsigned *current_group, unsigned *fallback_group)
Definition: download.cc:2583
void SetProxyGroupResetDelay(const unsigned seconds)
Definition: download.cc:2664
atomic_int32 multi_threaded_
Definition: download.h:494
dns::NormalResolver * resolver_
Definition: download.h:558
std::vector< curl_slist * > blocks_
Definition: download.h:325
ProxyInfo(const std::string &url)
Definition: download.h:354
dns::IpPreference opt_ip_preference_
Definition: download.h:563
Definition: dns.h:90
perf::Counter * n_host_failover
Definition: download.h:128
DownloadManager * Clone(perf::StatisticsTemplate statistics)
Definition: download.cc:2724
bool IsProxyTransferError(const Failures error)
Definition: download.h:74
FILE * destination_file
Definition: download.h:166
void AppendHeader(curl_slist *slist, const char *header)
Definition: download.cc:655
bool follow_redirects
Definition: download.h:154
void SetIpPreference(const dns::IpPreference preference)
Definition: download.cc:1861
perf::Counter * n_requests
Definition: download.h:125
JobInfo(const std::string *u, const bool c, const bool ph, const std::string *p, const shash::Any *h)
Definition: download.h:215
void SetRetryParameters(const unsigned max_retries, const unsigned backoff_init_ms, const unsigned backoff_max_ms)
Definition: download.cc:2683
void SetRegularCache(JobInfo *info)
Definition: download.cc:1185
void CloneProxyConfig(DownloadManager *clone)
Definition: download.cc:2758
void PutList(curl_slist *slist)
Definition: download.cc:690
void SetMaxIpaddrPerProxy(unsigned limit)
Definition: download.cc:2694
const shash::Any * expected_hash
Definition: download.h:169
const std::string * extra_info
Definition: download.h:170
std::vector< int > * opt_host_chain_rtt_
Definition: download.h:523
dns::IpPreference opt_ip_preference() const
Definition: download.h:452
cvmfs::Sink * destination_sink
Definition: download.h:168
time_t opt_timestamp_backup_host_
Definition: download.h:593
std::string GetFallbackProxyList()
Definition: download.cc:2612
void SetProxyTemplates(const std::string &direct, const std::string &forced)
Definition: download.cc:2700
IpPreference
Definition: dns.h:46
unsigned opt_backoff_max_ms_
Definition: download.h:511
std::string GetDnsServer() const
Definition: download.cc:1807
unsigned opt_host_chain_current_
Definition: download.h:524
CURL * curl_handle
Definition: download.h:279
CredentialsAttachment * credentials_attachment_
Definition: download.h:596
std::vector< std::string > * opt_host_chain_
Definition: download.h:518
struct pollfd * watch_fds_
Definition: download.h:498
curl_slist * DuplicateList(curl_slist *slist)
Definition: download.cc:638
Failures error_code
Definition: download.h:287
JobInfo(const std::string *u, const bool ph)
Definition: download.h:258
Failures Fetch(JobInfo *info)
Definition: download.cc:1719
struct download::JobInfo::@3 destination_mem
bool CanRetry(const JobInfo *info)
Definition: download.cc:1133
perf::Counter * n_proxy_failover
Definition: download.h:127
JobInfo(const std::string *u, const bool c, const bool ph, FILE *f, const shash::Any *h)
Definition: download.h:226
void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct)
Definition: download.cc:1895
std::string proxy_template_forced_
Definition: download.h:576
const std::string * destination_path
Definition: download.h:167
time_t opt_timestamp_failover_proxies_
Definition: download.h:585
void SetDnsParameters(const unsigned retries, const unsigned timeout_ms)
Definition: download.cc:1833
static const int kProbeUnprobed
Definition: download.h:373
virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data)=0
static const unsigned kMaxMemSize
Definition: download.h:387
bool IsHostTransferError(const Failures error)
Definition: download.h:62
void Init(const unsigned max_pool_handles, const bool use_system_proxy, perf::StatisticsTemplate statistics)
Definition: download.cc:1594
JobInfo(const std::string *u, const bool c, const bool ph, cvmfs::Sink *s, const shash::Any *h)
Definition: download.h:247
static const unsigned kDnsDefaultRetries
Definition: download.h:389
bool GeoSortServers(std::vector< std::string > *servers, std::vector< uint64_t > *output_order=NULL)
Definition: download.cc:2138
static const int kProbeDown
Definition: download.h:378
curl_slist * headers
Definition: download.h:280
unsigned char num_used_proxies
Definition: download.h:289
Counters(perf::StatisticsTemplate statistics)
Definition: download.h:130
std::string proxy
Definition: download.h:285
void GetHostInfo(std::vector< std::string > *host_chain, std::vector< int > *rtt, unsigned *current_host)
Definition: download.cc:1939
bool VerifyAndFinalize(const int curl_error, JobInfo *info)
Definition: download.cc:1214
void CutHeader(const char *header, curl_slist **slist)
Definition: download.cc:671
const std::string * url
Definition: download.h:150
void SwitchProxy(JobInfo *info)
Definition: download.cc:1958
void SetCredentialsAttachment(CredentialsAttachment *ca)
Definition: download.cc:1799
std::set< CURL * > * pool_handles_idle_
Definition: download.h:485
pthread_mutex_t * lock_synchronous_mode_
Definition: download.h:504
virtual bool ConfigureCurlHandle(CURL *curl_handle, pid_t pid, void **info_data)=0
void InitializeRequest(JobInfo *info, CURL *handle)
Definition: download.cc:812
static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action, void *userp, void *socketp)
Definition: download.cc:400
HeaderLists * header_lists_
Definition: download.h:489
void SetHostResetDelay(const unsigned seconds)
Definition: download.cc:2674
void Put(curl_slist *slist)
Definition: download.cc:726
char * info_header
Definition: download.h:281