CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
download.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7 
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12 
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18 
19 #include "gtest/gtest_prod.h"
20 
22 #include "crypto/hash.h"
23 #include "duplex_curl.h"
24 #include "network/dns.h"
25 #include "network/health_check.h"
26 #include "network/jobinfo.h"
27 #include "network/network_errors.h"
29 #include "network/sink.h"
30 #include "ssl.h"
31 #include "statistics.h"
32 #include "util/atomic.h"
33 #include "util/pipe.h"
34 #include "util/pointer.h"
35 #include "util/prng.h"
36 #include "util/shared_ptr.h"
37 
38 class InterruptCue;
39 
40 namespace download {
41 
// Bundle of performance counters of the download code.  The constructor
// registers every counter with the statistics template it is given, using the
// counter's member name as the registered name.
//
// NOTE(review): the declarations on listing lines 43 and 45-49 were missing
// from this extract although the constructor assigns them.  They are restored
// with the types and line numbers given by the member index of this page.
42 struct Counters {
43  perf::Counter *sz_transferred_bytes;
44  perf::Counter *sz_transfer_time; // measured in milliseconds
45  perf::Counter *n_requests;
46  perf::Counter *n_retries;
47  perf::Counter *n_metalink_failover;
48  perf::Counter *n_host_failover;
49  perf::Counter *n_proxy_failover;
50 
// Registers all seven counters through statistics.RegisterTemplated(), which
// takes the counter name and a human readable description.
51  explicit Counters(perf::StatisticsTemplate statistics) {
52  sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
53  "Number of transferred bytes");
54  sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
55  "Transfer time (milliseconds)");
56  n_requests = statistics.RegisterTemplated("n_requests",
57  "Number of requests");
58  n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
59  n_metalink_failover = statistics.RegisterTemplated("n_metalink_failover",
60  "Number of metalink failovers");
61  n_host_failover = statistics.RegisterTemplated("n_host_failover",
62  "Number of host failovers");
63  n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
64  "Number of proxy failovers");
65  }
66 }; // Counters
67 
// Helper around curl_slist header lists.  The private part keeps a vector of
// curl_slist blocks (blocks_); presumably list nodes are served from these
// blocks rather than allocated one by one -- confirm in download.cc, the
// implementations are not part of this header.
77 class HeaderLists {
// Gives the T_HeaderLists.Intrinsics unit test access to the private members.
78  FRIEND_TEST(T_HeaderLists, Intrinsics);
79  public:
80  ~HeaderLists();
// List-level interface: callers deal in curl_slist pointers and C strings.
// CutHeader() takes the list by pointer-to-pointer, so it can replace the
// caller's list head.
81  curl_slist *GetList(const char *header);
82  curl_slist *DuplicateList(curl_slist *slist);
83  void AppendHeader(curl_slist *slist, const char *header);
84  void CutHeader(const char *header, curl_slist **slist);
85  void PutList(curl_slist *slist);
86  std::string Print(curl_slist *slist);
87 
88  private:
// Number of curl_slist elements that fit into 4096 bytes.
89  static const unsigned kBlockSize = 4096/sizeof(curl_slist);
90 
// A node counts as used as long as its data pointer is non-NULL.
91  bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
// Node-level counterparts of GetList()/PutList() plus block management;
// NOTE(review): exact division of labour is defined in download.cc.
92  curl_slist *Get(const char *header);
93  void Put(curl_slist *slist);
94  void AddBlock();
95 
96  std::vector<curl_slist *> blocks_; // List of curl_slist blocks
97 };
98 
99 
106  public:
108  virtual bool ConfigureCurlHandle(CURL *curl_handle,
109  pid_t pid,
110  void **info_data) = 0;
111  virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
112 };
113 
114 
119 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
120  FRIEND_TEST(T_Download, ValidateGeoReply);
121  FRIEND_TEST(T_Download, StripDirect);
122  FRIEND_TEST(T_Download, EscapeUrl);
123 
124  public:
125  // HostInfo is used for both metalink and host
// State of one server chain: the list of servers, the position currently in
// use and two values related to switching back (timestamp_backup,
// reset_after).
// NOTE(review): `current` is presumably an index into *chain and reset_after
// presumably a delay in seconds -- confirm against download.cc.
126  struct HostInfo {
// Start from a well-defined empty state.  num_hosts() and num_metalinks()
// test `chain` for NULL, which is only meaningful once the pointer has been
// initialized.
127  HostInfo() : chain(NULL), current(0), timestamp_backup(0), reset_after(0) { }
// The head of this constructor (listing line 128) was missing from the
// extract; the member index of this page confirms the signature.
128  HostInfo(
129  std::vector<std::string> *chain,
130  const int current,
131  const time_t timestamp_backup,
132  const unsigned reset_after)
133  : chain(chain)
134  , current(current)
135  , timestamp_backup(timestamp_backup)
136  , reset_after(reset_after)
137  { }
138  std::vector<std::string> *chain;
139  int current;
// Restored (listing line 140 was missing): the initializer list above sets
// this member from a time_t argument.
140  time_t timestamp_backup;
141  unsigned reset_after;
142  };
143 
// A proxy described by its URL and, optionally, the dns::Host it belongs to.
144  struct ProxyInfo {
145  ProxyInfo() { }
// explicit: a plain string must not silently convert into a ProxyInfo.
146  explicit ProxyInfo(const std::string &url) : url(url) { }
147  ProxyInfo(const dns::Host &host, const std::string &url)
148  : host(host)
149  , url(url)
150  { }
151  std::string Print();
// Restored (listing line 152 was missing): the two-argument constructor
// initializes `host` from a dns::Host.  Declared before `url` so that the
// declaration order matches the initializer order above.
152  dns::Host host;
153  std::string url;
154  };
155 
160  };
161 
// Probe markers.  They are only declared here (no in-class initializer), so
// their values come from a definition outside this header.
// NOTE(review): presumably special values stored in place of a measured
// round trip time -- confirm where they are used.
165  static const int kProbeUnprobed;
170  static const int kProbeDown;
174  static const int kProbeGeo;
175 
// Defaults for the DNS settings; the timeout is given in milliseconds.
176  static const unsigned kDnsDefaultRetries = 1;
177  static const unsigned kDnsDefaultTimeoutMs = 3000;
// NOTE(review): scale factor related to opt_proxy_map_, judging by the name;
// confirm its exact role in download.cc.
178  static const unsigned kProxyMapScale = 16;
179 
180  DownloadManager(const unsigned max_pool_handles,
181  const perf::StatisticsTemplate &statistics,
182  const std::string &name = "standard");
184 
185  static int ParseHttpCode(const char digits[3]);
186 
187  void Spawn();
189  const std::string &cloned_name);
190  Failures Fetch(JobInfo *info);
191 
193  std::string GetDnsServer() const;
194  void SetDnsServer(const std::string &address);
195  void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
196  void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
197  void SetIpPreference(const dns::IpPreference preference);
198  void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
199  void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
200  void SetLowSpeedLimit(const unsigned low_speed_limit);
201  void SetMetalinkChain(const std::string &metalink_list);
202  void SetMetalinkChain(const std::vector<std::string> &metalink_list);
203  void GetMetalinkInfo(std::vector<std::string> *metalink_chain,
204  unsigned *current_metalink);
205  void SwitchMetalink();
206  bool CheckMetalinkChain(const time_t now);
207  void SetHostChain(const std::string &host_list);
208  void SetHostChain(const std::vector<std::string> &host_list);
209  void GetHostInfo(std::vector<std::string> *host_chain,
210  std::vector<int> *rtt, unsigned *current_host);
211  void ProbeHosts();
212  bool ProbeGeo();
213  // Sort list of servers using the Geo API. If the output_order
214  // vector is NULL, then the servers vector input is itself sorted.
215  // If it is non-NULL, then servers is left unchanged and the zero-based
216  // ordering is stored into output_order.
217  bool GeoSortServers(std::vector<std::string> *servers,
218  std::vector<uint64_t> *output_order = NULL);
219  void SwitchHost();
220  void SetProxyChain(const std::string &proxy_list,
221  const std::string &fallback_proxy_list,
222  const ProxySetModes set_mode);
223  void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
224  unsigned *current_group,
225  unsigned *fallback_group);
226  std::string GetProxyList();
227  std::string GetFallbackProxyList();
228  void ShardProxies();
229  void RebalanceProxies();
230  void SwitchProxyGroup();
231  void SetProxyGroupResetDelay(const unsigned seconds);
232  void SetMetalinkResetDelay(const unsigned seconds);
233  void SetHostResetDelay(const unsigned seconds);
234  void SetRetryParameters(const unsigned max_retries,
235  const unsigned backoff_init_ms,
236  const unsigned backoff_max_ms);
237  void SetMaxIpaddrPerProxy(unsigned limit);
238  void SetProxyTemplates(const std::string &direct, const std::string &forced);
239  void EnableInfoHeader();
240  void EnableRedirects();
242  void EnableHTTPTracing();
243  void AddHTTPTracingHeader(const std::string &header);
245 
248  void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
249 
// Number of entries in the host chain; 0 as long as no chain is attached
// (opt_host_.chain is NULL).
250  unsigned num_hosts() {
251  if (!opt_host_.chain) return 0;
252  return opt_host_.chain->size();
253  }
254 
// Number of entries in the metalink chain; 0 as long as no chain is attached
// (opt_metalink_.chain is NULL).
255  unsigned num_metalinks() {
256  if (!opt_metalink_.chain) return 0;
257  return opt_metalink_.chain->size();
258  }
259 
// Read accessor for opt_ip_preference_.
// The function head (listing line 260) was missing from the extract, which
// left the body without a declaration; restored from the member index of this
// page.
260  dns::IpPreference opt_ip_preference() const {
261  return opt_ip_preference_;
262  }
263 
264  private:
// Private helpers of DownloadManager.  Only declarations are visible here;
// the definitions are in download.cc.
//
// Static entry points taking untyped pointers.
// NOTE(review): the parameter list of CallbackCurlSocket looks like libcurl's
// multi socket callback and MainDownload has the shape of a pthread start
// routine -- confirm where they are registered.
265  static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
266  void *userp, void *socketp);
267  static void *MainDownload(void *data);
268 
// StripDirect and ValidateGeoReply are reachable from unit tests through the
// FRIEND_TEST declarations at the top of the class.
269  bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
270  bool ValidateGeoReply(const std::string &reply_order,
271  const unsigned expected_size,
272  std::vector<uint64_t> *reply_vals);
// Per-job variants of the public Switch*() functions; SwitchHostInfo takes
// the HostInfo to operate on by reference.
273  void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo);
274  void SwitchMetalink(JobInfo *info);
275  void SwitchHost(JobInfo *info);
276  void SwitchProxy(JobInfo *info);
// NOTE(review): the "Unlocked" suffix presumably means the caller already
// holds lock_options_ -- confirm in download.cc.
277  ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
278  void UpdateProxiesUnlocked(const std::string &reason);
279  void RebalanceProxiesUnlocked(const std::string &reason);
280  CURL *AcquireCurlHandle();
281  void ReleaseCurlHandle(CURL *handle);
282  void ReleaseCredential(JobInfo *info);
283  void InitializeRequest(JobInfo *info, CURL *handle);
284  void SetUrlOptions(JobInfo *info);
285  bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
286  void UpdateStatistics(CURL *handle);
287  bool CanRetry(const JobInfo *info);
288  void Backoff(JobInfo *info);
289  void SetNocache(JobInfo *info);
290  void SetRegularCache(JobInfo *info);
291  void ProcessLink(JobInfo *info);
292  bool VerifyAndFinalize(const int curl_error, JobInfo *info);
293  void InitHeaders();
294  void CloneProxyConfig(DownloadManager *clone);
// `now` is taken by non-const reference, so the callee is able to modify it.
295  void CheckHostInfoReset(const std::string &typ, HostInfo &info,
296  JobInfo *jobinfo, time_t &now);
297 
// Escaping helpers.  EscapeUrlChar writes into a caller-provided buffer of
// three chars; EscapeHeader writes into escaped_buf, whose size is passed as
// buf_size.  EscapeUrl is reachable from unit tests through FRIEND_TEST.
298  bool EscapeUrlChar(unsigned char input, char output[3]);
299  std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url);
300  unsigned EscapeHeader(const std::string &header, char *escaped_buf,
301  size_t buf_size);
302 
// Returns a pointer to a proxy group, selected by a conditional on
// opt_proxy_groups_.
// NOTE(review): listing line 305, which holds both branches of the
// conditional, is missing from this extract, so the expression below is
// incomplete.  Presumably it yields the group at opt_proxy_groups_current_
// when opt_proxy_groups_ is set and NULL otherwise -- restore from the
// original header.
303  inline std::vector<ProxyInfo> *current_proxy_group() const {
304  return (opt_proxy_groups_ ?
306  }
307 
// Data members of DownloadManager.
//
// NOTE(review): this extract dropped many member declarations.  Every member
// that the member index of this page lists with a download.h line number is
// restored below at that line number, with the type given by the index.
// Listing lines that are still skipped (for example 311, 324, 330, 333,
// 336-338, 340-344, 346, 350, 354-358) remain unknown; among them must be
// opt_host_ and opt_metalink_, which num_hosts() and num_metalinks() use.
// Restore those from the original header.
//
// Pool of curl easy handles, split into idle and in-use sets.
309  std::set<CURL *> *pool_handles_idle_;
310  std::set<CURL *> *pool_handles_inuse_;
312  CURLM *curl_multi_;
313  HeaderLists *header_lists_;
314  curl_slist *default_headers_;
315  char *user_agent_;
316 
// Download thread and the pipes used with it.
317  pthread_t thread_download_;
318  atomic_int32 multi_threaded_;
319  UniquePtr<Pipe<kPipeThreadTerminator> > pipe_terminate_;
320 
321  UniquePtr<Pipe<kPipeDownloadJobs> > pipe_jobs_;
322  struct pollfd *watch_fds_;
323  uint32_t watch_fds_size_;
325  uint32_t watch_fds_max_;
326 
// Locks and the options they are presumably meant to guard.
327  pthread_mutex_t *lock_options_;
328  pthread_mutex_t *lock_synchronous_mode_;
329  std::string opt_dns_server_;
331  unsigned opt_timeout_direct_;
332  unsigned opt_low_speed_limit_;
334  unsigned opt_backoff_init_ms_;
335  unsigned opt_backoff_max_ms_;
339 
345 
347  std::vector<std::string> http_tracing_headers_;
348 
349  // Metalink list
351  time_t opt_metalink_timestamp_link_;
352 
353  // Host list
359  std::vector<int> *opt_host_chain_rtt_;
360 
361  // Proxy list
362  std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
366  unsigned opt_proxy_groups_current_;
371  unsigned opt_proxy_groups_current_burned_;
376  unsigned opt_proxy_groups_fallback_;
384  std::string opt_proxy_list_;
388  std::string opt_proxy_fallback_list_;
392  std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
396  std::vector<std::string> opt_proxies_;
401 
410  SharedPtr<ShardingPolicy> sharding_policy_;
418  SharedPtr<HealthCheck> health_check_;
// Set through SetFqrn().
427  std::string fqrn_;
428 
432  std::string name_;
433 
437  dns::NormalResolver *resolver_;
438 
// Returned by opt_ip_preference().
442  dns::IpPreference opt_ip_preference_;
443 
449  std::string proxy_template_direct_;
455  std::string proxy_template_forced_;
456 
463  time_t opt_timestamp_backup_proxies_;
464  time_t opt_timestamp_failover_proxies_; // failover within the same group
465  unsigned opt_proxy_groups_reset_after_;
466 
467  CredentialsAttachment *credentials_attachment_;
468 
474 
478  SslCertificateStore ssl_certificate_store_;
479 }; // DownloadManager
480 
481 } // namespace download
482 
483 #endif // CVMFS_NETWORK_DOWNLOAD_H_
unsigned opt_timeout_direct_
Definition: download.h:331
std::vector< std::string > http_tracing_headers_
Definition: download.h:347
bool StripDirect(const std::string &proxy_list, std::string *cleaned_list)
Definition: download.cc:2772
unsigned opt_low_speed_limit_
Definition: download.h:332
Definition: prng.h:28
static const unsigned kDnsDefaultTimeoutMs
Definition: download.h:177
unsigned opt_backoff_init_ms_
Definition: download.h:334
curl_slist * Get(const char *header)
Definition: download.cc:830
bool EscapeUrlChar(unsigned char input, char output[3])
Definition: download.cc:400
HostInfo(std::vector< std::string > *chain, const int current, const time_t timestamp_backup, const unsigned reset_after)
Definition: download.h:128
std::string Print(curl_slist *slist)
Definition: download.cc:820
unsigned opt_proxy_groups_current_burned_
Definition: download.h:371
unsigned opt_proxy_groups_reset_after_
Definition: download.h:465
void SetUrlOptions(JobInfo *info)
Definition: download.cc:1046
void ReleaseCredential(JobInfo *info)
Definition: download.cc:1395
SharedPtr< ShardingPolicy > sharding_policy_
Definition: download.h:410
bool IsUsed(curl_slist *slist)
Definition: download.h:91
std::string opt_proxy_fallback_list_
Definition: download.h:388
void SetHostChain(const std::string &host_list)
bool CheckMetalinkChain(const time_t now)
Definition: download.cc:2458
void SetNocache(JobInfo *info)
Definition: download.cc:1368
void SetMetalinkChain(const std::string &metalink_list)
void SetLowSpeedLimit(const unsigned low_speed_limit)
Definition: download.cc:2201
DownloadManager(const unsigned max_pool_handles, const perf::StatisticsTemplate &statistics, const std::string &name="standard")
Definition: download.cc:1895
std::string proxy_template_direct_
Definition: download.h:449
std::vector< std::string > opt_proxies_
Definition: download.h:396
static int ParseHttpCode(const char digits[3])
Definition: download.cc:481
unsigned num_metalinks()
Definition: download.h:255
FRIEND_TEST(T_Download, ValidateGeoReply)
void CheckHostInfoReset(const std::string &typ, HostInfo &info, JobInfo *jobinfo, time_t &now)
Definition: download.cc:1018
static const int kProbeGeo
Definition: download.h:174
static const unsigned kBlockSize
Definition: download.h:89
unsigned opt_proxy_groups_current_
Definition: download.h:366
bool ValidateGeoReply(const std::string &reply_order, const unsigned expected_size, std::vector< uint64_t > *reply_vals)
Definition: download.cc:2732
std::vector< ProxyInfo > * current_proxy_group() const
Definition: download.h:303
time_t opt_timestamp_backup_proxies_
Definition: download.h:463
void SetProxyChain(const std::string &proxy_list, const std::string &fallback_proxy_list, const ProxySetModes set_mode)
Definition: download.cc:2812
std::string GetProxyList()
Definition: download.cc:2994
void GetMetalinkInfo(std::vector< std::string > *metalink_chain, unsigned *current_metalink)
Definition: download.cc:2248
std::set< CURL * > * pool_handles_inuse_
Definition: download.h:310
pthread_mutex_t * lock_options_
Definition: download.h:327
ProxyInfo * ChooseProxyUnlocked(const shash::Any *hash)
Definition: download.cc:3006
pthread_t thread_download_
Definition: download.h:317
ProxyInfo(const dns::Host &host, const std::string &url)
Definition: download.h:147
std::string opt_proxy_list_
Definition: download.h:384
perf::Counter * sz_transfer_time
Definition: download.h:44
std::vector< std::vector< ProxyInfo > > * opt_proxy_groups_
Definition: download.h:362
unsigned opt_proxy_groups_fallback_
Definition: download.h:376
curl_slist * default_headers_
Definition: download.h:314
curl_slist * GetList(const char *header)
Definition: download.cc:754
void ReleaseCurlHandle(CURL *handle)
Definition: download.cc:916
void SetDnsServer(const std::string &address)
Definition: download.cc:2129
DownloadManager * Clone(const perf::StatisticsTemplate &statistics, const std::string &cloned_name)
Definition: download.cc:3225
FRIEND_TEST(T_HeaderLists, Intrinsics)
static void * MainDownload(void *data)
Definition: download.cc:577
void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct)
Definition: download.cc:2187
perf::Counter * n_retries
Definition: download.h:46
std::string opt_dns_server_
Definition: download.h:329
void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo)
Definition: download.cc:2381
void Backoff(JobInfo *info)
Definition: download.cc:1342
perf::Counter * sz_transferred_bytes
Definition: download.h:43
void SetFqrn(const std::string &fqrn)
Definition: download.h:248
std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url)
Definition: download.cc:427
int32_t atomic_int32
Definition: atomic.h:17
Counter * RegisterTemplated(const std::string &name_minor, const std::string &desc)
Definition: statistics.h:111
void UpdateStatistics(CURL *handle)
Definition: download.cc:1308
void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds)
Definition: download.cc:2166
perf::Counter * n_metalink_failover
Definition: download.h:47
std::vector< std::string > * chain
Definition: download.h:138
void GetProxyInfo(std::vector< std::vector< ProxyInfo > > *proxy_chain, unsigned *current_group, unsigned *fallback_group)
Definition: download.cc:2969
void SetProxyGroupResetDelay(const unsigned seconds)
Definition: download.cc:3126
atomic_int32 multi_threaded_
Definition: download.h:318
dns::NormalResolver * resolver_
Definition: download.h:437
std::vector< curl_slist * > blocks_
Definition: download.h:96
ProxyInfo(const std::string &url)
Definition: download.h:146
dns::IpPreference opt_ip_preference_
Definition: download.h:442
Definition: dns.h:90
bool SetShardingPolicy(const ShardingPolicySelector type)
Definition: download.cc:3206
perf::Counter * n_host_failover
Definition: download.h:48
void UpdateProxiesUnlocked(const std::string &reason)
Definition: download.cc:3020
time_t opt_metalink_timestamp_link_
Definition: download.h:351
void ProcessLink(JobInfo *info)
Definition: download.cc:1423
void AppendHeader(curl_slist *slist, const char *header)
Definition: download.cc:776
void SetIpPreference(const dns::IpPreference preference)
Definition: download.cc:2176
perf::Counter * n_requests
Definition: download.h:45
void SetRetryParameters(const unsigned max_retries, const unsigned backoff_init_ms, const unsigned backoff_max_ms)
Definition: download.cc:3154
void SetRegularCache(JobInfo *info)
Definition: download.cc:1382
void CloneProxyConfig(DownloadManager *clone)
Definition: download.cc:3272
void PutList(curl_slist *slist)
Definition: download.cc:811
void SetMaxIpaddrPerProxy(unsigned limit)
Definition: download.cc:3165
void EnableIgnoreSignatureFailures()
Definition: download.cc:3190
std::vector< int > * opt_host_chain_rtt_
Definition: download.h:359
dns::IpPreference opt_ip_preference() const
Definition: download.h:260
SslCertificateStore ssl_certificate_store_
Definition: download.h:478
std::string GetFallbackProxyList()
Definition: download.cc:2998
void SetProxyTemplates(const std::string &direct, const std::string &forced)
Definition: download.cc:3171
IpPreference
Definition: dns.h:46
unsigned opt_backoff_max_ms_
Definition: download.h:335
std::string GetDnsServer() const
Definition: download.cc:2121
CredentialsAttachment * credentials_attachment_
Definition: download.h:467
struct pollfd * watch_fds_
Definition: download.h:322
std::map< uint32_t, ProxyInfo * > opt_proxy_map_
Definition: download.h:392
curl_slist * DuplicateList(curl_slist *slist)
Definition: download.cc:759
UniquePtr< Pipe< kPipeDownloadJobs > > pipe_jobs_
Definition: download.h:321
Failures Fetch(JobInfo *info)
Definition: download.cc:2001
bool CanRetry(const JobInfo *info)
Definition: download.cc:1326
perf::Counter * n_proxy_failover
Definition: download.h:49
void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct)
Definition: download.cc:2210
std::string proxy_template_forced_
Definition: download.h:455
time_t opt_timestamp_failover_proxies_
Definition: download.h:464
void SetDnsParameters(const unsigned retries, const unsigned timeout_ms)
Definition: download.cc:2148
unsigned EscapeHeader(const std::string &header, char *escaped_buf, size_t buf_size)
Definition: download.cc:449
UniquePtr< Pipe< kPipeThreadTerminator > > pipe_terminate_
Definition: download.h:319
static const int kProbeUnprobed
Definition: download.h:165
virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data)=0
void SetMetalinkResetDelay(const unsigned seconds)
Definition: download.cc:3136
static const unsigned kDnsDefaultRetries
Definition: download.h:176
bool GeoSortServers(std::vector< std::string > *servers, std::vector< uint64_t > *output_order=NULL)
Definition: download.cc:2525
static const int kProbeDown
Definition: download.h:170
Counters(perf::StatisticsTemplate statistics)
Definition: download.h:51
static const unsigned kProxyMapScale
Definition: download.h:178
void GetHostInfo(std::vector< std::string > *host_chain, std::vector< int > *rtt, unsigned *current_host)
Definition: download.cc:2293
bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host)
Definition: download.cc:1241
bool VerifyAndFinalize(const int curl_error, JobInfo *info)
Definition: download.cc:1486
void CutHeader(const char *header, curl_slist **slist)
Definition: download.cc:792
SharedPtr< HealthCheck > health_check_
Definition: download.h:418
void SwitchProxy(JobInfo *info)
Definition: download.cc:2313
void AddHTTPTracingHeader(const std::string &header)
Definition: download.cc:3198
void SetCredentialsAttachment(CredentialsAttachment *ca)
Definition: download.cc:2113
std::set< CURL * > * pool_handles_idle_
Definition: download.h:309
void RebalanceProxiesUnlocked(const std::string &reason)
Definition: download.cc:3090
pthread_mutex_t * lock_synchronous_mode_
Definition: download.h:328
virtual bool ConfigureCurlHandle(CURL *curl_handle, pid_t pid, void **info_data)=0
void InitializeRequest(JobInfo *info, CURL *handle)
Definition: download.cc:934
static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action, void *userp, void *socketp)
Definition: download.cc:497
HeaderLists * header_lists_
Definition: download.h:313
void SetHostResetDelay(const unsigned seconds)
Definition: download.cc:3145
void Put(curl_slist *slist)
Definition: download.cc:847