CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
download.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7 
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12 
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18 
20 #include "crypto/hash.h"
21 #include "duplex_curl.h"
22 #include "gtest/gtest_prod.h"
23 #include "network/dns.h"
24 #include "network/health_check.h"
25 #include "network/jobinfo.h"
26 #include "network/network_errors.h"
28 #include "network/sink.h"
29 #include "ssl.h"
30 #include "statistics.h"
31 #include "util/atomic.h"
32 #include "util/pipe.h"
33 #include "util/pointer.h"
34 #include "util/prng.h"
35 #include "util/shared_ptr.h"
36 
37 class InterruptCue;
38 
39 namespace download {
40 
41 struct Counters {
43  perf::Counter *sz_transfer_time; // measured in milliseconds
49 
50  explicit Counters(perf::StatisticsTemplate statistics) {
52  "sz_transferred_bytes", "Number of transferred bytes");
54  "sz_transfer_time", "Transfer time (milliseconds)");
55  n_requests = statistics.RegisterTemplated("n_requests",
56  "Number of requests");
57  n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
59  "n_metalink_failover", "Number of metalink failovers");
60  n_host_failover = statistics.RegisterTemplated("n_host_failover",
61  "Number of host failovers");
63  "n_proxy_failover", "Number of proxy failovers");
64  }
65 }; // Counters
66 
// Helper for building and recycling libcurl header lists (curl_slist chains).
// The public methods hand out a list for a header string (GetList), copy a
// list (DuplicateList), add or remove a header (AppendHeader / CutHeader),
// give a list back (PutList) and render a list as a string (Print).
// NOTE(review): presumably list nodes are carved out of the blocks stored in
// blocks_ rather than allocated by libcurl, so lists obtained here should be
// returned via PutList() and not curl_slist_free_all() -- confirm against
// download.cc.
76 class HeaderLists {
// Grants the gtest case T_HeaderLists.Intrinsics access to private members.
77  FRIEND_TEST(T_HeaderLists, Intrinsics);
78 
79  public:
80  ~HeaderLists();
81  curl_slist *GetList(const char *header);
82  curl_slist *DuplicateList(curl_slist *slist);
83  void AppendHeader(curl_slist *slist, const char *header);
// Takes a pointer to the list head; presumably because removing the first
// node changes the head -- TODO confirm.
84  void CutHeader(const char *header, curl_slist **slist);
85  void PutList(curl_slist *slist);
86  std::string Print(curl_slist *slist);
87 
88  private:
// Number of curl_slist nodes that fit into 4096 bytes (integer division).
89  static const unsigned kBlockSize = 4096 / sizeof(curl_slist);
90 
// A node counts as in use when its data pointer is non-NULL.
91  bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
92  curl_slist *Get(const char *header);
93  void Put(curl_slist *slist);
94  void AddBlock();
95 
96  std::vector<curl_slist *> blocks_; // List of curl_slist blocks
97 };
98 
99 
106  public:
108  virtual bool ConfigureCurlHandle(CURL *curl_handle,
109  pid_t pid,
110  void **info_data) = 0;
111  virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
112 };
113 
114 
119 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
120  FRIEND_TEST(T_Download, ValidateGeoReply);
121  FRIEND_TEST(T_Download, StripDirect);
122  FRIEND_TEST(T_Download, EscapeUrl);
123 
124  public:
125  // HostInfo is used for both metalink and host
126  struct HostInfo {
127  HostInfo() { }
128  HostInfo(std::vector<std::string> *chain,
129  const int current,
130  const time_t timestamp_backup,
131  const unsigned reset_after)
132  : chain(chain)
133  , current(current)
134  , timestamp_backup(timestamp_backup)
135  , reset_after(reset_after) { }
136  std::vector<std::string> *chain;
137  int current;
139  unsigned reset_after;
140  };
141 
142  struct ProxyInfo {
143  ProxyInfo() { }
144  explicit ProxyInfo(const std::string &url) : url(url) { }
145  ProxyInfo(const dns::Host &host, const std::string &url)
146  : host(host), url(url) { }
147  std::string Print();
149  std::string url;
150  };
151 
156  };
157 
161  static const int kProbeUnprobed;
166  static const int kProbeDown;
170  static const int kProbeGeo;
171 
172  static const unsigned kDnsDefaultRetries = 1;
173  static const unsigned kDnsDefaultTimeoutMs = 3000;
174  static const unsigned kProxyMapScale = 16;
175 
176  DownloadManager(const unsigned max_pool_handles,
177  const perf::StatisticsTemplate &statistics,
178  const std::string &name = "standard");
180 
181  static int ParseHttpCode(const char digits[3]);
182 
183  void Spawn();
185  const std::string &cloned_name);
186  Failures Fetch(JobInfo *info);
187 
189  std::string GetDnsServer() const;
190  void SetDnsServer(const std::string &address);
191  void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
192  void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
193  void SetIpPreference(const dns::IpPreference preference);
194  void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
195  void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
196  void SetLowSpeedLimit(const unsigned low_speed_limit);
197  void SetMetalinkChain(const std::string &metalink_list);
198  void SetMetalinkChain(const std::vector<std::string> &metalink_list);
199  void GetMetalinkInfo(std::vector<std::string> *metalink_chain,
200  unsigned *current_metalink);
201  void SwitchMetalink();
202  bool CheckMetalinkChain(const time_t now);
203  void SetHostChain(const std::string &host_list);
204  void SetHostChain(const std::vector<std::string> &host_list);
205  void GetHostInfo(std::vector<std::string> *host_chain, std::vector<int> *rtt,
206  unsigned *current_host);
207  void ProbeHosts();
208  bool ProbeGeo();
209  // Sort list of servers using the Geo API. If the output_order
210  // vector is NULL, then the servers vector input is itself sorted.
211  // If it is non-NULL, then servers is left unchanged and the zero-based
212  // ordering is stored into output_order.
213  bool GeoSortServers(std::vector<std::string> *servers,
214  std::vector<uint64_t> *output_order = NULL);
215  void SwitchHost();
216  void SetProxyChain(const std::string &proxy_list,
217  const std::string &fallback_proxy_list,
218  const ProxySetModes set_mode);
219  void GetProxyInfo(std::vector<std::vector<ProxyInfo> > *proxy_chain,
220  unsigned *current_group,
221  unsigned *fallback_group);
222  std::string GetProxyList();
223  std::string GetFallbackProxyList();
224  void ShardProxies();
225  void RebalanceProxies();
226  void SwitchProxyGroup();
227  void SetProxyGroupResetDelay(const unsigned seconds);
228  void SetMetalinkResetDelay(const unsigned seconds);
229  void SetHostResetDelay(const unsigned seconds);
230  void SetRetryParameters(const unsigned max_retries,
231  const unsigned backoff_init_ms,
232  const unsigned backoff_max_ms);
233  void SetMaxIpaddrPerProxy(unsigned limit);
234  void SetProxyTemplates(const std::string &direct, const std::string &forced);
235  void EnableInfoHeader();
236  void EnableRedirects();
238  void EnableHTTPTracing();
239  void AddHTTPTracingHeader(const std::string &header);
241 
// Stores a copy of the given string in the fqrn_ member.
// NOTE(review): fqrn presumably stands for "fully qualified repository
// name" -- confirm. No lock is taken here.
244  void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
245 
// Returns the number of entries in the host chain, or 0 when no chain is
// set (opt_host_.chain is a null pointer).
// Note: the size_t from size() is implicitly converted to unsigned.
246  unsigned num_hosts() {
247  if (opt_host_.chain)
248  return opt_host_.chain->size();
249  return 0;
250  }
251 
// Returns the number of entries in the metalink chain, or 0 when no chain is
// set (opt_metalink_.chain is a null pointer).
// Note: the size_t from size() is implicitly converted to unsigned.
252  unsigned num_metalinks() {
253  if (opt_metalink_.chain)
254  return opt_metalink_.chain->size();
255  return 0;
256  }
257 
259 
260  private:
261  static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
262  void *userp, void *socketp);
263  static void *MainDownload(void *data);
264 
265  bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
266  bool ValidateGeoReply(const std::string &reply_order,
267  const unsigned expected_size,
268  std::vector<uint64_t> *reply_vals);
269  void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo);
270  void SwitchMetalink(JobInfo *info);
271  void SwitchHost(JobInfo *info);
272  void SwitchProxy(JobInfo *info);
273  ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
274  void UpdateProxiesUnlocked(const std::string &reason);
275  void RebalanceProxiesUnlocked(const std::string &reason);
276  CURL *AcquireCurlHandle();
277  void ReleaseCurlHandle(CURL *handle);
278  void ReleaseCredential(JobInfo *info);
279  void InitializeRequest(JobInfo *info, CURL *handle);
280  void SetUrlOptions(JobInfo *info);
281  bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
282  void UpdateStatistics(CURL *handle);
283  bool CanRetry(const JobInfo *info);
284  void Backoff(JobInfo *info);
285  void SetNocache(JobInfo *info);
286  void SetRegularCache(JobInfo *info);
287  void ProcessLink(JobInfo *info);
288  bool VerifyAndFinalize(const int curl_error, JobInfo *info);
289  void InitHeaders();
290  void CloneProxyConfig(DownloadManager *clone);
291  void CheckHostInfoReset(const std::string &typ, HostInfo &info,
292  JobInfo *jobinfo, time_t &now);
293 
294  bool EscapeUrlChar(unsigned char input, char output[3]);
295  std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url);
296  unsigned EscapeHeader(const std::string &header, char *escaped_buf,
297  size_t buf_size);
298 
299  inline std::vector<ProxyInfo> *current_proxy_group() const {
300  return (opt_proxy_groups_
302  : NULL);
303  }
304 
306  std::set<CURL *> *pool_handles_idle_;
307  std::set<CURL *> *pool_handles_inuse_;
309  CURLM *curl_multi_;
311  curl_slist *default_headers_;
312  char *user_agent_;
313 
314  pthread_t thread_download_;
317 
319  struct pollfd *watch_fds_;
320  uint32_t watch_fds_size_;
322  uint32_t watch_fds_max_;
323 
324  pthread_mutex_t *lock_options_;
325  pthread_mutex_t *lock_synchronous_mode_;
326  std::string opt_dns_server_;
336 
342 
344  std::vector<std::string> http_tracing_headers_;
345 
346  // Metalink list
349 
350  // Host list
356  std::vector<int> *opt_host_chain_rtt_;
357 
358  // Proxy list
359  std::vector<std::vector<ProxyInfo> > *opt_proxy_groups_;
381  std::string opt_proxy_list_;
389  std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
393  std::vector<std::string> opt_proxies_;
398 
424  std::string fqrn_;
425 
429  std::string name_;
430 
435 
440 
453 
461  time_t opt_timestamp_failover_proxies_; // failover within the same group
463 
465 
471 
476 }; // DownloadManager
477 
478 } // namespace download
479 
480 #endif // CVMFS_NETWORK_DOWNLOAD_H_
unsigned opt_timeout_direct_
Definition: download.h:328
std::vector< std::string > http_tracing_headers_
Definition: download.h:344
bool StripDirect(const std::string &proxy_list, std::string *cleaned_list)
Definition: download.cc:2753
unsigned opt_low_speed_limit_
Definition: download.h:329
Definition: prng.h:27
static const unsigned kDnsDefaultTimeoutMs
Definition: download.h:173
unsigned opt_backoff_init_ms_
Definition: download.h:331
curl_slist * Get(const char *header)
Definition: download.cc:813
bool EscapeUrlChar(unsigned char input, char output[3])
Definition: download.cc:396
HostInfo(std::vector< std::string > *chain, const int current, const time_t timestamp_backup, const unsigned reset_after)
Definition: download.h:128
std::string Print(curl_slist *slist)
Definition: download.cc:803
unsigned opt_proxy_groups_current_burned_
Definition: download.h:368
unsigned opt_proxy_groups_reset_after_
Definition: download.h:462
void SetUrlOptions(JobInfo *info)
Definition: download.cc:1026
void ReleaseCredential(JobInfo *info)
Definition: download.cc:1377
SharedPtr< ShardingPolicy > sharding_policy_
Definition: download.h:407
bool IsUsed(curl_slist *slist)
Definition: download.h:91
std::string opt_proxy_fallback_list_
Definition: download.h:385
void SetHostChain(const std::string &host_list)
bool CheckMetalinkChain(const time_t now)
Definition: download.cc:2435
void SetNocache(JobInfo *info)
Definition: download.cc:1350
void SetMetalinkChain(const std::string &metalink_list)
void SetLowSpeedLimit(const unsigned low_speed_limit)
Definition: download.cc:2173
DownloadManager(const unsigned max_pool_handles, const perf::StatisticsTemplate &statistics, const std::string &name="standard")
Definition: download.cc:1878
std::string proxy_template_direct_
Definition: download.h:446
std::vector< std::string > opt_proxies_
Definition: download.h:393
static int ParseHttpCode(const char digits[3])
Definition: download.cc:472
unsigned num_metalinks()
Definition: download.h:252
FRIEND_TEST(T_Download, ValidateGeoReply)
void CheckHostInfoReset(const std::string &typ, HostInfo &info, JobInfo *jobinfo, time_t &now)
Definition: download.cc:1000
static const int kProbeGeo
Definition: download.h:170
static const unsigned kBlockSize
Definition: download.h:89
unsigned opt_proxy_groups_current_
Definition: download.h:363
bool ValidateGeoReply(const std::string &reply_order, const unsigned expected_size, std::vector< uint64_t > *reply_vals)
Definition: download.cc:2715
std::vector< ProxyInfo > * current_proxy_group() const
Definition: download.h:299
time_t opt_timestamp_backup_proxies_
Definition: download.h:460
void SetProxyChain(const std::string &proxy_list, const std::string &fallback_proxy_list, const ProxySetModes set_mode)
Definition: download.cc:2791
std::string GetProxyList()
Definition: download.cc:2974
void GetMetalinkInfo(std::vector< std::string > *metalink_chain, unsigned *current_metalink)
Definition: download.cc:2219
std::set< CURL * > * pool_handles_inuse_
Definition: download.h:307
pthread_mutex_t * lock_options_
Definition: download.h:324
ProxyInfo * ChooseProxyUnlocked(const shash::Any *hash)
Definition: download.cc:2983
pthread_t thread_download_
Definition: download.h:314
ProxyInfo(const dns::Host &host, const std::string &url)
Definition: download.h:145
std::string opt_proxy_list_
Definition: download.h:381
perf::Counter * sz_transfer_time
Definition: download.h:43
unsigned opt_proxy_groups_fallback_
Definition: download.h:373
curl_slist * default_headers_
Definition: download.h:311
curl_slist * GetList(const char *header)
Definition: download.cc:739
void ReleaseCurlHandle(CURL *handle)
Definition: download.cc:899
void SetDnsServer(const std::string &address)
Definition: download.cc:2108
DownloadManager * Clone(const perf::StatisticsTemplate &statistics, const std::string &cloned_name)
Definition: download.cc:3198
FRIEND_TEST(T_HeaderLists, Intrinsics)
static void * MainDownload(void *data)
Definition: download.cc:567
void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct)
Definition: download.cc:2160
perf::Counter * n_retries
Definition: download.h:45
std::string opt_dns_server_
Definition: download.h:326
void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo)
Definition: download.cc:2362
void Backoff(JobInfo *info)
Definition: download.cc:1324
perf::Counter * sz_transferred_bytes
Definition: download.h:42
void SetFqrn(const std::string &fqrn)
Definition: download.h:244
std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url)
Definition: download.cc:419
int32_t atomic_int32
Definition: atomic.h:17
Counter * RegisterTemplated(const std::string &name_minor, const std::string &desc)
Definition: statistics.h:109
void UpdateStatistics(CURL *handle)
Definition: download.cc:1290
void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds)
Definition: download.cc:2141
perf::Counter * n_metalink_failover
Definition: download.h:46
std::vector< std::string > * chain
Definition: download.h:136
std::vector< std::vector< ProxyInfo > > * opt_proxy_groups_
Definition: download.h:359
void GetProxyInfo(std::vector< std::vector< ProxyInfo > > *proxy_chain, unsigned *current_group, unsigned *fallback_group)
Definition: download.cc:2950
void SetProxyGroupResetDelay(const unsigned seconds)
Definition: download.cc:3108
atomic_int32 multi_threaded_
Definition: download.h:315
dns::NormalResolver * resolver_
Definition: download.h:434
std::vector< curl_slist * > blocks_
Definition: download.h:96
ProxyInfo(const std::string &url)
Definition: download.h:144
dns::IpPreference opt_ip_preference_
Definition: download.h:439
Definition: dns.h:90
bool SetShardingPolicy(const ShardingPolicySelector type)
Definition: download.cc:3177
perf::Counter * n_host_failover
Definition: download.h:47
void UpdateProxiesUnlocked(const std::string &reason)
Definition: download.cc:2998
time_t opt_metalink_timestamp_link_
Definition: download.h:348
void ProcessLink(JobInfo *info)
Definition: download.cc:1404
void AppendHeader(curl_slist *slist, const char *header)
Definition: download.cc:759
void SetIpPreference(const dns::IpPreference preference)
Definition: download.cc:2149
perf::Counter * n_requests
Definition: download.h:44
void SetRetryParameters(const unsigned max_retries, const unsigned backoff_init_ms, const unsigned backoff_max_ms)
Definition: download.cc:3134
void SetRegularCache(JobInfo *info)
Definition: download.cc:1364
void CloneProxyConfig(DownloadManager *clone)
Definition: download.cc:3245
void PutList(curl_slist *slist)
Definition: download.cc:794
void SetMaxIpaddrPerProxy(unsigned limit)
Definition: download.cc:3144
void EnableIgnoreSignatureFailures()
Definition: download.cc:3163
std::vector< int > * opt_host_chain_rtt_
Definition: download.h:356
dns::IpPreference opt_ip_preference() const
Definition: download.h:258
SslCertificateStore ssl_certificate_store_
Definition: download.h:475
std::string GetFallbackProxyList()
Definition: download.cc:2976
void SetProxyTemplates(const std::string &direct, const std::string &forced)
Definition: download.cc:3150
IpPreference
Definition: dns.h:46
unsigned opt_backoff_max_ms_
Definition: download.h:332
std::string GetDnsServer() const
Definition: download.cc:2102
CredentialsAttachment * credentials_attachment_
Definition: download.h:464
struct pollfd * watch_fds_
Definition: download.h:319
std::map< uint32_t, ProxyInfo * > opt_proxy_map_
Definition: download.h:389
curl_slist * DuplicateList(curl_slist *slist)
Definition: download.cc:742
UniquePtr< Pipe< kPipeDownloadJobs > > pipe_jobs_
Definition: download.h:318
Failures Fetch(JobInfo *info)
Definition: download.cc:1982
bool CanRetry(const JobInfo *info)
Definition: download.cc:1308
perf::Counter * n_proxy_failover
Definition: download.h:48
void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct)
Definition: download.cc:2182
std::string proxy_template_forced_
Definition: download.h:452
time_t opt_timestamp_failover_proxies_
Definition: download.h:461
void SetDnsParameters(const unsigned retries, const unsigned timeout_ms)
Definition: download.cc:2127
unsigned EscapeHeader(const std::string &header, char *escaped_buf, size_t buf_size)
Definition: download.cc:441
UniquePtr< Pipe< kPipeThreadTerminator > > pipe_terminate_
Definition: download.h:316
static const int kProbeUnprobed
Definition: download.h:161
virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data)=0
void SetMetalinkResetDelay(const unsigned seconds)
Definition: download.cc:3118
static const unsigned kDnsDefaultRetries
Definition: download.h:172
bool GeoSortServers(std::vector< std::string > *servers, std::vector< uint64_t > *output_order=NULL)
Definition: download.cc:2504
static const int kProbeDown
Definition: download.h:166
Counters(perf::StatisticsTemplate statistics)
Definition: download.h:50
static const unsigned kProxyMapScale
Definition: download.h:174
void GetHostInfo(std::vector< std::string > *host_chain, std::vector< int > *rtt, unsigned *current_host)
Definition: download.cc:2267
bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host)
Definition: download.cc:1225
bool VerifyAndFinalize(const int curl_error, JobInfo *info)
Definition: download.cc:1468
void CutHeader(const char *header, curl_slist **slist)
Definition: download.cc:775
SharedPtr< HealthCheck > health_check_
Definition: download.h:415
void SwitchProxy(JobInfo *info)
Definition: download.cc:2292
void AddHTTPTracingHeader(const std::string &header)
Definition: download.cc:3169
void SetCredentialsAttachment(CredentialsAttachment *ca)
Definition: download.cc:2094
std::set< CURL * > * pool_handles_idle_
Definition: download.h:306
void RebalanceProxiesUnlocked(const std::string &reason)
Definition: download.cc:3072
pthread_mutex_t * lock_synchronous_mode_
Definition: download.h:325
virtual bool ConfigureCurlHandle(CURL *curl_handle, pid_t pid, void **info_data)=0
void InitializeRequest(JobInfo *info, CURL *handle)
Definition: download.cc:917
static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action, void *userp, void *socketp)
Definition: download.cc:488
HeaderLists * header_lists_
Definition: download.h:310
void SetHostResetDelay(const unsigned seconds)
Definition: download.cc:3126
void Put(curl_slist *slist)
Definition: download.cc:830