CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
download.h
Go to the documentation of this file.
1 
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7 
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12 
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18 
19 #include "gtest/gtest_prod.h"
20 
21 #include "compression.h"
22 #include "crypto/hash.h"
23 #include "duplex_curl.h"
24 #include "network/dns.h"
25 #include "network/health_check.h"
26 #include "network/jobinfo.h"
27 #include "network/network_errors.h"
29 #include "network/sink.h"
30 #include "ssl.h"
31 #include "statistics.h"
32 #include "util/atomic.h"
33 #include "util/pipe.h"
34 #include "util/pointer.h"
35 #include "util/prng.h"
36 #include "util/shared_ptr.h"
37 
38 class InterruptCue;
39 
40 namespace download {
41 
// Bundle of perf::Counter pointers kept for download activity. Every member
// is filled in by the constructor through
// perf::StatisticsTemplate::RegisterTemplated(name, description).
// Listing lines 43 and 45-48 (member declarations) were missing from this
// extract; they are restored from the cross-reference index of this page,
// which gives their type and line number.
42 struct Counters {
43  perf::Counter *sz_transferred_bytes;
44  perf::Counter *sz_transfer_time; // measured in milliseconds
45  perf::Counter *n_requests;
46  perf::Counter *n_retries;
47  perf::Counter *n_proxy_failover;
48  perf::Counter *n_host_failover;
49 
    // Registers all six counters with the given statistics template. The
    // template is taken by value; the registered names and descriptions are
    // the string literals below.
50  explicit Counters(perf::StatisticsTemplate statistics) {
51  sz_transferred_bytes = statistics.RegisterTemplated("sz_transferred_bytes",
52  "Number of transferred bytes");
53  sz_transfer_time = statistics.RegisterTemplated("sz_transfer_time",
54  "Transfer time (milliseconds)");
55  n_requests = statistics.RegisterTemplated("n_requests",
56  "Number of requests");
57  n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
58  n_proxy_failover = statistics.RegisterTemplated("n_proxy_failover",
59  "Number of proxy failovers");
60  n_host_failover = statistics.RegisterTemplated("n_host_failover",
61  "Number of host failovers");
62  }
63 }; // Counters
64 
// Helper that hands out and takes back curl_slist header lists. Backing
// storage is the blocks_ vector at the bottom of the class. All member
// functions except IsUsed() are only declared here; per the index of this
// page they are defined in download.cc.
// NOTE(review): the exact semantics of the public functions are not visible
// in this header -- the one-line notes below are read off the names and
// signatures and should be confirmed against download.cc.
74 class HeaderLists {
    // Gives the T_HeaderLists.Intrinsics unit test access to the private part.
75  FRIEND_TEST(T_HeaderLists, Intrinsics);
76  public:
77  ~HeaderLists();
    // Presumably starts a new list whose first entry is `header`.
78  curl_slist *GetList(const char *header);
    // Presumably returns a copy of `slist`.
79  curl_slist *DuplicateList(curl_slist *slist);
    // Presumably adds `header` to the existing list `slist`.
80  void AppendHeader(curl_slist *slist, const char *header);
    // Takes the list head by address, so the head pointer itself may be
    // changed -- presumably when the first entry is the one being removed.
81  void CutHeader(const char *header, curl_slist **slist);
    // Presumably gives a list obtained from GetList()/DuplicateList() back.
82  void PutList(curl_slist *slist);
    // Returns a string rendering of `slist`.
83  std::string Print(curl_slist *slist);
84 
85  private:
    // Number of curl_slist structs that fit into 4096 bytes (integer division).
86  static const unsigned kBlockSize = 4096/sizeof(curl_slist);
87 
    // An entry counts as used when its data pointer is non-NULL. `slist` is
    // dereferenced unconditionally and therefore must not be NULL.
88  bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
89  curl_slist *Get(const char *header);
90  void Put(curl_slist *slist);
91  void AddBlock();
92 
93  std::vector<curl_slist *> blocks_; // List of curl_slist blocks
94 };
95 
96 
103  public:
105  virtual bool ConfigureCurlHandle(CURL *curl_handle,
106  pid_t pid,
107  void **info_data) = 0;
108  virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
109 };
110 
111 
116 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
117  FRIEND_TEST(T_Download, ValidateGeoReply);
118  FRIEND_TEST(T_Download, StripDirect);
119  FRIEND_TEST(T_Download, EscapeUrl);
120 
121  public:
122  struct ProxyInfo {
123  ProxyInfo() { }
124  explicit ProxyInfo(const std::string &url) : url(url) { }
125  ProxyInfo(const dns::Host &host, const std::string &url)
126  : host(host)
127  , url(url)
128  { }
129  std::string Print();
131  std::string url;
132  };
133 
138  };
139 
143  static const int kProbeUnprobed;
148  static const int kProbeDown;
152  static const int kProbeGeo;
153 
154  static const unsigned kDnsDefaultRetries = 1;
155  static const unsigned kDnsDefaultTimeoutMs = 3000;
156  static const unsigned kProxyMapScale = 16;
157 
158  DownloadManager(const unsigned max_pool_handles,
159  const perf::StatisticsTemplate &statistics);
161 
162  static int ParseHttpCode(const char digits[3]);
163 
164  void Spawn();
165  DownloadManager *Clone(const perf::StatisticsTemplate &statistics);
166  Failures Fetch(JobInfo *info);
167 
169  std::string GetDnsServer() const;
170  void SetDnsServer(const std::string &address);
171  void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
172  void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
173  void SetIpPreference(const dns::IpPreference preference);
174  void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
175  void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
176  void SetLowSpeedLimit(const unsigned low_speed_limit);
177  void SetHostChain(const std::string &host_list);
178  void SetHostChain(const std::vector<std::string> &host_list);
179  void GetHostInfo(std::vector<std::string> *host_chain,
180  std::vector<int> *rtt, unsigned *current_host);
181  void ProbeHosts();
182  bool ProbeGeo();
183  // Sort list of servers using the Geo API. If the output_order
184  // vector is NULL, then the servers vector input is itself sorted.
185  // If it is non-NULL, then servers is left unchanged and the zero-based
186  // ordering is stored into output_order.
187  bool GeoSortServers(std::vector<std::string> *servers,
188  std::vector<uint64_t> *output_order = NULL);
189  void SwitchHost();
190  void SetProxyChain(const std::string &proxy_list,
191  const std::string &fallback_proxy_list,
192  const ProxySetModes set_mode);
193  void GetProxyInfo(std::vector< std::vector<ProxyInfo> > *proxy_chain,
194  unsigned *current_group,
195  unsigned *fallback_group);
196  std::string GetProxyList();
197  std::string GetFallbackProxyList();
198  void ShardProxies();
199  void RebalanceProxies();
200  void SwitchProxyGroup();
201  void SetProxyGroupResetDelay(const unsigned seconds);
202  void SetHostResetDelay(const unsigned seconds);
203  void SetRetryParameters(const unsigned max_retries,
204  const unsigned backoff_init_ms,
205  const unsigned backoff_max_ms);
206  void SetMaxIpaddrPerProxy(unsigned limit);
207  void SetProxyTemplates(const std::string &direct, const std::string &forced);
208  void EnableInfoHeader();
209  void EnableRedirects();
211  void EnableHTTPTracing();
212  void AddHTTPTracingHeader(const std::string &header);
214 
217  void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
218 
219  unsigned num_hosts() {
220  if (opt_host_chain_) return opt_host_chain_->size();
221  return 0;
222  }
223 
225  return opt_ip_preference_;
226  }
227 
228  private:
229  static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
230  void *userp, void *socketp);
231  static void *MainDownload(void *data);
232 
233  bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
234  bool ValidateGeoReply(const std::string &reply_order,
235  const unsigned expected_size,
236  std::vector<uint64_t> *reply_vals);
237  void SwitchHost(JobInfo *info);
238  void SwitchProxy(JobInfo *info);
239  ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
240  void UpdateProxiesUnlocked(const std::string &reason);
241  void RebalanceProxiesUnlocked(const std::string &reason);
242  CURL *AcquireCurlHandle();
243  void ReleaseCurlHandle(CURL *handle);
244  void ReleaseCredential(JobInfo *info);
245  void InitializeRequest(JobInfo *info, CURL *handle);
246  void SetUrlOptions(JobInfo *info);
247  bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
248  void UpdateStatistics(CURL *handle);
249  bool CanRetry(const JobInfo *info);
250  void Backoff(JobInfo *info);
251  void SetNocache(JobInfo *info);
252  void SetRegularCache(JobInfo *info);
253  bool VerifyAndFinalize(const int curl_error, JobInfo *info);
254  void InitHeaders();
255  void CloneProxyConfig(DownloadManager *clone);
256 
257  bool EscapeUrlChar(unsigned char input, char output[3]);
258  std::string EscapeUrl(const std::string &url);
259  unsigned EscapeHeader(const std::string &header, char *escaped_buf,
260  size_t buf_size);
261 
262  inline std::vector<ProxyInfo> *current_proxy_group() const {
263  return (opt_proxy_groups_ ?
265  }
266 
268  std::set<CURL *> *pool_handles_idle_;
269  std::set<CURL *> *pool_handles_inuse_;
271  CURLM *curl_multi_;
273  curl_slist *default_headers_;
274  char *user_agent_;
275 
276  pthread_t thread_download_;
279 
281  struct pollfd *watch_fds_;
282  uint32_t watch_fds_size_;
284  uint32_t watch_fds_max_;
285 
286  pthread_mutex_t *lock_options_;
287  pthread_mutex_t *lock_synchronous_mode_;
288  std::string opt_dns_server_;
298 
304 
306  std::vector<std::string> http_tracing_headers_;
307 
308  // Host list
309  std::vector<std::string> *opt_host_chain_;
314  std::vector<int> *opt_host_chain_rtt_;
316 
317  // Proxy list
318  std::vector< std::vector<ProxyInfo> > *opt_proxy_groups_;
340  std::string opt_proxy_list_;
348  std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
352  std::vector<std::string> opt_proxy_urls_;
357 
383  std::string fqrn_;
384 
389 
394 
407 
415  time_t opt_timestamp_failover_proxies_; // failover within the same group
417 
425 
427 
433 
438 }; // DownloadManager
439 
440 } // namespace download
441 
442 #endif // CVMFS_NETWORK_DOWNLOAD_H_
unsigned opt_timeout_direct_
Definition: download.h:290
std::vector< std::string > http_tracing_headers_
Definition: download.h:306
bool StripDirect(const std::string &proxy_list, std::string *cleaned_list)
Definition: download.cc:2428
unsigned opt_low_speed_limit_
Definition: download.h:291
Definition: prng.h:28
static const unsigned kDnsDefaultTimeoutMs
Definition: download.h:155
unsigned opt_backoff_init_ms_
Definition: download.h:293
curl_slist * Get(const char *header)
Definition: download.cc:756
bool EscapeUrlChar(unsigned char input, char output[3])
Definition: download.cc:339
std::string Print(curl_slist *slist)
Definition: download.cc:746
unsigned opt_proxy_groups_current_burned_
Definition: download.h:327
unsigned opt_proxy_groups_reset_after_
Definition: download.h:416
void SetUrlOptions(JobInfo *info)
Definition: download.cc:946
void ReleaseCredential(JobInfo *info)
Definition: download.cc:1282
SharedPtr< ShardingPolicy > sharding_policy_
Definition: download.h:366
bool IsUsed(curl_slist *slist)
Definition: download.h:88
std::string opt_proxy_fallback_list_
Definition: download.h:344
void SetHostChain(const std::string &host_list)
void SetNocache(JobInfo *info)
Definition: download.cc:1255
unsigned opt_host_reset_after_
Definition: download.h:424
void SetLowSpeedLimit(const unsigned low_speed_limit)
Definition: download.cc:1946
std::string proxy_template_direct_
Definition: download.h:400
static int ParseHttpCode(const char digits[3])
Definition: download.cc:420
FRIEND_TEST(T_Download, ValidateGeoReply)
static const int kProbeGeo
Definition: download.h:152
static const unsigned kBlockSize
Definition: download.h:86
unsigned opt_proxy_groups_current_
Definition: download.h:322
bool ValidateGeoReply(const std::string &reply_order, const unsigned expected_size, std::vector< uint64_t > *reply_vals)
Definition: download.cc:2388
std::vector< ProxyInfo > * current_proxy_group() const
Definition: download.h:262
time_t opt_timestamp_backup_proxies_
Definition: download.h:414
void SetProxyChain(const std::string &proxy_list, const std::string &fallback_proxy_list, const ProxySetModes set_mode)
Definition: download.cc:2468
std::string GetProxyList()
Definition: download.cc:2646
std::string EscapeUrl(const std::string &url)
Definition: download.cc:366
std::set< CURL * > * pool_handles_inuse_
Definition: download.h:269
pthread_mutex_t * lock_options_
Definition: download.h:286
ProxyInfo * ChooseProxyUnlocked(const shash::Any *hash)
Definition: download.cc:2658
pthread_t thread_download_
Definition: download.h:276
ProxyInfo(const dns::Host &host, const std::string &url)
Definition: download.h:125
DownloadManager(const unsigned max_pool_handles, const perf::StatisticsTemplate &statistics)
Definition: download.cc:1662
std::string opt_proxy_list_
Definition: download.h:340
perf::Counter * sz_transfer_time
Definition: download.h:44
std::vector< std::vector< ProxyInfo > > * opt_proxy_groups_
Definition: download.h:318
unsigned opt_proxy_groups_fallback_
Definition: download.h:332
curl_slist * default_headers_
Definition: download.h:273
curl_slist * GetList(const char *header)
Definition: download.cc:680
void ReleaseCurlHandle(CURL *handle)
Definition: download.cc:842
void SetDnsServer(const std::string &address)
Definition: download.cc:1875
FRIEND_TEST(T_HeaderLists, Intrinsics)
static void * MainDownload(void *data)
Definition: download.cc:516
void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct)
Definition: download.cc:1932
perf::Counter * n_retries
Definition: download.h:46
std::string opt_dns_server_
Definition: download.h:288
void Backoff(JobInfo *info)
Definition: download.cc:1230
perf::Counter * sz_transferred_bytes
Definition: download.h:43
void SetFqrn(const std::string &fqrn)
Definition: download.h:217
int32_t atomic_int32
Definition: atomic.h:17
Counter * RegisterTemplated(const std::string &name_minor, const std::string &desc)
Definition: statistics.h:111
void UpdateStatistics(CURL *handle)
Definition: download.cc:1196
void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds)
Definition: download.cc:1911
std::vector< std::string > opt_proxy_urls_
Definition: download.h:352
void GetProxyInfo(std::vector< std::vector< ProxyInfo > > *proxy_chain, unsigned *current_group, unsigned *fallback_group)
Definition: download.cc:2621
void SetProxyGroupResetDelay(const unsigned seconds)
Definition: download.cc:2769
atomic_int32 multi_threaded_
Definition: download.h:277
dns::NormalResolver * resolver_
Definition: download.h:388
std::vector< curl_slist * > blocks_
Definition: download.h:93
ProxyInfo(const std::string &url)
Definition: download.h:124
dns::IpPreference opt_ip_preference_
Definition: download.h:393
Definition: dns.h:90
bool SetShardingPolicy(const ShardingPolicySelector type)
Definition: download.cc:2840
perf::Counter * n_host_failover
Definition: download.h:48
void UpdateProxiesUnlocked(const std::string &reason)
Definition: download.cc:2672
void AppendHeader(curl_slist *slist, const char *header)
Definition: download.cc:702
void SetIpPreference(const dns::IpPreference preference)
Definition: download.cc:1921
perf::Counter * n_requests
Definition: download.h:45
void SetRetryParameters(const unsigned max_retries, const unsigned backoff_init_ms, const unsigned backoff_max_ms)
Definition: download.cc:2788
void SetRegularCache(JobInfo *info)
Definition: download.cc:1269
void CloneProxyConfig(DownloadManager *clone)
Definition: download.cc:2903
void PutList(curl_slist *slist)
Definition: download.cc:737
void SetMaxIpaddrPerProxy(unsigned limit)
Definition: download.cc:2799
void EnableIgnoreSignatureFailures()
Definition: download.cc:2824
DownloadManager * Clone(const perf::StatisticsTemplate &statistics)
Definition: download.cc:2858
std::vector< int > * opt_host_chain_rtt_
Definition: download.h:314
dns::IpPreference opt_ip_preference() const
Definition: download.h:224
SslCertificateStore ssl_certificate_store_
Definition: download.h:437
time_t opt_timestamp_backup_host_
Definition: download.h:423
std::string GetFallbackProxyList()
Definition: download.cc:2650
void SetProxyTemplates(const std::string &direct, const std::string &forced)
Definition: download.cc:2805
IpPreference
Definition: dns.h:46
unsigned opt_backoff_max_ms_
Definition: download.h:294
std::string GetDnsServer() const
Definition: download.cc:1867
unsigned opt_host_chain_current_
Definition: download.h:315
CredentialsAttachment * credentials_attachment_
Definition: download.h:426
std::vector< std::string > * opt_host_chain_
Definition: download.h:309
struct pollfd * watch_fds_
Definition: download.h:281
std::map< uint32_t, ProxyInfo * > opt_proxy_map_
Definition: download.h:348
curl_slist * DuplicateList(curl_slist *slist)
Definition: download.cc:685
UniquePtr< Pipe< kPipeDownloadJobs > > pipe_jobs_
Definition: download.h:280
Failures Fetch(JobInfo *info)
Definition: download.cc:1766
bool CanRetry(const JobInfo *info)
Definition: download.cc:1214
perf::Counter * n_proxy_failover
Definition: download.h:47
void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct)
Definition: download.cc:1955
std::string proxy_template_forced_
Definition: download.h:406
time_t opt_timestamp_failover_proxies_
Definition: download.h:415
void SetDnsParameters(const unsigned retries, const unsigned timeout_ms)
Definition: download.cc:1893
unsigned EscapeHeader(const std::string &header, char *escaped_buf, size_t buf_size)
Definition: download.cc:388
UniquePtr< Pipe< kPipeThreadTerminator > > pipe_terminate_
Definition: download.h:278
static const int kProbeUnprobed
Definition: download.h:143
virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data)=0
static const unsigned kDnsDefaultRetries
Definition: download.h:154
bool GeoSortServers(std::vector< std::string > *servers, std::vector< uint64_t > *output_order=NULL)
Definition: download.cc:2186
static const int kProbeDown
Definition: download.h:148
Counters(perf::StatisticsTemplate statistics)
Definition: download.h:50
static const unsigned kProxyMapScale
Definition: download.h:156
void GetHostInfo(std::vector< std::string > *host_chain, std::vector< int > *rtt, unsigned *current_host)
Definition: download.cc:1999
bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host)
Definition: download.cc:1132
bool VerifyAndFinalize(const int curl_error, JobInfo *info)
Definition: download.cc:1298
void CutHeader(const char *header, curl_slist **slist)
Definition: download.cc:718
SharedPtr< HealthCheck > health_check_
Definition: download.h:374
void SwitchProxy(JobInfo *info)
Definition: download.cc:2019
void AddHTTPTracingHeader(const std::string &header)
Definition: download.cc:2832
void SetCredentialsAttachment(CredentialsAttachment *ca)
Definition: download.cc:1859
std::set< CURL * > * pool_handles_idle_
Definition: download.h:268
void RebalanceProxiesUnlocked(const std::string &reason)
Definition: download.cc:2736
pthread_mutex_t * lock_synchronous_mode_
Definition: download.h:287
virtual bool ConfigureCurlHandle(CURL *curl_handle, pid_t pid, void **info_data)=0
void InitializeRequest(JobInfo *info, CURL *handle)
Definition: download.cc:860
static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action, void *userp, void *socketp)
Definition: download.cc:436
HeaderLists * header_lists_
Definition: download.h:272
void SetHostResetDelay(const unsigned seconds)
Definition: download.cc:2779
void Put(curl_slist *slist)
Definition: download.cc:773