GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/network/download.h
Date: 2026-04-26 02:35:59
Exec Total Coverage
Lines: 26 31 83.9%
Branches: 25 50 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "compression/compression.h"
20 #include "crypto/hash.h"
21 #include "duplex_curl.h" // IWYU pragma: keep
22 #include "duplex_testing.h"
23 #include "network/dns.h"
24 #include "network/health_check.h"
25 #include "network/jobinfo.h"
26 #include "network/network_errors.h"
27 #include "network/sharding_policy.h"
28 #include "ssl.h"
29 #include "statistics.h"
30 #include "util/atomic.h"
31 #include "util/pipe.h"
32 #include "util/pointer.h"
33 #include "util/prng.h"
34 #include "util/shared_ptr.h"
35
36 class InterruptCue;
37
38 namespace download {
39
40 struct Counters {
41 perf::Counter *sz_transferred_bytes;
42 perf::Counter *sz_transfer_time; // measured in milliseconds
43 perf::Counter *n_requests;
44 perf::Counter *n_retries;
45 perf::Counter *n_metalink_failover;
46 perf::Counter *n_host_failover;
47 perf::Counter *n_proxy_failover;
48
49 4726 explicit Counters(perf::StatisticsTemplate statistics) {
50
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 sz_transferred_bytes = statistics.RegisterTemplated(
51 "sz_transferred_bytes", "Number of transferred bytes");
52
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 sz_transfer_time = statistics.RegisterTemplated(
53 "sz_transfer_time", "Transfer time (milliseconds)");
54
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 n_requests = statistics.RegisterTemplated("n_requests",
55 "Number of requests");
56
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
57
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 n_metalink_failover = statistics.RegisterTemplated(
58 "n_metalink_failover", "Number of metalink failovers");
59
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 n_host_failover = statistics.RegisterTemplated("n_host_failover",
60 "Number of host failovers");
61
3/6
✓ Branch 2 taken 4726 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 4726 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 4726 times.
✗ Branch 10 not taken.
4726 n_proxy_failover = statistics.RegisterTemplated(
62 "n_proxy_failover", "Number of proxy failovers");
63 4726 }
64 }; // Counters
65
66 /**
67 * Manages blocks of arrays of curl_slist storing header strings. In contrast
68 * to curl's slists, these ones don't take ownership of the header strings.
69 * The overall number of elements is bounded because the number of concurrent
70 * connections is limited.
71 *
72 * Only pass curl_slist objects to the methods of this class that were created
73 * by the same HeaderLists instance.
74 */
75 class HeaderLists {
76 FRIEND_TEST(T_HeaderLists, Intrinsics);
77
78 public:
79 ~HeaderLists();
80 curl_slist *GetList(const char *header);
81 curl_slist *DuplicateList(curl_slist *slist);
82 void AppendHeader(curl_slist *slist, const char *header);
83 void CutHeader(const char *header, curl_slist **slist);
84 void PutList(curl_slist *slist);
85 std::string Print(curl_slist *slist);
86
87 private:
88 static const unsigned kBlockSize = 4096 / sizeof(curl_slist);
89
90 231848 bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
91 curl_slist *Get(const char *header);
92 void Put(curl_slist *slist);
93 void AddBlock();
94
95 std::vector<curl_slist *> blocks_; // List of curl_slist blocks
96 };
97
98
99 /**
100 * Provides hooks to attach per-transfer credentials to curl handles.
101 * Overridden by the AuthzX509Attachment in authz_curl.cc. Needs to be
102 * thread-safe because it can be potentially used by multiple DownloadManagers.
103 */
104 class CredentialsAttachment {
105 public:
106 1988 virtual ~CredentialsAttachment() { }
107 virtual bool ConfigureCurlHandle(CURL *curl_handle,
108 pid_t pid,
109 void **info_data) = 0;
110 virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
111 };
112
113
114 /**
115 * Note when adding new fields: Clone() probably needs to be adjusted, too.
116 * TODO(jblomer): improve ordering of members
117 */
118 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
119 FRIEND_TEST(T_Download, ValidateGeoReply);
120 FRIEND_TEST(T_Download, StripDirect);
121 FRIEND_TEST(T_Download, EscapeUrl);
122
123 public:
124 // HostInfo is used for both metalink and host
125 struct HostInfo {
126 HostInfo() { }
127 9452 HostInfo(std::vector<std::string> *chain,
128 const int current,
129 const time_t timestamp_backup,
130 const unsigned reset_after)
131 9452 : chain(chain)
132 9452 , current(current)
133 9452 , timestamp_backup(timestamp_backup)
134 9452 , reset_after(reset_after) { }
135 std::vector<std::string> *chain;
136 int current;
137 time_t timestamp_backup;
138 unsigned reset_after;
139 };
140
141 struct ProxyInfo {
142 ProxyInfo() { }
143
1/2
✓ Branch 2 taken 1844 times.
✗ Branch 3 not taken.
1844 explicit ProxyInfo(const std::string &url) : url(url) { }
144 176 ProxyInfo(const dns::Host &host, const std::string &url)
145
1/2
✓ Branch 2 taken 176 times.
✗ Branch 3 not taken.
176 : host(host), url(url) { }
146 std::string Print();
147 dns::Host host;
148 std::string url;
149 };
150
151 enum ProxySetModes {
152 kSetProxyRegular = 0,
153 kSetProxyFallback,
154 kSetProxyBoth,
155 };
156
157 /**
158 * No attempt was made to order stratum 1 servers
159 */
160 static const int kProbeUnprobed;
161 /**
162 * The rtt to a stratum 1 could not be determined because the stratum 1
163 * was unreachable.
164 */
165 static const int kProbeDown;
166 /**
167 * The stratum 1 server was put in order according to a Geo-API result
168 */
169 static const int kProbeGeo;
170
171 static const unsigned kDnsDefaultRetries = 1;
172 static const unsigned kDnsDefaultTimeoutMs = 3000;
173 static const unsigned kProxyMapScale = 16;
174
175 DownloadManager(const unsigned max_pool_handles,
176 const perf::StatisticsTemplate &statistics,
177 const std::string &name = "standard");
178 ~DownloadManager();
179
180 static int ParseHttpCode(const char digits[3]);
181
182 void Spawn();
183 DownloadManager *Clone(const perf::StatisticsTemplate &statistics,
184 const std::string &cloned_name);
185 Failures Fetch(JobInfo *info);
186
187 void SetCredentialsAttachment(CredentialsAttachment *ca);
188 std::string GetDnsServer() const;
189 void SetDnsServer(const std::string &address);
190 void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
191 void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
192 void SetIpPreference(const dns::IpPreference preference);
193 void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
194 void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
195 void SetLowSpeedLimit(const unsigned low_speed_limit);
196 void SetMetalinkChain(const std::string &metalink_list);
197 void SetMetalinkChain(const std::vector<std::string> &metalink_list);
198 void GetMetalinkInfo(std::vector<std::string> *metalink_chain,
199 unsigned *current_metalink);
200 void SwitchMetalink();
201 bool CheckMetalinkChain(const time_t now);
202 void SetHostChain(const std::string &host_list);
203 void SetHostChain(const std::vector<std::string> &host_list);
204 void GetHostInfo(std::vector<std::string> *host_chain, std::vector<int> *rtt,
205 unsigned *current_host);
206 void ProbeHosts();
207 bool ProbeGeo();
208 // Sort list of servers using the Geo API. If the output_order
209 // vector is NULL, then the servers vector input is itself sorted.
210 // If it is non-NULL, then servers is left unchanged and the zero-based
211 // ordering is stored into output_order.
212 bool GeoSortServers(std::vector<std::string> *servers,
213 std::vector<uint64_t> *output_order = NULL);
214 void SwitchHost();
215 void SetProxyChain(const std::string &proxy_list,
216 const std::string &fallback_proxy_list,
217 const ProxySetModes set_mode);
218 void GetProxyInfo(std::vector<std::vector<ProxyInfo> > *proxy_chain,
219 unsigned *current_group,
220 unsigned *fallback_group);
221 std::string GetProxyList();
222 std::string GetFallbackProxyList();
223 void ShardProxies();
224 void RebalanceProxies();
225 void SwitchProxyGroup();
226 void SetProxyGroupResetDelay(const unsigned seconds);
227 void SetMetalinkResetDelay(const unsigned seconds);
228 void SetHostResetDelay(const unsigned seconds);
229 void SetRetryParameters(const unsigned max_retries,
230 const unsigned backoff_init_ms,
231 const unsigned backoff_max_ms);
232 void SetMaxIpaddrPerProxy(unsigned limit);
233 void SetProxyTemplates(const std::string &direct, const std::string &forced);
234 void EnableInfoHeader();
235 void EnableRedirects();
236 void EnableIgnoreSignatureFailures();
237 void EnableHTTPTracing();
238 void AddHTTPTracingHeader(const std::string &header);
239 void UseSystemCertificatePath();
240
241 bool SetShardingPolicy(const ShardingPolicySelector type);
242 void SetFailoverIndefinitely();
243 void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
244 void SetInfoHeaderTemplate(const std::string &templ) {
245 info_header_template_ = templ;
246 }
247
248 61 unsigned num_hosts() {
249
1/2
✓ Branch 0 taken 61 times.
✗ Branch 1 not taken.
61 if (opt_host_.chain)
250 61 return opt_host_.chain->size();
251 return 0;
252 }
253
254 unsigned num_metalinks() {
255 if (opt_metalink_.chain)
256 return opt_metalink_.chain->size();
257 return 0;
258 }
259
260 dns::IpPreference opt_ip_preference() const { return opt_ip_preference_; }
261
262 private:
263 static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
264 void *userp, void *socketp);
265 static void *MainDownload(void *data);
266
267 bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
268 bool ValidateGeoReply(const std::string &reply_order,
269 const unsigned expected_size,
270 std::vector<uint64_t> *reply_vals);
271 void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo);
272 void SwitchMetalink(JobInfo *info);
273 void SwitchHost(JobInfo *info);
274 void SwitchProxy(JobInfo *info);
275 ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
276 void UpdateProxiesUnlocked(const std::string &reason);
277 void RebalanceProxiesUnlocked(const std::string &reason);
278 CURL *AcquireCurlHandle();
279 void ReleaseCurlHandle(CURL *handle);
280 void ReleaseCredential(JobInfo *info);
281 void InitializeRequest(JobInfo *info, CURL *handle);
282 void SetUrlOptions(JobInfo *info);
283 bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
284 void UpdateStatistics(CURL *handle);
285 bool CanRetry(const JobInfo *info);
286 void Backoff(JobInfo *info);
287 void SetNocache(JobInfo *info);
288 void SetRegularCache(JobInfo *info);
289 void ProcessLink(JobInfo *info);
290 bool VerifyAndFinalize(const int curl_error, JobInfo *info);
291 void InitHeaders();
292 void CloneProxyConfig(DownloadManager *clone);
293 void CheckHostInfoReset(const std::string &typ, HostInfo &info,
294 JobInfo *jobinfo, time_t &now);
295
296 std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url);
297
298 3030 inline std::vector<ProxyInfo> *current_proxy_group() const {
299 3030 return (opt_proxy_groups_
300
1/2
✓ Branch 0 taken 3030 times.
✗ Branch 1 not taken.
3030 ? &((*opt_proxy_groups_)[opt_proxy_groups_current_])
301 3030 : NULL);
302 }
303
304 Prng prng_;
305 std::set<CURL *> *pool_handles_idle_;
306 std::set<CURL *> *pool_handles_inuse_;
307 uint32_t pool_max_handles_;
308 CURLM *curl_multi_;
309 HeaderLists *header_lists_;
310 curl_slist *default_headers_;
311 char *user_agent_;
312
313 pthread_t thread_download_;
314 atomic_int32 multi_threaded_;
315 UniquePtr<Pipe<kPipeThreadTerminator> > pipe_terminate_;
316
317 UniquePtr<Pipe<kPipeDownloadJobs> > pipe_jobs_;
318 struct pollfd *watch_fds_;
319 uint32_t watch_fds_size_;
320 uint32_t watch_fds_inuse_;
321 uint32_t watch_fds_max_;
322
323 pthread_mutex_t *lock_options_;
324 pthread_mutex_t *lock_synchronous_mode_;
325 std::string opt_dns_server_;
326 std::string info_header_template_;
327 unsigned opt_timeout_proxy_;
328 unsigned opt_timeout_direct_;
329 unsigned opt_low_speed_limit_;
330 unsigned opt_max_retries_;
331 unsigned opt_backoff_init_ms_;
332 unsigned opt_backoff_max_ms_;
333 bool enable_info_header_;
334 bool opt_ipv4_only_;
335 bool follow_redirects_;
336
337 /**
338 * Ignore signature failures during download.
339 * In general it is a bad idea to do this!
340 */
341 bool ignore_signature_failures_;
342
343 bool enable_http_tracing_;
344 std::vector<std::string> http_tracing_headers_;
345
346 // Metalink list
347 HostInfo opt_metalink_;
348 time_t opt_metalink_timestamp_link_;
349
350 // Host list
351 HostInfo opt_host_;
352 /**
353 * Created by SetHostChain(), filled by ProbeHosts(). Contains time to get
354 * .cvmfschecksum in ms. -1 is unprobed, -2 is error.
355 */
356 std::vector<int> *opt_host_chain_rtt_;
357
358 // Proxy list
359 std::vector<std::vector<ProxyInfo> > *opt_proxy_groups_;
360 /**
361 * The current load-balancing group (first dimension in opt_proxy_groups_).
362 */
363 unsigned opt_proxy_groups_current_;
364 /**
365 * Number of proxy servers that failed within current load-balance group.
366 * Between 0 and (*opt_proxy_groups_)[opt_proxy_groups_current_].size().
367 */
368 unsigned opt_proxy_groups_current_burned_;
369 /**
370 * The index of the first fallback proxy group. If there are none,
371 * it is set to the number of regular proxy groups.
372 */
373 unsigned opt_proxy_groups_fallback_;
374 /**
375 * Overall number of proxies summed over all the groups.
376 */
377 unsigned opt_num_proxies_;
378 /**
379 * The original proxy list provided to SetProxyChain.
380 */
381 std::string opt_proxy_list_;
382 /**
383 * The original proxy fallback list provided to SetProxyChain.
384 */
385 std::string opt_proxy_fallback_list_;
386 /**
387 * Load-balancing map of currently active proxies
388 */
389 std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
390 /**
391 * Sorted list of currently active proxy URLs (for log messages)
392 */
393 std::vector<std::string> opt_proxies_;
394 /**
395 * Shard requests across multiple proxies via consistent hashing
396 */
397 bool opt_proxy_shard_;
398
399 /**
400 * Sharding policy deciding which proxy should be chosen for each download
401 * request
402 *
403 * Sharding policy is shared between all download managers. As such shared
404 * pointers are used to allow for proper clean-up afterwards in the destructor
405 * (We cannot assume the order in which the download managers are stopped)
406 */
407 SharedPtr<ShardingPolicy> sharding_policy_;
408 /**
409 * Health check for the proxies
410 *
411 * Health check is shared between all download managers. As such shared
412 * pointers are used to allow for proper clean-up afterwards in the destructor
413 * (We cannot assume the order in which the download managers are stopped)
414 */
415 SharedPtr<HealthCheck> health_check_;
416 /**
417 * Endless retries for a failed download (hard failures will result in abort)
418 */
419 bool failover_indefinitely_;
420 /**
421 * Repo name. Needed for the retry logic if a download was unsuccessful.
422 * Used in the sharding policy and in Interrupted().
423 */
424 std::string fqrn_;
425
426 /**
427 * Name of the download manager (default is "standard")
428 */
429 std::string name_;
430
431 /**
432 * Used to resolve proxy addresses (host addresses are resolved by the proxy).
433 */
434 dns::NormalResolver *resolver_;
435
436 /**
437 * If a proxy has IPv4 and IPv6 addresses, which one to prefer
438 */
439 dns::IpPreference opt_ip_preference_;
440
441 /**
442 * Used to replace @proxy@ in the Geo-API calls to order Stratum 1 servers,
443 * in case the active proxy is DIRECT (no proxy). Should be a UUID
444 * identifying the host.
445 */
446 std::string proxy_template_direct_;
447 /**
448 * Used to force a value for @proxy@ in the Geo-API calls to order Stratum 1
449 * servers. If empty, the fully qualified domain name of the active proxy
450 * server is used.
451 */
452 std::string proxy_template_forced_;
453
454 /**
455 * More than one proxy group can be considered as group of primary proxies
456 * followed by backup proxy groups, e.g. at another site.
457 * If opt_proxy_groups_reset_after_ is > 0, cvmfs will reset its proxy group
458 * to the first one after opt_proxy_groups_reset_after_ seconds have elapsed.
459 */
460 time_t opt_timestamp_backup_proxies_;
461 time_t opt_timestamp_failover_proxies_; // failover within the same group
462 unsigned opt_proxy_groups_reset_after_;
463
464 CredentialsAttachment *credentials_attachment_;
465
466 /**
467 * Writes and reads should be atomic because reading happens in a different
468 * thread than writing.
469 */
470 Counters *counters_;
471
472 /**
473 * Carries the path settings for SSL certificates
474 */
475 SslCertificateStore ssl_certificate_store_;
476 }; // DownloadManager
477
478 } // namespace download
479
480 #endif // CVMFS_NETWORK_DOWNLOAD_H_
481