GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/network/download.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 26 29 89.7%
Branches: 25 50 50.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_NETWORK_DOWNLOAD_H_
6 #define CVMFS_NETWORK_DOWNLOAD_H_
7
8 #include <poll.h>
9 #include <pthread.h>
10 #include <stdint.h>
11 #include <unistd.h>
12
13 #include <cstdio>
14 #include <map>
15 #include <set>
16 #include <string>
17 #include <vector>
18
19 #include "compression/compression.h"
20 #include "crypto/hash.h"
21 #include "duplex_curl.h"
22 #include "gtest/gtest_prod.h"
23 #include "network/dns.h"
24 #include "network/health_check.h"
25 #include "network/jobinfo.h"
26 #include "network/network_errors.h"
27 #include "network/sharding_policy.h"
28 #include "network/sink.h"
29 #include "ssl.h"
30 #include "statistics.h"
31 #include "util/atomic.h"
32 #include "util/pipe.h"
33 #include "util/pointer.h"
34 #include "util/prng.h"
35 #include "util/shared_ptr.h"
36
37 class InterruptCue;
38
39 namespace download {
40
41 struct Counters {
42 perf::Counter *sz_transferred_bytes;
43 perf::Counter *sz_transfer_time; // measured in milliseconds
44 perf::Counter *n_requests;
45 perf::Counter *n_retries;
46 perf::Counter *n_metalink_failover;
47 perf::Counter *n_host_failover;
48 perf::Counter *n_proxy_failover;
49
50 6245 explicit Counters(perf::StatisticsTemplate statistics) {
51
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 sz_transferred_bytes = statistics.RegisterTemplated(
52 "sz_transferred_bytes", "Number of transferred bytes");
53
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 sz_transfer_time = statistics.RegisterTemplated(
54 "sz_transfer_time", "Transfer time (milliseconds)");
55
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 n_requests = statistics.RegisterTemplated("n_requests",
56 "Number of requests");
57
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 n_retries = statistics.RegisterTemplated("n_retries", "Number of retries");
58
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 n_metalink_failover = statistics.RegisterTemplated(
59 "n_metalink_failover", "Number of metalink failovers");
60
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 n_host_failover = statistics.RegisterTemplated("n_host_failover",
61 "Number of host failovers");
62
3/6
✓ Branch 2 taken 6245 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 6245 times.
✗ Branch 7 not taken.
✓ Branch 9 taken 6245 times.
✗ Branch 10 not taken.
6245 n_proxy_failover = statistics.RegisterTemplated(
63 "n_proxy_failover", "Number of proxy failovers");
64 6245 }
65 }; // Counters
66
67 /**
68 * Manages blocks of arrays of curl_slist storing header strings. In contrast
69 * to curl's slists, these ones don't take ownership of the header strings.
70 * Overall number of elements is limited as number of concurrent connections
71 * is limited.
72 *
73 * Only use curl_slist objects with the HeaderLists instance that created
74 * them.
75 */
76 class HeaderLists {
77 FRIEND_TEST(T_HeaderLists, Intrinsics);
78
79 public:
80 ~HeaderLists();
81 curl_slist *GetList(const char *header);
82 curl_slist *DuplicateList(curl_slist *slist);
83 void AppendHeader(curl_slist *slist, const char *header);
84 void CutHeader(const char *header, curl_slist **slist);
85 void PutList(curl_slist *slist);
86 std::string Print(curl_slist *slist);
87
88 private:
89 static const unsigned kBlockSize = 4096 / sizeof(curl_slist);
90
91 289153 bool IsUsed(curl_slist *slist) { return slist->data != NULL; }
92 curl_slist *Get(const char *header);
93 void Put(curl_slist *slist);
94 void AddBlock();
95
96 std::vector<curl_slist *> blocks_; // List of curl_slist blocks
97 };
98
99
100 /**
101 * Provides hooks to attach per-transfer credentials to curl handles.
102 * Overridden by the AuthzX509Attachment in authz_curl.cc. Needs to be
103 * thread-safe because it can potentially be used by multiple DownloadManagers.
104 */
105 class CredentialsAttachment {
106 public:
107 3324 virtual ~CredentialsAttachment() { }
108 virtual bool ConfigureCurlHandle(CURL *curl_handle,
109 pid_t pid,
110 void **info_data) = 0;
111 virtual void ReleaseCurlHandle(CURL *curl_handle, void *info_data) = 0;
112 };
113
114
115 /**
116 * Note when adding new fields: Clone() probably needs to be adjusted, too.
117 * TODO(jblomer): improve ordering of members
118 */
119 class DownloadManager { // NOLINT(clang-analyzer-optin.performance.Padding)
120 FRIEND_TEST(T_Download, ValidateGeoReply);
121 FRIEND_TEST(T_Download, StripDirect);
122 FRIEND_TEST(T_Download, EscapeUrl);
123
124 public:
125 // HostInfo is used for both metalink and host
126 struct HostInfo {
127 HostInfo() { }
128 12490 HostInfo(std::vector<std::string> *chain,
129 const int current,
130 const time_t timestamp_backup,
131 const unsigned reset_after)
132 12490 : chain(chain)
133 12490 , current(current)
134 12490 , timestamp_backup(timestamp_backup)
135 12490 , reset_after(reset_after) { }
136 std::vector<std::string> *chain;
137 int current;
138 time_t timestamp_backup;
139 unsigned reset_after;
140 };
141
142 struct ProxyInfo {
143 ProxyInfo() { }
144
1/2
✓ Branch 2 taken 3020 times.
✗ Branch 3 not taken.
3020 explicit ProxyInfo(const std::string &url) : url(url) { }
145 196 ProxyInfo(const dns::Host &host, const std::string &url)
146
1/2
✓ Branch 2 taken 196 times.
✗ Branch 3 not taken.
196 : host(host), url(url) { }
147 std::string Print();
148 dns::Host host;
149 std::string url;
150 };
151
152 enum ProxySetModes {
153 kSetProxyRegular = 0,
154 kSetProxyFallback,
155 kSetProxyBoth,
156 };
157
158 /**
159 * No attempt was made to order stratum 1 servers
160 */
161 static const int kProbeUnprobed;
162 /**
163 * The rtt to a stratum 1 could not be determined because the stratum 1
164 * was unreachable.
165 */
166 static const int kProbeDown;
167 /**
168 * The stratum 1 server was put in order according to a Geo-API result
169 */
170 static const int kProbeGeo;
171
172 static const unsigned kDnsDefaultRetries = 1;
173 static const unsigned kDnsDefaultTimeoutMs = 3000;
174 static const unsigned kProxyMapScale = 16;
175
176 DownloadManager(const unsigned max_pool_handles,
177 const perf::StatisticsTemplate &statistics,
178 const std::string &name = "standard");
179 ~DownloadManager();
180
181 static int ParseHttpCode(const char digits[3]);
182
183 void Spawn();
184 DownloadManager *Clone(const perf::StatisticsTemplate &statistics,
185 const std::string &cloned_name);
186 Failures Fetch(JobInfo *info);
187
188 void SetCredentialsAttachment(CredentialsAttachment *ca);
189 std::string GetDnsServer() const;
190 void SetDnsServer(const std::string &address);
191 void SetDnsParameters(const unsigned retries, const unsigned timeout_ms);
192 void SetDnsTtlLimits(const unsigned min_seconds, const unsigned max_seconds);
193 void SetIpPreference(const dns::IpPreference preference);
194 void SetTimeout(const unsigned seconds_proxy, const unsigned seconds_direct);
195 void GetTimeout(unsigned *seconds_proxy, unsigned *seconds_direct);
196 void SetLowSpeedLimit(const unsigned low_speed_limit);
197 void SetMetalinkChain(const std::string &metalink_list);
198 void SetMetalinkChain(const std::vector<std::string> &metalink_list);
199 void GetMetalinkInfo(std::vector<std::string> *metalink_chain,
200 unsigned *current_metalink);
201 void SwitchMetalink();
202 bool CheckMetalinkChain(const time_t now);
203 void SetHostChain(const std::string &host_list);
204 void SetHostChain(const std::vector<std::string> &host_list);
205 void GetHostInfo(std::vector<std::string> *host_chain, std::vector<int> *rtt,
206 unsigned *current_host);
207 void ProbeHosts();
208 bool ProbeGeo();
209 // Sort list of servers using the Geo API. If the output_order
210 // vector is NULL, then the servers vector input is itself sorted.
211 // If it is non-NULL, then servers is left unchanged and the zero-based
212 // ordering is stored into output_order.
213 bool GeoSortServers(std::vector<std::string> *servers,
214 std::vector<uint64_t> *output_order = NULL);
215 void SwitchHost();
216 void SetProxyChain(const std::string &proxy_list,
217 const std::string &fallback_proxy_list,
218 const ProxySetModes set_mode);
219 void GetProxyInfo(std::vector<std::vector<ProxyInfo> > *proxy_chain,
220 unsigned *current_group,
221 unsigned *fallback_group);
222 std::string GetProxyList();
223 std::string GetFallbackProxyList();
224 void ShardProxies();
225 void RebalanceProxies();
226 void SwitchProxyGroup();
227 void SetProxyGroupResetDelay(const unsigned seconds);
228 void SetMetalinkResetDelay(const unsigned seconds);
229 void SetHostResetDelay(const unsigned seconds);
230 void SetRetryParameters(const unsigned max_retries,
231 const unsigned backoff_init_ms,
232 const unsigned backoff_max_ms);
233 void SetMaxIpaddrPerProxy(unsigned limit);
234 void SetProxyTemplates(const std::string &direct, const std::string &forced);
235 void EnableInfoHeader();
236 void EnableRedirects();
237 void EnableIgnoreSignatureFailures();
238 void EnableHTTPTracing();
239 void AddHTTPTracingHeader(const std::string &header);
240 void UseSystemCertificatePath();
241
242 bool SetShardingPolicy(const ShardingPolicySelector type);
243 void SetFailoverIndefinitely();
244 void SetFqrn(const std::string &fqrn) { fqrn_ = fqrn; }
245
246 49 unsigned num_hosts() {
247
1/2
✓ Branch 0 taken 49 times.
✗ Branch 1 not taken.
49 if (opt_host_.chain)
248 49 return opt_host_.chain->size();
249 return 0;
250 }
251
252 unsigned num_metalinks() {
253 if (opt_metalink_.chain)
254 return opt_metalink_.chain->size();
255 return 0;
256 }
257
258 dns::IpPreference opt_ip_preference() const { return opt_ip_preference_; }
259
260 private:
261 static int CallbackCurlSocket(CURL *easy, curl_socket_t s, int action,
262 void *userp, void *socketp);
263 static void *MainDownload(void *data);
264
265 bool StripDirect(const std::string &proxy_list, std::string *cleaned_list);
266 bool ValidateGeoReply(const std::string &reply_order,
267 const unsigned expected_size,
268 std::vector<uint64_t> *reply_vals);
269 void SwitchHostInfo(const std::string &typ, HostInfo &info, JobInfo *jobinfo);
270 void SwitchMetalink(JobInfo *info);
271 void SwitchHost(JobInfo *info);
272 void SwitchProxy(JobInfo *info);
273 ProxyInfo *ChooseProxyUnlocked(const shash::Any *hash);
274 void UpdateProxiesUnlocked(const std::string &reason);
275 void RebalanceProxiesUnlocked(const std::string &reason);
276 CURL *AcquireCurlHandle();
277 void ReleaseCurlHandle(CURL *handle);
278 void ReleaseCredential(JobInfo *info);
279 void InitializeRequest(JobInfo *info, CURL *handle);
280 void SetUrlOptions(JobInfo *info);
281 bool ValidateProxyIpsUnlocked(const std::string &url, const dns::Host &host);
282 void UpdateStatistics(CURL *handle);
283 bool CanRetry(const JobInfo *info);
284 void Backoff(JobInfo *info);
285 void SetNocache(JobInfo *info);
286 void SetRegularCache(JobInfo *info);
287 void ProcessLink(JobInfo *info);
288 bool VerifyAndFinalize(const int curl_error, JobInfo *info);
289 void InitHeaders();
290 void CloneProxyConfig(DownloadManager *clone);
291 void CheckHostInfoReset(const std::string &typ, HostInfo &info,
292 JobInfo *jobinfo, time_t &now);
293
294 bool EscapeUrlChar(unsigned char input, char output[3]);
295 std::string EscapeUrl(const int64_t jobinfo_id, const std::string &url);
296 unsigned EscapeHeader(const std::string &header, char *escaped_buf,
297 size_t buf_size);
298
299 4824 inline std::vector<ProxyInfo> *current_proxy_group() const {
300 4824 return (opt_proxy_groups_
301
1/2
✓ Branch 0 taken 4824 times.
✗ Branch 1 not taken.
4824 ? &((*opt_proxy_groups_)[opt_proxy_groups_current_])
302 4824 : NULL);
303 }
304
305 Prng prng_;
306 std::set<CURL *> *pool_handles_idle_;
307 std::set<CURL *> *pool_handles_inuse_;
308 uint32_t pool_max_handles_;
309 CURLM *curl_multi_;
310 HeaderLists *header_lists_;
311 curl_slist *default_headers_;
312 char *user_agent_;
313
314 pthread_t thread_download_;
315 atomic_int32 multi_threaded_;
316 UniquePtr<Pipe<kPipeThreadTerminator> > pipe_terminate_;
317
318 UniquePtr<Pipe<kPipeDownloadJobs> > pipe_jobs_;
319 struct pollfd *watch_fds_;
320 uint32_t watch_fds_size_;
321 uint32_t watch_fds_inuse_;
322 uint32_t watch_fds_max_;
323
324 pthread_mutex_t *lock_options_;
325 pthread_mutex_t *lock_synchronous_mode_;
326 std::string opt_dns_server_;
327 unsigned opt_timeout_proxy_;
328 unsigned opt_timeout_direct_;
329 unsigned opt_low_speed_limit_;
330 unsigned opt_max_retries_;
331 unsigned opt_backoff_init_ms_;
332 unsigned opt_backoff_max_ms_;
333 bool enable_info_header_;
334 bool opt_ipv4_only_;
335 bool follow_redirects_;
336
337 /**
338 * Ignore signature failures during download.
339 * In general it is a bad idea to do this!
340 */
341 bool ignore_signature_failures_;
342
343 bool enable_http_tracing_;
344 std::vector<std::string> http_tracing_headers_;
345
346 // Metalink list
347 HostInfo opt_metalink_;
348 time_t opt_metalink_timestamp_link_;
349
350 // Host list
351 HostInfo opt_host_;
352 /**
353 * Created by SetHostChain(), filled by ProbeHosts(). Contains the time to get
354 * .cvmfschecksum in ms. -1 is unprobed, -2 is error.
355 */
356 std::vector<int> *opt_host_chain_rtt_;
357
358 // Proxy list
359 std::vector<std::vector<ProxyInfo> > *opt_proxy_groups_;
360 /**
361 * The current load-balancing group (first dimension in opt_proxy_groups_).
362 */
363 unsigned opt_proxy_groups_current_;
364 /**
365 * Number of proxy servers that failed within current load-balance group.
366 * Between 0 and (*opt_proxy_groups_)[opt_proxy_groups_current_].size().
367 */
368 unsigned opt_proxy_groups_current_burned_;
369 /**
370 * The index of the first fallback proxy group. If there are none,
371 * it is set to the number of regular proxy groups.
372 */
373 unsigned opt_proxy_groups_fallback_;
374 /**
375 * Overall number of proxies summed over all the groups.
376 */
377 unsigned opt_num_proxies_;
378 /**
379 * The original proxy list provided to SetProxyChain.
380 */
381 std::string opt_proxy_list_;
382 /**
383 * The original proxy fallback list provided to SetProxyChain.
384 */
385 std::string opt_proxy_fallback_list_;
386 /**
387 * Load-balancing map of currently active proxies
388 */
389 std::map<uint32_t, ProxyInfo *> opt_proxy_map_;
390 /**
391 * Sorted list of currently active proxy URLs (for log messages)
392 */
393 std::vector<std::string> opt_proxies_;
394 /**
395 * Shard requests across multiple proxies via consistent hashing
396 */
397 bool opt_proxy_shard_;
398
399 /**
400 * Sharding policy deciding which proxy should be chosen for each download
401 * request
402 *
403 * Sharding policy is shared between all download managers. As such, shared
404 * pointers are used to allow for proper clean-up afterwards in the destructor
405 * (we cannot assume the order in which the download managers are stopped).
406 */
407 SharedPtr<ShardingPolicy> sharding_policy_;
408 /**
409 * Health check for the proxies
410 *
411 * Health check is shared between all download managers. As such, shared
412 * pointers are used to allow for proper clean-up afterwards in the destructor
413 * (we cannot assume the order in which the download managers are stopped).
414 */
415 SharedPtr<HealthCheck> health_check_;
416 /**
417 * Endless retries for a failed download (hard failures will result in abort)
418 */
419 bool failover_indefinitely_;
420 /**
421 * Repo name. Needed for the retry logic if a download was unsuccessful.
422 * Used in the sharding policy and Interrupted().
423 */
424 std::string fqrn_;
425
426 /**
427 * Name of the download manager (default is "standard")
428 */
429 std::string name_;
430
431 /**
432 * Used to resolve proxy addresses (host addresses are resolved by the proxy).
433 */
434 dns::NormalResolver *resolver_;
435
436 /**
437 * If a proxy has IPv4 and IPv6 addresses, which one to prefer
438 */
439 dns::IpPreference opt_ip_preference_;
440
441 /**
442 * Used to replace @proxy@ in the Geo-API calls to order Stratum 1 servers,
443 * in case the active proxy is DIRECT (no proxy). Should be a UUID
444 * identifying the host.
445 */
446 std::string proxy_template_direct_;
447 /**
448 * Used to force a value for @proxy@ in the Geo-API calls to order Stratum 1
449 * servers. If empty, the fully qualified domain name of the active proxy
450 * server is used.
451 */
452 std::string proxy_template_forced_;
453
454 /**
455 * More than one proxy group can be considered as group of primary proxies
456 * followed by backup proxy groups, e.g. at another site.
457 * If opt_proxy_groups_reset_after_ is > 0, cvmfs will reset its proxy group
458 * to the first one after opt_proxy_groups_reset_after_ seconds have elapsed.
459 */
460 time_t opt_timestamp_backup_proxies_;
461 time_t opt_timestamp_failover_proxies_; // failover within the same group
462 unsigned opt_proxy_groups_reset_after_;
463
464 CredentialsAttachment *credentials_attachment_;
465
466 /**
467 * Writes and reads should be atomic because reading happens in a different
468 * thread than writing.
469 */
470 Counters *counters_;
471
472 /**
473 * Carries the path settings for SSL certificates
474 */
475 SslCertificateStore ssl_certificate_store_;
476 }; // DownloadManager
477
478 } // namespace download
479
480 #endif // CVMFS_NETWORK_DOWNLOAD_H_
481