GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/ingestion/chunk_detector.cc
Date: 2026-01-04 02:35:37
Exec Total Coverage
Lines: 56 56 100.0%
Branches: 33 46 71.7%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5
6 #include "chunk_detector.h"
7
8 #include <algorithm>
9 #include <cassert>
10 #include <limits>
11
12 #include "ingestion/item.h"
13
14
15 4486275 uint64_t ChunkDetector::FindNextCutMark(BlockItem *block) {
16 4486275 const uint64_t result = DoFindNextCutMark(block);
17
2/2
✓ Branch 0 taken 1456510 times.
✓ Branch 1 taken 3030071 times.
4486581 if (result == 0)
18 1456510 offset_ += block->size();
19 4486513 return result;
20 }
21
22
23 //------------------------------------------------------------------------------
24
25
26 2767608 uint64_t StaticOffsetDetector::DoFindNextCutMark(BlockItem *buffer) {
27
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 2767608 times.
2767608 assert(buffer->type() == BlockItem::kBlockData);
28
29 2767608 const uint64_t beginning = offset();
30 2767608 const uint64_t end = offset() + buffer->size();
31
32 2767608 const uint64_t next_cut = last_cut() + chunk_size_;
33
3/4
✓ Branch 0 taken 2767608 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2764827 times.
✓ Branch 3 taken 2781 times.
2767608 if (next_cut >= beginning && next_cut < end) {
34 2764827 return DoCut(next_cut);
35 }
36
37 2781 return NoCut(next_cut);
38 }
39
40
41 //------------------------------------------------------------------------------
42
43
44 // This defines the center of the interval where the xor32 rolling checksum is
45 // queried. You should never change this number, since it affects the definition
46 // of cut marks.
47 const int32_t Xor32Detector::kMagicNumber = std::numeric_limits<uint32_t>::max()
48 / 2;
49
50
51 8623351 Xor32Detector::Xor32Detector(const uint64_t minimal_chunk_size,
52 const uint64_t average_chunk_size,
53 8623351 const uint64_t maximal_chunk_size)
54 8623351 : minimal_chunk_size_(minimal_chunk_size)
55 8623351 , average_chunk_size_(average_chunk_size)
56 8623351 , maximal_chunk_size_(maximal_chunk_size)
57 48 , threshold_(
58 (average_chunk_size > 0)
59 8623303 ? (std::numeric_limits<uint32_t>::max() / average_chunk_size)
60 : 0)
61 8623351 , xor32_ptr_(0)
62
2/2
✓ Branch 1 taken 8623303 times.
✓ Branch 2 taken 48 times.
8623351 , xor32_(0) {
63
3/4
✓ Branch 0 taken 8623303 times.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 8623303 times.
8623351 assert((average_chunk_size_ == 0) || (minimal_chunk_size_ > 0));
64
2/2
✓ Branch 0 taken 8623303 times.
✓ Branch 1 taken 48 times.
8623351 if (minimal_chunk_size_ > 0) {
65
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8623303 times.
8623303 assert(minimal_chunk_size_ >= kXor32Window);
66
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8623303 times.
8623303 assert(minimal_chunk_size_ < average_chunk_size_);
67
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8623303 times.
8623303 assert(average_chunk_size_ < maximal_chunk_size_);
68 }
69 8623351 }
70
71
72 1718667 uint64_t Xor32Detector::DoFindNextCutMark(BlockItem *buffer) {
73
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1718667 times.
1718667 assert(minimal_chunk_size_ > 0);
74 1718667 const unsigned char *data = buffer->data();
75
76 // Get the offset where the next xor32 computation needs to be continued
77 // Note: this could be after collecting at least kMinChunkSize bytes in the
78 // current chunk, or directly at the beginning of the buffer, when a
79 // cut mark is currently searched
80 1719041 const uint64_t global_offset = std::max(
81 1719041 last_cut() + static_cast<uint64_t>(minimal_chunk_size_ - kXor32Window),
82 1718633 xor32_ptr_);
83
84 // Check if the next xor32 computation is taking place in the current buffer
85
2/2
✓ Branch 2 taken 695139 times.
✓ Branch 3 taken 1023868 times.
1718667 if (global_offset >= offset() + static_cast<uint64_t>(buffer->size())) {
86
1/2
✓ Branch 1 taken 695139 times.
✗ Branch 2 not taken.
695139 return NoCut(global_offset);
87 }
88
89 // get the byte offset in the current buffer
90 1023868 uint64_t internal_offset = global_offset - offset();
91
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1023868 times.
1023868 assert(internal_offset < static_cast<uint64_t>(buffer->size()));
92
93 // Precompute the xor32 rolling checksum for finding the next cut mark
94 // Note: this might be skipped, if the precomputation was already performed
95 // for the current rolling checksum
96 // (internal_precompute_end will be negative --> loop is not entered)
97 1023868 const uint64_t precompute_end = last_cut() + minimal_chunk_size_;
98 1023902 const int64_t internal_precompute_end = std::min(
99 1023902 static_cast<int64_t>(precompute_end - offset()),
100 1023902 static_cast<int64_t>(buffer->size()));
101
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1023902 times.
1023902 assert(internal_precompute_end - static_cast<int64_t>(internal_offset)
102 <= static_cast<int64_t>(kXor32Window));
103
2/2
✓ Branch 0 taken 8501152 times.
✓ Branch 1 taken 1023902 times.
9525054 for (; static_cast<int64_t>(internal_offset) < internal_precompute_end;
104 ++internal_offset) {
105 8501152 xor32(data[internal_offset]);
106 }
107
108 // Do the actual computation and try to find a xor32 based cut mark
109 // Note: this loop is bound either by kMaxChunkSize or by the size of the
110 // current buffer, thus the computation would continue later
111 1023902 const uint64_t internal_max_chunk_size_end = last_cut() + maximal_chunk_size_
112 1023868 - offset();
113 1023800 const uint64_t internal_compute_end = std::min(
114 1023800 internal_max_chunk_size_end, static_cast<uint64_t>(buffer->size()));
115
2/2
✓ Branch 0 taken 14807971208 times.
✓ Branch 1 taken 761822 times.
14808733030 for (; internal_offset < internal_compute_end; ++internal_offset) {
116 14807971208 xor32(data[internal_offset]);
117
118 // check if we found a cut mark
119
2/2
✓ Branch 1 taken 262080 times.
✓ Branch 2 taken 14775579774 times.
14758891222 if (CheckThreshold()) {
120
1/2
✓ Branch 2 taken 262080 times.
✗ Branch 3 not taken.
262080 return DoCut(internal_offset + offset());
121 }
122 }
123
124 // Check if the loop was exited because we reached kMaxChunkSize and do a
125 // hard cut in this case. If not, it exited because we ran out of data in this
126 // buffer --> continue computation with the next buffer
127
2/2
✓ Branch 0 taken 3164 times.
✓ Branch 1 taken 758658 times.
761822 if (internal_offset == internal_max_chunk_size_end) {
128
1/2
✓ Branch 2 taken 3164 times.
✗ Branch 3 not taken.
3164 return DoCut(internal_offset + offset());
129 } else {
130
1/2
✓ Branch 2 taken 758624 times.
✗ Branch 3 not taken.
758658 return NoCut(internal_offset + offset());
131 }
132 }
133