GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/ingestion/chunk_detector.cc
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 55 55 100.0%
Branches: 33 46 71.7%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #include "cvmfs_config.h"
6 #include "chunk_detector.h"
7
8 #include <algorithm>
9 #include <cassert>
10 #include <limits>
11
12 #include "ingestion/item.h"
13
14
15 147286 uint64_t ChunkDetector::FindNextCutMark(BlockItem *block) {
16 147286 uint64_t result = DoFindNextCutMark(block);
17
2/2
✓ Branch 0 taken 39048 times.
✓ Branch 1 taken 108232 times.
147280 if (result == 0)
18 39048 offset_ += block->size();
19 147278 return result;
20 }
21
22
23 //------------------------------------------------------------------------------
24
25
26 102504 uint64_t StaticOffsetDetector::DoFindNextCutMark(BlockItem *buffer) {
27
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 102504 times.
102504 assert(buffer->type() == BlockItem::kBlockData);
28
29 102504 const uint64_t beginning = offset();
30 102504 const uint64_t end = offset() + buffer->size();
31
32 102504 const uint64_t next_cut = last_cut() + chunk_size_;
33
3/4
✓ Branch 0 taken 102504 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 102401 times.
✓ Branch 3 taken 103 times.
102504 if (next_cut >= beginning && next_cut < end) {
34 102401 return DoCut(next_cut);
35 }
36
37 103 return NoCut(next_cut);
38 }
39
40
41 //------------------------------------------------------------------------------
42
43
44
45 // This defines the center of the interval where the xor32 rolling checksum is
46 // queried. You should never change this number, since it affects the definition
47 // of cut marks.
48 const int32_t Xor32Detector::kMagicNumber =
49 std::numeric_limits<uint32_t>::max() / 2;
50
51
52 253266 Xor32Detector::Xor32Detector(const uint64_t minimal_chunk_size,
53 const uint64_t average_chunk_size,
54 253266 const uint64_t maximal_chunk_size)
55 253266 : minimal_chunk_size_(minimal_chunk_size)
56 253266 , average_chunk_size_(average_chunk_size)
57 253266 , maximal_chunk_size_(maximal_chunk_size)
58 1 , threshold_((average_chunk_size > 0)
59 253265 ? (std::numeric_limits<uint32_t>::max() / average_chunk_size)
60 : 0)
61 253266 , xor32_ptr_(0)
62
2/2
✓ Branch 1 taken 253265 times.
✓ Branch 2 taken 1 times.
253266 , xor32_(0)
63 {
64
3/4
✓ Branch 0 taken 253265 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 253265 times.
253266 assert((average_chunk_size_ == 0) || (minimal_chunk_size_ > 0));
65
2/2
✓ Branch 0 taken 253265 times.
✓ Branch 1 taken 1 times.
253266 if (minimal_chunk_size_ > 0) {
66
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 253265 times.
253265 assert(minimal_chunk_size_ >= kXor32Window);
67
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 253265 times.
253265 assert(minimal_chunk_size_ < average_chunk_size_);
68
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 253265 times.
253265 assert(average_chunk_size_ < maximal_chunk_size_);
69 }
70 253266 }
71
72
73 44780 uint64_t Xor32Detector::DoFindNextCutMark(BlockItem *buffer) {
74
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44780 times.
44780 assert(minimal_chunk_size_ > 0);
75 44780 const unsigned char *data = buffer->data();
76
77 // Get the offset where the next xor32 computation needs to be continued
78 // Note: this could be after collecting at least kMinChunkSize bytes in the
79 // current chunk, or directly at the beginning of the buffer, when a
80 // cut mark is currently searched
81 const uint64_t global_offset =
82 44776 std::max(
83 134329 last_cut() +
84 44776 static_cast<uint64_t>(minimal_chunk_size_ - kXor32Window),
85 44777 xor32_ptr_);
86
87 // Check if the next xor32 computation is taking place in the current buffer
88
2/2
✓ Branch 2 taken 18534 times.
✓ Branch 3 taken 26244 times.
44776 if (global_offset >= offset() + static_cast<uint64_t>(buffer->size())) {
89
1/2
✓ Branch 1 taken 18536 times.
✗ Branch 2 not taken.
18534 return NoCut(global_offset);
90 }
91
92 // get the byte offset in the current buffer
93 26244 uint64_t internal_offset = global_offset - offset();
94
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 26244 times.
26244 assert(internal_offset < static_cast<uint64_t>(buffer->size()));
95
96 // Precompute the xor32 rolling checksum for finding the next cut mark
97 // Note: this might be skipped, if the precomputation was already performed
98 // for the current rolling checksum
99 // (internal_precompute_end will be negative --> loop is not entered)
100 26244 const uint64_t precompute_end = last_cut() + minimal_chunk_size_;
101 const int64_t internal_precompute_end =
102 26244 std::min(static_cast<int64_t>(precompute_end - offset()),
103 26244 static_cast<int64_t>(buffer->size()));
104
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26244 times.
26244 assert(internal_precompute_end - static_cast<int64_t>(internal_offset) <=
105 static_cast<int64_t>(kXor32Window));
106
2/2
✓ Branch 0 taken 187010 times.
✓ Branch 1 taken 26244 times.
213254 for (; static_cast<int64_t>(internal_offset) < internal_precompute_end;
107 ++internal_offset)
108 {
109 187010 xor32(data[internal_offset]);
110 }
111
112 // Do the actual computation and try to find a xor32 based cut mark
113 // Note: this loop is bound either by kMaxChunkSize or by the size of the
114 // current buffer, thus the computation would continue later
115 const uint64_t internal_max_chunk_size_end =
116 26244 last_cut() + maximal_chunk_size_ - offset();
117 const uint64_t internal_compute_end =
118 26243 std::min(internal_max_chunk_size_end,
119 26243 static_cast<uint64_t>(buffer->size()));
120
2/2
✓ Branch 0 taken 497389622 times.
✓ Branch 1 taken 20511 times.
497410133 for (; internal_offset < internal_compute_end; ++internal_offset) {
121 497389622 xor32(data[internal_offset]);
122
123 // check if we found a cut mark
124
2/2
✓ Branch 1 taken 5733 times.
✓ Branch 2 taken 496531124 times.
496434001 if (CheckThreshold()) {
125
1/2
✓ Branch 2 taken 5733 times.
✗ Branch 3 not taken.
5733 return DoCut(internal_offset + offset());
126 }
127 }
128
129 // Check if the loop was exited because we reached kMaxChunkSize and do a
130 // hard cut in this case. If not, it exited because we ran out of data in this
131 // buffer --> continue computation with the next buffer
132
2/2
✓ Branch 0 taken 98 times.
✓ Branch 1 taken 20413 times.
20511 if (internal_offset == internal_max_chunk_size_end) {
133
1/2
✓ Branch 2 taken 98 times.
✗ Branch 3 not taken.
98 return DoCut(internal_offset + offset());
134 } else {
135
1/2
✓ Branch 2 taken 20413 times.
✗ Branch 3 not taken.
20413 return NoCut(internal_offset + offset());
136 }
137 }
138