GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/ingestion/chunk_detector.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 22 22 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_INGESTION_CHUNK_DETECTOR_H_
6 #define CVMFS_INGESTION_CHUNK_DETECTOR_H_
7
8 #include <gtest/gtest_prod.h>
9 #include <stdint.h>
10
11 #include <algorithm>
12 #include <cstdlib>
13
14 class BlockItem;
15
16 /**
17 * Abstract base class for a cutmark detector. This decides on which file
18 * positions a File should be chunked.
19 */
20 class ChunkDetector {
21 public:
22 332587 ChunkDetector() : last_cut_(0), offset_(0) { }
23 663536 virtual ~ChunkDetector() { }
24 uint64_t FindNextCutMark(BlockItem *block);
25
26 virtual bool MightFindChunks(uint64_t size) const = 0;
27
28 protected:
29 virtual uint64_t DoFindNextCutMark(BlockItem *block) = 0;
30
31 /**
32 * When returning from an implemented FindNextCutMark call you must call this
33 * function when a cut mark has been found.
34 * Like: return DoCut(found_offset)
35 */
36 2127072 virtual uint64_t DoCut(uint64_t offset) {
37 2127072 last_cut_ = offset;
38 2127072 return offset;
39 }
40
41 /**
42 * Same as DoCut() but if no cut mark has been found in the given buffer in
43 * FindNextCutMark()
44 */
45 200603 virtual uint64_t NoCut(uint64_t /* offset */) { return 0; }
46
47 2687223 uint64_t last_cut() const { return last_cut_; }
48 5096846 uint64_t offset() const { return offset_; }
49
50 private:
51 uint64_t last_cut_;
52 uint64_t offset_;
53 };
54
55
56 /**
57 * The StaticOffsetDetector cuts files on a hard threshold and generates
58 * uniform sized Chunks.
59 */
60 class StaticOffsetDetector : public ChunkDetector {
61 public:
62 20 explicit StaticOffsetDetector(uint64_t s) : chunk_size_(s) { }
63 40 bool MightFindChunks(uint64_t size) const { return size > chunk_size_; }
64
65 protected:
66 virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
67
68 private:
69 const uint64_t chunk_size_;
70 };
71
72
73 /**
74 * The xor32 rolling used in Igor-FS [1].
75 *
76 * It takes a byte stream and constantly computes a 32-bit checksum in a way
77 * that the result is only dependent on the last read 32 bytes of the stream.
78 * Given random input data, the checksum eventually produces each number in the
79 * 32-bit value range with roughly the same probability.
80 * We exploit that by constantly checking the rolling checksum result for a
81 * specific interval. Thus, we detect characteristic 32-byte long patches in a
82 * file that do not depend on their actual position inside the data stream.
83 * Thereby local modifications of a file might not affect global chunk creation.
84 *
85 * [1] "The Decentralized File System Igor-FS
86 * as an Application for Overlay-Networks"
87 * Dissertation of Dipl.-Ing. Kendy Kutzner (2008)
88 */
89 class Xor32Detector : public ChunkDetector {
90 FRIEND_TEST(T_ChunkDetectors, Xor32);
91
92 public:
93 Xor32Detector(const uint64_t minimal_chunk_size,
94 const uint64_t average_chunk_size,
95 const uint64_t maximal_chunk_size);
96
97 250040 bool MightFindChunks(const uint64_t size) const {
98 250040 return size > minimal_chunk_size_;
99 }
100
101 protected:
102 virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
103
104 79052 virtual uint64_t DoCut(const uint64_t offset) {
105 79052 xor32_ = 0;
106 79052 xor32_ptr_ = offset;
107 79052 return ChunkDetector::DoCut(offset);
108 }
109
110 198547 virtual uint64_t NoCut(const uint64_t offset) {
111 198547 xor32_ptr_ = offset;
112 198547 return ChunkDetector::NoCut(offset);
113 }
114
115 5879434463 inline void xor32(const unsigned char byte) { xor32_ = (xor32_ << 1) ^ byte; }
116
117 5876792600 inline bool CheckThreshold() {
118 5876792600 return abs(static_cast<int32_t>(xor32_) - kMagicNumber) < threshold_;
119 }
120
121 private:
122 // xor32 only depends on a window of the last 32 bytes in the data stream
123 static const unsigned kXor32Window = 32;
124 static const int32_t kMagicNumber;
125
126 const uint64_t minimal_chunk_size_;
127 const uint64_t average_chunk_size_;
128 const uint64_t maximal_chunk_size_;
129 const int32_t threshold_;
130
131 uint64_t xor32_ptr_;
132 uint32_t xor32_;
133 };
134
135 #endif // CVMFS_INGESTION_CHUNK_DETECTOR_H_
136