GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/ingestion/chunk_detector.h
Date: 2024-04-21 02:33:16
Exec Total Coverage
Lines: 24 24 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_INGESTION_CHUNK_DETECTOR_H_
6 #define CVMFS_INGESTION_CHUNK_DETECTOR_H_
7
8 #include <gtest/gtest_prod.h>
9 #include <stdint.h>
10
11 #include <cstdlib>
12
13 #include <algorithm>
14
15 class BlockItem;
16
17 /**
18 * Abstract base class for a cutmark detector. This decides on which file
19 * positions a File should be chunked.
20 */
21 class ChunkDetector {
22 public:
23 253267 ChunkDetector() : last_cut_(0), offset_(0) {}
24 506312 virtual ~ChunkDetector() { }
25 uint64_t FindNextCutMark(BlockItem *block);
26
27 virtual bool MightFindChunks(uint64_t size) const = 0;
28
29 protected:
30 virtual uint64_t DoFindNextCutMark(BlockItem *block) = 0;
31
32 /**
33 * When returning from an implemented FindNextCutMark call you must call this
34 * function when a cut mark has been found.
35 * Like: return DoCut(found_offset)
36 */
37 108232 virtual uint64_t DoCut(uint64_t offset) {
38 108232 last_cut_ = offset;
39 108232 return offset;
40 }
41
42 /**
43 * Same as DoCut() but if no cut mark has been found in the given buffer in
44 * FindNextCutMark()
45 */
46 39048 virtual uint64_t NoCut(uint64_t /* offset */) { return 0; }
47
48 199753 uint64_t last_cut() const { return last_cut_; }
49 354743 uint64_t offset() const { return offset_; }
50
51 private:
52 uint64_t last_cut_;
53 uint64_t offset_;
54 };
55
56
57 /**
58 * The StaticOffsetDetector cuts files on a hard threshold and generates
59 * uniform sized Chunks.
60 */
61 class StaticOffsetDetector : public ChunkDetector {
62 public:
63 1 explicit StaticOffsetDetector(uint64_t s) : chunk_size_(s) { }
64 2 bool MightFindChunks(uint64_t size) const { return size > chunk_size_; }
65
66 protected:
67 virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
68
69 private:
70 const uint64_t chunk_size_;
71 };
72
73
74 /**
75 * The xor32 rolling used in Igor-FS [1].
76 *
77 * It takes a byte stream and constantly computes a 32-bit checksum in a way
78 * that the result is only dependent on the last read 32 bytes of the stream.
79 * Given random input data, the checksum eventually produces each number in the
80 * 32-bit value range with roughly the same probability.
81 * We exploit that by constantly checking the rolling checksum result for a
82 * specific interval. Thus, we detect characteristic 32-byte long patches in a
83 * file that do not depend on their actual position inside the data stream.
84 * Thereby local modifications of a file might not affect global chunk creation.
85 *
86 * [1] "The Decentralized File System Igor-FS
87 * as an Application for Overlay-Networks"
88 * Dissertation of Dipl.-Ing. Kendy Kutzner (2008)
89 */
90 class Xor32Detector : public ChunkDetector {
91 FRIEND_TEST(T_ChunkDetectors, Xor32);
92
93 public:
94 Xor32Detector(const uint64_t minimal_chunk_size,
95 const uint64_t average_chunk_size,
96 const uint64_t maximal_chunk_size);
97
98 249944 bool MightFindChunks(const uint64_t size) const {
99 249944 return size > minimal_chunk_size_;
100 }
101
102 protected:
103 virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
104
105 5831 virtual uint64_t DoCut(const uint64_t offset) {
106 5831 xor32_ = 0;
107 5831 xor32_ptr_ = offset;
108 5831 return ChunkDetector::DoCut(offset);
109 }
110
111 38946 virtual uint64_t NoCut(const uint64_t offset) {
112 38946 xor32_ptr_ = offset;
113 38946 return ChunkDetector::NoCut(offset);
114 }
115
116 496915785 inline void xor32(const unsigned char byte) {
117 496915785 xor32_ = (xor32_ << 1) ^ byte;
118 496915785 }
119
120 496194188 inline bool CheckThreshold() {
121 496194188 return abs(static_cast<int32_t>(xor32_) - kMagicNumber) < threshold_;
122 }
123
124 private:
125 // xor32 only depends on a window of the last 32 bytes in the data stream
126 static const unsigned kXor32Window = 32;
127 static const int32_t kMagicNumber;
128
129 const uint64_t minimal_chunk_size_;
130 const uint64_t average_chunk_size_;
131 const uint64_t maximal_chunk_size_;
132 const int32_t threshold_;
133
134 uint64_t xor32_ptr_;
135 uint32_t xor32_;
136 };
137
138 #endif // CVMFS_INGESTION_CHUNK_DETECTOR_H_
139