GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/ingestion/chunk_detector.h Lines: 26 26 100.0 %
Date: 2019-02-03 02:48:13 Branches: 3 6 50.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#ifndef CVMFS_INGESTION_CHUNK_DETECTOR_H_
6
#define CVMFS_INGESTION_CHUNK_DETECTOR_H_
7
8
#include <gtest/gtest_prod.h>
9
#include <stdint.h>
10
11
#include <cstdlib>
12
13
#include <algorithm>
14
15
class BlockItem;
16
17
/**
18
 * Abstract base class for a cutmark detector. This decides on which file
19
 * positions a File should be chunked.
20
 */
21
class ChunkDetector {
22
 public:
23
966
  ChunkDetector() : last_cut_(0), offset_(0) {}
24
966
  virtual ~ChunkDetector() { }
25
  uint64_t FindNextCutMark(BlockItem *block);
26
27
  virtual bool MightFindChunks(uint64_t size) const = 0;
28
29
 protected:
30
  virtual uint64_t DoFindNextCutMark(BlockItem *block) = 0;
31
32
  /**
33
   * When returning from an implemented FindNextCutMark call you must call this
34
   * function when a cut mark has been found.
35
   * Like: return DoCut(found_offset)
36
   */
37
109426
  virtual uint64_t DoCut(uint64_t offset) {
38
109426
    last_cut_ = offset;
39
109426
    return offset;
40
  }
41
42
  /**
43
   * Same as DoCut() but if no cut mark has been found in the given buffer in
44
   * FindNextCutMark()
45
   */
46
121234
  virtual uint64_t NoCut(uint64_t offset) { return 0; }
47
48
373162
  uint64_t last_cut() const { return last_cut_; }
49
618168
  uint64_t offset() const { return offset_; }
50
51
 private:
52
  uint64_t last_cut_;
53
  uint64_t offset_;
54
};
55
56
57
/**
58
 * The StaticOffsetDetector cuts files on a hard threshold and generates
59
 * uniform sized Chunks.
60
 */
61
1
class StaticOffsetDetector : public ChunkDetector {
62
 public:
63
1
  explicit StaticOffsetDetector(uint64_t s) : chunk_size_(s) { }
64
2
  bool MightFindChunks(uint64_t size) const { return size > chunk_size_; }
65
66
 protected:
67
  virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
68
69
 private:
70
  const uint64_t chunk_size_;
71
};
72
73
74
/**
75
 * The xor32 rolling used in Igor-FS [1].
76
 *
77
 * It takes a byte stream and constantly computes a 32-bit checksum in a way
78
 * that the result is only dependent on the last read 32 bytes of the stream.
79
 * Given random input data, the checksum eventually produces each number in the
80
 * 32-bit value range with roughly the same probability.
81
 * We exploit that by constantly checking the rolling checksum result for a
82
 * specific interval. Thus, we detect characteristic 32-byte long patches in a
83
 * file that do not depend on their actual position inside the data stream.
84
 * Thereby local modifications of a file might not affect global chunk creation.
85
 *
86
 * [1]     "The Decentralized File System Igor-FS
87
 *          as an Application for Overlay-Networks"
88
 *     Dissertation of Dipl.-Ing. Kendy Kutzner (2008)
89
 */
90
965
class Xor32Detector : public ChunkDetector {
91
  FRIEND_TEST(T_ChunkDetectors, Xor32);
92
93
 public:
94
  Xor32Detector(const uint64_t minimal_chunk_size,
95
                const uint64_t average_chunk_size,
96
                const uint64_t maximal_chunk_size);
97
98
105
  bool MightFindChunks(const uint64_t size) const {
99
105
    return size > minimal_chunk_size_;
100
  }
101
102
 protected:
103
  virtual uint64_t DoFindNextCutMark(BlockItem *buffer);
104
105
7025
  virtual uint64_t DoCut(const uint64_t offset) {
106
7025
    xor32_     = 0;
107
7025
    xor32_ptr_ = offset;
108
7025
    return ChunkDetector::DoCut(offset);
109
  }
110
111
121131
  virtual uint64_t NoCut(const uint64_t offset) {
112
121131
    xor32_ptr_ = offset;
113
121131
    return ChunkDetector::NoCut(offset);
114
  }
115
116
1242867123
  inline void xor32(const unsigned char byte) {
117
1242867123
    xor32_ = (xor32_ << 1) ^ byte;
118
1242867123
  }
119
120
1242640993
  inline bool CheckThreshold() {
121
1242640993
    return abs(static_cast<int32_t>(xor32_) - kMagicNumber) < threshold_;
122
  }
123
124
 private:
125
  // xor32 only depends on a window of the last 32 bytes in the data stream
126
  static const unsigned kXor32Window = 32;
127
  static const int32_t kMagicNumber;
128
129
  const uint64_t minimal_chunk_size_;
130
  const uint64_t average_chunk_size_;
131
  const uint64_t maximal_chunk_size_;
132
  const int32_t threshold_;
133
134
  uint64_t xor32_ptr_;
135
  uint32_t xor32_;
136
};
137
138
#endif  // CVMFS_INGESTION_CHUNK_DETECTOR_H_