GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/ingestion/chunk_detector.cc Lines: 44 44 100.0 %
Date: 2019-02-03 02:48:13 Branches: 31 42 73.8 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#include "cvmfs_config.h"
6
#include "chunk_detector.h"
7
8
#include <algorithm>
9
#include <cassert>
10
#include <limits>
11
12
#include "ingestion/item.h"
13
14
15
230660
uint64_t ChunkDetector::FindNextCutMark(BlockItem *block) {
16
230660
  uint64_t result = DoFindNextCutMark(block);
17
230660
  if (result == 0)
18
121234
    offset_ += block->size();
19
230660
  return result;
20
}
21
22
23
//------------------------------------------------------------------------------
24
25
26
102504
uint64_t StaticOffsetDetector::DoFindNextCutMark(BlockItem *buffer) {
27
102504
  assert(buffer->type() == BlockItem::kBlockData);
28
29
102504
  const uint64_t beginning = offset();
30
102504
  const uint64_t end = offset() + buffer->size();
31
32
102504
  const uint64_t next_cut = last_cut() + chunk_size_;
33

102504
  if (next_cut >= beginning && next_cut < end) {
34
102401
    return DoCut(next_cut);
35
  }
36
37
103
  return NoCut(next_cut);
38
}
39
40
41
//------------------------------------------------------------------------------
42
43
44
45
// This defines the center of the interval where the xor32 rolling checksum is
46
// queried. You should never change this number, since it affects the definition
47
// of cut marks.
48
15
const int32_t Xor32Detector::kMagicNumber =
49
  std::numeric_limits<uint32_t>::max() / 2;
50
51
52
965
Xor32Detector::Xor32Detector(const uint64_t minimal_chunk_size,
53
                             const uint64_t average_chunk_size,
54
                             const uint64_t maximal_chunk_size)
55
  : minimal_chunk_size_(minimal_chunk_size)
56
  , average_chunk_size_(average_chunk_size)
57
  , maximal_chunk_size_(maximal_chunk_size)
58
  , threshold_((average_chunk_size > 0)
59
               ? (std::numeric_limits<uint32_t>::max() / average_chunk_size)
60
               : 0)
61
  , xor32_ptr_(0)
62
965
  , xor32_(0)
63
{
64

965
  assert((average_chunk_size_ == 0) || (minimal_chunk_size_ > 0));
65
965
  if (minimal_chunk_size_ > 0) {
66
964
    assert(minimal_chunk_size_ >= kXor32Window);
67
964
    assert(minimal_chunk_size_ < average_chunk_size_);
68
964
    assert(average_chunk_size_ < maximal_chunk_size_);
69
  }
70
}
71
72
73
128156
uint64_t Xor32Detector::DoFindNextCutMark(BlockItem *buffer) {
74
128156
  assert(minimal_chunk_size_ > 0);
75
128156
  const unsigned char *data = buffer->data();
76
77
  // Get the offset where the next xor32 computation needs to be continued
78
  // Note: this could be after collecting at least kMinChunkSize bytes in the
79
  //       current chunk, or directly at the beginning of the buffer, when a
80
  //       cut mark is currently searched
81
  const uint64_t global_offset =
82
    std::max(
83
           last_cut() +
84
           static_cast<uint64_t>(minimal_chunk_size_ - kXor32Window),
85
128156
           xor32_ptr_);
86
87
  // Check if the next xor32 computation is taking place in the current buffer
88
128156
  if (global_offset >= offset() + static_cast<uint64_t>(buffer->size())) {
89
56905
    return NoCut(global_offset);
90
  }
91
92
  // get the byte offset in the current buffer
93
71251
  uint64_t internal_offset = global_offset - offset();
94
71251
  assert(internal_offset < static_cast<uint64_t>(buffer->size()));
95
96
  // Precompute the xor32 rolling checksum for finding the next cut mark
97
  // Note: this might be skipped, if the precomputation was already performed
98
  //       for the current rolling checksum
99
  //       (internal_precompute_end will be negative --> loop is not entered)
100
71251
  const uint64_t precompute_end = last_cut() + minimal_chunk_size_;
101
  const int64_t internal_precompute_end =
102
    std::min(static_cast<int64_t>(precompute_end - offset()),
103
71251
             static_cast<int64_t>(buffer->size()));
104
  assert(internal_precompute_end - static_cast<int64_t>(internal_offset) <=
105
71251
         static_cast<int64_t>(kXor32Window));
106
297333
  for (; static_cast<int64_t>(internal_offset) < internal_precompute_end;
107
       ++internal_offset)
108
  {
109
226082
    xor32(data[internal_offset]);
110
  }
111
112
  // Do the actual computation and try to find a xor32 based cut mark
113
  // Note: this loop is bound either by kMaxChunkSize or by the size of the
114
  //       current buffer, thus the computation would continue later
115
  const uint64_t internal_max_chunk_size_end =
116
71251
    last_cut() + maximal_chunk_size_ - offset();
117
  const uint64_t internal_compute_end =
118
    std::min(internal_max_chunk_size_end,
119
71251
             static_cast<uint64_t>(buffer->size()));
120
1242705521
  for (; internal_offset < internal_compute_end; ++internal_offset) {
121
1242640993
    xor32(data[internal_offset]);
122
123
    // check if we found a cut mark
124
1242640993
    if (CheckThreshold()) {
125
6723
      return DoCut(internal_offset + offset());
126
    }
127
  }
128
129
  // Check if the loop was exited because we reached kMaxChunkSize and do a
130
  // hard cut in this case. If not, it exited because we ran out of data in this
131
  // buffer --> continue computation with the next buffer
132
64528
  if (internal_offset == internal_max_chunk_size_end) {
133
302
    return DoCut(internal_offset + offset());
134
  } else {
135
64226
    return NoCut(internal_offset + offset());
136
  }
137

45
}