GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/swissknife_graft.cc Lines: 0 186 0.0 %
Date: 2019-02-03 02:48:13 Branches: 0 162 0.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 *
4
 * Process a set of input files and create appropriate graft files.
5
 */
6
7
#include "swissknife_graft.h"
8
#include "cvmfs_config.h"
9
10
#include <fcntl.h>
11
#include <unistd.h>
12
13
#include <cstdio>
14
#include <vector>
15
16
#include "fs_traversal.h"
17
#include "hash.h"
18
#include "platform.h"
19
#include "util/posix.h"
20
21
bool swissknife::CommandGraft::ChecksumFdWithChunks(
22
    int fd, zlib::Compressor *compressor, uint64_t *file_size,
23
    shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
24
    std::vector<shash::Any> *chunk_checksums) {
25
  if (!compressor || !file_size || !file_hash) {
26
    return false;
27
  }
28
  *file_size = 0;
29
  shash::Any chunk_hash(hash_alg_);
30
  ssize_t bytes_read;
31
  unsigned char in_buf[zlib::kZChunk];
32
  unsigned char *cur_in_buf = in_buf;
33
  size_t in_buf_size = zlib::kZChunk;
34
  unsigned char out_buf[zlib::kZChunk];
35
  size_t avail_in = 0;
36
37
  // Initialize the file and per-chunk checksums
38
  shash::ContextPtr file_hash_context(hash_alg_);
39
  file_hash_context.buffer = alloca(file_hash_context.size);
40
  shash::Init(file_hash_context);
41
42
  shash::ContextPtr chunk_hash_context(hash_alg_);
43
  chunk_hash_context.buffer = alloca(chunk_hash_context.size);
44
  shash::Init(chunk_hash_context);
45
46
  bool do_chunk = chunk_size_ > 0;
47
  if (do_chunk) {
48
    if (!chunk_offsets || !chunk_checksums) {
49
      return false;
50
    }
51
    chunk_offsets->push_back(0);
52
  }
53
54
  bool flush = 0;
55
  do {
56
    bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
57
    if (-1 == bytes_read) {
58
      if (errno == EINTR) {
59
        continue;
60
      }
61
      LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
62
               strerror(errno));
63
      return false;
64
    }
65
    *file_size += bytes_read;
66
    avail_in += bytes_read;
67
68
    flush = (static_cast<size_t>(bytes_read) < in_buf_size);
69
70
    // If possible, make progress on deflate.
71
    unsigned char *cur_out_buf = out_buf;
72
    size_t avail_out = zlib::kZChunk;
73
    compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
74
                        &avail_out);
75
    if (do_chunk) {
76
      shash::Update(out_buf, avail_out, chunk_hash_context);
77
      if (generate_bulk_hash_)
78
        shash::Update(out_buf, avail_out, file_hash_context);
79
    } else {
80
      shash::Update(out_buf, avail_out, file_hash_context);
81
    }
82
83
    if (!avail_in) {
84
      // All bytes are consumed; set the buffer back to the beginning.
85
      cur_in_buf = in_buf;
86
      in_buf_size = zlib::kZChunk;
87
    } else {
88
      in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
89
    }
90
91
    // Start a new hash if current one is above threshold
92
    if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
93
      shash::Final(chunk_hash_context, &chunk_hash);
94
      chunk_offsets->push_back(*file_size);
95
      chunk_checksums->push_back(chunk_hash);
96
      shash::Init(chunk_hash_context);
97
    }
98
  } while (!flush);
99
100
  shash::Final(file_hash_context, file_hash);
101
  if (do_chunk) {
102
    shash::Final(chunk_hash_context, &chunk_hash);
103
    chunk_checksums->push_back(chunk_hash);
104
  }
105
106
  // Zero-size chunks are not allowed;
107
  if (do_chunk && (chunk_offsets->back() == *file_size)) {
108
    chunk_offsets->pop_back();
109
    chunk_checksums->pop_back();
110
  }
111
112
  if (do_chunk && !generate_bulk_hash_)
113
    file_hash->SetNull();
114
115
  // Do not chunk a file if it is under threshold.
116
  if (do_chunk && (chunk_offsets->size() == 1)) {
117
    *file_hash = (*chunk_checksums)[0];
118
    chunk_offsets->clear();
119
    chunk_checksums->clear();
120
  }
121
  return true;
122
}
123
124
bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
125
                                           const std::string &dir_name) {
126
  if (!output_file_.size()) {
127
    return true;
128
  }
129
  std::string full_output_path = output_file_ + "/" +
130
                                 (relative_path.size() ? relative_path : ".") +
131
                                 "/" + dir_name;
132
  std::string full_input_path = input_file_ + "/" +
133
                                (relative_path.size() ? relative_path : ".") +
134
                                "/" + dir_name;
135
  platform_stat64 sbuf;
136
  if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
137
    return false;
138
  }
139
  return MkdirDeep(full_output_path, sbuf.st_mode);
140
}
141
142
void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
143
                                            const std::string &file_name) {
144
  std::string full_input_path = input_file_ + "/" +
145
                                (relative_path.size() ? relative_path : ".") +
146
                                "/" + file_name;
147
  std::string full_output_path;
148
  if (output_file_.size()) {
149
    full_output_path = output_file_ + "/" +
150
                       (relative_path.size() ? relative_path : ".") + "/" +
151
                       file_name;
152
  }
153
  Publish(full_input_path, full_output_path, false, false);
154
}
155
156
/**
 * Entry point of the graft command: reads the options, then grafts either a
 * single input (a file, or stdin when the input is "-") or a directory tree.
 *
 * Options read from args:
 *   i  input path, "-" for stdin (dereferenced unconditionally)
 *   o  output path; empty when absent
 *   v  verbose
 *   b  also generate the bulk hash
 *   a  hash algorithm; shash::kSha1 when absent
 *   Z  compression algorithm; zlib::kNoCompression when absent
 *   c  chunk size; kDefaultChunkSize when absent
 *
 * @return 1 on an unparsable chunk size or an invalid input/output
 *         combination, otherwise the result of Publish() or Recurse()
 */
int swissknife::CommandGraft::Main(const swissknife::ArgumentList &args) {
  const std::string &input_file = *args.find('i')->second;

  std::string output_file;
  if (args.find('o') != args.end()) {
    output_file = *(args.find('o')->second);
  }

  verbose_ = (args.find('v') != args.end());
  generate_bulk_hash_ = (args.find('b') != args.end());

  if (args.find('a') != args.end()) {
    hash_alg_ = shash::ParseHashAlgorithm(*args.find('a')->second);
  } else {
    hash_alg_ = shash::kSha1;
  }

  if (args.find('Z') != args.end()) {
    compression_alg_ =
        zlib::ParseCompressionAlgorithm(*args.find('Z')->second);
  } else {
    compression_alg_ = zlib::kNoCompression;
  }

  if (args.find('c') != args.end()) {
    const std::string chunk_size_arg = *args.find('c')->second;
    if (!String2Uint64Parse(chunk_size_arg, &chunk_size_)) {
      LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
               chunk_size_arg.c_str());
      return 1;
    }
  } else {
    chunk_size_ = kDefaultChunkSize;
  }
  // The value counts units of 1024 * 1024 bytes; keep it in bytes from here on.
  chunk_size_ *= 1024 * 1024;

  platform_stat64 info;
  bool output_file_is_dir = false;
  if (!output_file.empty() &&
      (platform_stat(output_file.c_str(), &info) == 0)) {
    output_file_is_dir = S_ISDIR(info.st_mode);
  }

  // Input from stdin: the graft cannot be named after an input file, so the
  // output must not be a directory.
  if (input_file == "-") {
    if (output_file_is_dir) {
      LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
               output_file.c_str());
      return 1;
    }
    return Publish(input_file, output_file, output_file_is_dir, true);
  }

  const bool input_file_is_dir =
      (platform_stat(input_file.c_str(), &info) == 0) &&
      S_ISDIR(info.st_mode);
  if (!input_file_is_dir) {
    return Publish(input_file, output_file, output_file_is_dir, false);
  }

  // Directory input: an output, if given, has to be a directory as well.
  if (!output_file_is_dir && !output_file.empty()) {
    LogCvmfs(kLogCvmfs, kLogStderr,
             "Input (%s) is a directory but output"
             " (%s) is not\n",
             input_file.c_str(), output_file.c_str());
    return 1;
  }
  if (verbose_) {
    LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
             input_file.c_str());
  }
  return Recurse(input_file, output_file);
}
214
215
int swissknife::CommandGraft::Publish(const std::string &input_file,
216
                                      const std::string &output_file,
217
                                      bool output_file_is_dir,
218
                                      bool input_file_is_stdin) {
219
  if (output_file.size() && verbose_) {
220
    LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
221
             output_file.c_str());
222
  } else if (!output_file.size()) {
223
    LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
224
  }
225
  int fd;
226
  if (input_file_is_stdin) {
227
    fd = 0;
228
  } else {
229
    fd = open(input_file.c_str(), O_RDONLY);
230
    if (fd < 0) {
231
      std::string errmsg = "Unable to open input file (" + input_file + ")";
232
      perror(errmsg.c_str());
233
      return 1;
234
    }
235
  }
236
237
  // Get input file mode; output file will be set identically.
238
  platform_stat64 sbuf;
239
  if (-1 == platform_fstat(fd, &sbuf)) {
240
    std::string errmsg = "Unable to stat input file (" + input_file + ")";
241
    perror(errmsg.c_str());
242
  }
243
  mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
244
245
  shash::Any file_hash(hash_alg_);
246
  uint64_t processed_size;
247
  std::vector<uint64_t> chunk_offsets;
248
  std::vector<shash::Any> chunk_checksums;
249
  zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
250
251
  bool retval =
252
      ChecksumFdWithChunks(fd, compressor, &processed_size, &file_hash,
253
                           &chunk_offsets, &chunk_checksums);
254
255
  if (!input_file_is_stdin) {
256
    close(fd);
257
  }
258
  if (!retval) {
259
    std::string errmsg = "Unable to checksum input file (" + input_file + ")";
260
    perror(errmsg.c_str());
261
    return 1;
262
  }
263
264
  // Build the .cvmfsgraft-$filename
265
  if (output_file.size()) {
266
    std::string dirname, fname;
267
    std::string graft_fname;
268
    if (output_file_is_dir) {
269
      SplitPath(input_file, &dirname, &fname);
270
      graft_fname = output_file + "/.cvmfsgraft-" + fname;
271
    } else {
272
      SplitPath(output_file, &dirname, &fname);
273
      graft_fname = dirname + "/.cvmfsgraft-" + fname;
274
    }
275
    fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
276
    if (fd < 0) {
277
      std::string errmsg = "Unable to open graft file (" + graft_fname + ")";
278
      perror(errmsg.c_str());
279
      return 1;
280
    }
281
  } else {
282
    fd = 1;
283
  }
284
  const bool with_suffix = true;
285
  std::string graft_contents = "size=" + StringifyInt(processed_size) + "\n" +
286
                               "checksum=" + file_hash.ToString(with_suffix) +
287
                               "\n";
288
  if (!chunk_offsets.empty()) {
289
    std::vector<std::string> chunk_off_str;
290
    chunk_off_str.reserve(chunk_offsets.size());
291
    std::vector<std::string> chunk_ck_str;
292
    chunk_ck_str.reserve(chunk_offsets.size());
293
    for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
294
      chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
295
      chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
296
    }
297
    graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
298
    graft_contents +=
299
        "chunk_checksums=" + JoinStrings(chunk_ck_str, ",") + "\n";
300
  }
301
  size_t nbytes = graft_contents.size();
302
  const char *buf = graft_contents.c_str();
303
  retval = SafeWrite(fd, buf, nbytes);
304
  if (!retval) {
305
    perror("Failed writing to graft file");
306
    close(fd);
307
    return 1;
308
  }
309
  if (output_file.size()) {
310
    close(fd);
311
  } else {
312
    return 0;
313
  }
314
315
  // Create and truncate the output file.
316
  std::string output_fname;
317
  if (output_file_is_dir) {
318
    output_fname = output_file + "/" + GetFileName(input_file);
319
  } else {
320
    output_fname = output_file;
321
  }
322
  fd =
323
      open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, input_file_mode);
324
  if (fd < 0) {
325
    std::string errmsg = "Unable to open output file (" + output_file + ")";
326
    perror(errmsg.c_str());
327
    return 1;
328
  }
329
  close(fd);
330
331
  return 0;
332
}
333
334
int swissknife::CommandGraft::Recurse(const std::string &input_file,
335
                                      const std::string &output_file) {
336
  output_file_ = output_file;
337
  input_file_ = input_file;
338
339
  FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
340
  traverser.fn_new_file = &CommandGraft::FileCallback;
341
  traverser.fn_new_dir_prefix = &CommandGraft::DirCallback;
342
  traverser.Recurse(input_file);
343
  return 0;
344
}