CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_graft.cc
Go to the documentation of this file.
1 
7 #include "swissknife_graft.h"
8 #include "cvmfs_config.h"
9 
10 #include <fcntl.h>
11 #include <unistd.h>
12 
13 #include <cstdio>
14 #include <vector>
15 
16 #include "fs_traversal.h"
17 #include "hash.h"
18 #include "platform.h"
19 #include "util/posix.h"
20 
// Body of swissknife::CommandGraft::ChecksumFdWithChunks.
// NOTE(review): the first line of the signature (listing line 21) is not
// present in this extract; only the parameter list is visible below.
//
// Reads `fd` until end of input, pushes the data through `compressor` and
// hashes the deflated output with `hash_alg_`.
//
// Outputs:
//   *file_size        number of bytes read from `fd` (i.e. before compression)
//   *file_hash        hash over the deflated stream; reset with SetNull() when
//                     chunking is on and generate_bulk_hash_ is false; replaced
//                     by the only chunk's hash when just one chunk results
//   *chunk_offsets    start offsets of the chunks, measured in bytes read
//   *chunk_checksums  one hash per chunk, over that chunk's deflated output
//
// Both vectors are only touched when chunk_size_ > 0 and are left empty when
// the input fits into a single chunk.
//
// Returns false when a required pointer is null or read() fails with anything
// other than EINTR; true otherwise.
22  int fd, zlib::Compressor *compressor, uint64_t *file_size,
23  shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
24  std::vector<shash::Any> *chunk_checksums) {
25  if (!compressor || !file_size || !file_hash) {
26  return false;
27  }
28  *file_size = 0;
29  shash::Any chunk_hash(hash_alg_);
30  ssize_t bytes_read;
// in_buf holds raw input; cur_in_buf/avail_in describe the part of it that has
// not been consumed by the compressor yet, and in_buf_size is how many bytes
// the next read() is asked for.
31  unsigned char in_buf[zlib::kZChunk];
32  unsigned char *cur_in_buf = in_buf;
33  size_t in_buf_size = zlib::kZChunk;
34  unsigned char out_buf[zlib::kZChunk];
35  size_t avail_in = 0;
36 
37  // Initialize the file and per-chunk checksums
38  shash::ContextPtr file_hash_context(hash_alg_);
39  file_hash_context.buffer = alloca(file_hash_context.size);
40  shash::Init(file_hash_context);
41 
42  shash::ContextPtr chunk_hash_context(hash_alg_);
43  chunk_hash_context.buffer = alloca(chunk_hash_context.size);
44  shash::Init(chunk_hash_context);
45 
// Chunking is enabled by a non-zero chunk_size_; it needs both output vectors.
46  bool do_chunk = chunk_size_ > 0;
47  if (do_chunk) {
48  if (!chunk_offsets || !chunk_checksums) {
49  return false;
50  }
// The first chunk always starts at offset 0.
51  chunk_offsets->push_back(0);
52  }
53 
54  bool flush = 0;
55  do {
// Append new data behind whatever the compressor left unconsumed.
56  bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
57  if (-1 == bytes_read) {
58  if (errno == EINTR) {
// `continue` in a do/while jumps to the loop condition; flush is still false
// at this point, so the read is retried.
59  continue;
60  }
61  LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
62  strerror(errno));
63  return false;
64  }
65  *file_size += bytes_read;
66  avail_in += bytes_read;
67 
// Any read shorter than requested is taken as end of input.
// NOTE(review): pipes and terminals may return short reads before EOF --
// confirm this is acceptable for the stdin input mode.
68  flush = (static_cast<size_t>(bytes_read) < in_buf_size);
69 
70  // If possible, make progress on deflate.
71  unsigned char *cur_out_buf = out_buf;
72  size_t avail_out = zlib::kZChunk;
// The return value of Deflate() is not checked here.
// NOTE(review): the hashing below treats avail_out as the number of bytes
// written to out_buf after the call -- confirm against Compressor::Deflate.
73  compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
74  &avail_out);
75  if (do_chunk) {
76  shash::Update(out_buf, avail_out, chunk_hash_context);
// NOTE(review): listing line 77 is absent from this extract; as shown, both
// branches feed file_hash_context unconditionally. Check the original file for
// a guard on the following statement.
78  shash::Update(out_buf, avail_out, file_hash_context);
79  } else {
80  shash::Update(out_buf, avail_out, file_hash_context);
81  }
82 
83  if (!avail_in) {
84  // All bytes are consumed; set the buffer back to the beginning.
85  cur_in_buf = in_buf;
86  in_buf_size = zlib::kZChunk;
87  } else {
// Input is left over: the next read may only fill the space that remains
// behind the unconsumed bytes.
88  in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
89  }
90 
91  // Start a new hash if current one is above threshold
// The threshold is measured in bytes read since the last chunk offset, so
// chunk boundaries always fall on read-buffer boundaries.
92  if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
93  shash::Final(chunk_hash_context, &chunk_hash);
94  chunk_offsets->push_back(*file_size);
95  chunk_checksums->push_back(chunk_hash);
96  shash::Init(chunk_hash_context);
97  }
98  } while (!flush);
99 
100  shash::Final(file_hash_context, file_hash);
101  if (do_chunk) {
// Close the chunk that was still open when the input ended.
102  shash::Final(chunk_hash_context, &chunk_hash);
103  chunk_checksums->push_back(chunk_hash);
104  }
105 
106  // Zero-size chunks are not allowed; except if there is only one chunk
// This is the case when the input ended exactly on a chunk boundary: the last
// recorded offset equals the file size, so that trailing entry is dropped.
107  if (do_chunk && (chunk_offsets->back() == *file_size) &&
108  (chunk_offsets->size() > 1))
109  {
110  chunk_offsets->pop_back();
111  chunk_checksums->pop_back();
112  }
113 
// Without a requested bulk hash a chunked file carries a null file hash.
114  if (do_chunk && !generate_bulk_hash_)
115  file_hash->SetNull();
116 
117  // Do not chunk a file if it is under threshold.
// The single chunk's hash becomes the file hash, and both lists are emptied.
118  if (do_chunk && (chunk_offsets->size() == 1)) {
119  *file_hash = (*chunk_checksums)[0];
120  chunk_offsets->clear();
121  chunk_checksums->clear();
122  }
123  return true;
124 }
125 
126 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
127  const std::string &dir_name) {
128  if (!output_file_.size()) {
129  return true;
130  }
131  std::string full_output_path = output_file_ + "/" +
132  (relative_path.size() ? relative_path : ".") +
133  "/" + dir_name;
134  std::string full_input_path = input_file_ + "/" +
135  (relative_path.size() ? relative_path : ".") +
136  "/" + dir_name;
137  platform_stat64 sbuf;
138  if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
139  return false;
140  }
141  return MkdirDeep(full_output_path, sbuf.st_mode);
142 }
143 
144 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
145  const std::string &file_name) {
146  std::string full_input_path = input_file_ + "/" +
147  (relative_path.size() ? relative_path : ".") +
148  "/" + file_name;
149  std::string full_output_path;
150  if (output_file_.size()) {
151  full_output_path = output_file_ + "/" +
152  (relative_path.size() ? relative_path : ".") + "/" +
153  file_name;
154  }
155  Publish(full_input_path, full_output_path, false, false);
156 }
157 
// Body of the command entry point (listed in the index as
// `int Main(const ArgumentList &args)`).
// NOTE(review): the function header (listing line 158) and listing lines 169
// and 199 are not present in this extract.
//
// Options read from `args`:
//   i  input path, "-" selects standard input
//   o  output path (optional; empty string when absent)
//   v  verbose flag
//   b  request a bulk hash in addition to chunk hashes
//   a  hash algorithm name (shash::kSha1 when absent)
//   Z  compression algorithm name
//   c  chunk size (kDefaultChunkSize when absent)
//
// Returns 1 on argument or consistency errors; otherwise the result of
// Recurse() or Publish().
// 'i' is dereferenced without an end() check.
// NOTE(review): presumably the option is mandatory and enforced by the
// caller -- confirm.
159  const std::string &input_file = *args.find('i')->second;
160  const std::string output_file =
161  (args.find('o') == args.end()) ? "" : *(args.find('o')->second);
162  verbose_ = args.find('v') != args.end();
163  generate_bulk_hash_ = args.find('b') != args.end();
164  hash_alg_ = (args.find('a') == args.end())
165  ? shash::kSha1
166  : shash::ParseHashAlgorithm(*args.find('a')->second);
167  compression_alg_ =
168  (args.find('Z') == args.end())
// NOTE(review): listing line 169 (the value used when 'Z' is absent) is
// missing from this extract.
170  : zlib::ParseCompressionAlgorithm(*args.find('Z')->second);
171 
172  if (args.find('c') == args.end()) {
173  chunk_size_ = kDefaultChunkSize;
174  } else {
175  std::string chunk_size = *args.find('c')->second;
176  if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
177  LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
178  chunk_size.c_str());
179  return 1;
180  }
181  }
182  chunk_size_ *= 1024 * 1024; // Scale by 2^20 (presumably MB -> bytes); applies to the default as well.
183 
// The output counts as a directory only if it is set, can be stat'ed and is a
// directory.
184  platform_stat64 sbuf;
185  bool output_file_is_dir = output_file.size() &&
186  (0 == platform_stat(output_file.c_str(), &sbuf)) &&
187  S_ISDIR(sbuf.st_mode);
// Standard input combined with a directory as output is rejected.
188  if (output_file_is_dir && (input_file == "-")) {
189  LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
190  output_file.c_str());
191  return 1;
192  }
193 
194  if (input_file != "-") {
195  bool input_file_is_dir = (0 == platform_stat(input_file.c_str(), &sbuf)) &&
196  S_ISDIR(sbuf.st_mode);
197  if (input_file_is_dir) {
// A directory input needs a directory output, or no output at all.
198  if (!output_file_is_dir && output_file.size()) {
// NOTE(review): listing line 199 (start of the logging call that owns the
// following arguments) is missing from this extract.
200  "Input (%s) is a directory but output"
201  " (%s) is not\n",
202  input_file.c_str(), output_file.c_str());
203  return 1;
204  }
205  if (verbose_) {
206  LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
207  input_file.c_str());
208  }
209  return Recurse(input_file, output_file);
210  } else {
// Single regular input.
211  return Publish(input_file, output_file, output_file_is_dir, false);
212  }
213  }
// Input is "-": graft the data arriving on standard input.
214  return Publish(input_file, output_file, output_file_is_dir, true);
215 }
216 
217 int swissknife::CommandGraft::Publish(const std::string &input_file,
218  const std::string &output_file,
219  bool output_file_is_dir,
220  bool input_file_is_stdin) {
221  if (output_file.size() && verbose_) {
222  LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
223  output_file.c_str());
224  } else if (!output_file.size()) {
225  LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
226  }
227  int fd;
228  if (input_file_is_stdin) {
229  fd = 0;
230  } else {
231  fd = open(input_file.c_str(), O_RDONLY);
232  if (fd < 0) {
233  std::string errmsg = "Unable to open input file (" + input_file + ")";
234  perror(errmsg.c_str());
235  return 1;
236  }
237  }
238 
239  // Get input file mode; output file will be set identically.
240  platform_stat64 sbuf;
241  if (-1 == platform_fstat(fd, &sbuf)) {
242  std::string errmsg = "Unable to stat input file (" + input_file + ")";
243  perror(errmsg.c_str());
244  }
245  mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
246 
247  shash::Any file_hash(hash_alg_);
248  uint64_t processed_size;
249  std::vector<uint64_t> chunk_offsets;
250  std::vector<shash::Any> chunk_checksums;
251  zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
252 
253  bool retval =
254  ChecksumFdWithChunks(fd, compressor, &processed_size, &file_hash,
255  &chunk_offsets, &chunk_checksums);
256 
257  if (!input_file_is_stdin) {
258  close(fd);
259  }
260  if (!retval) {
261  std::string errmsg = "Unable to checksum input file (" + input_file + ")";
262  perror(errmsg.c_str());
263  return 1;
264  }
265 
266  // Build the .cvmfsgraft-$filename
267  if (output_file.size()) {
268  std::string dirname, fname;
269  std::string graft_fname;
270  if (output_file_is_dir) {
271  SplitPath(input_file, &dirname, &fname);
272  graft_fname = output_file + "/.cvmfsgraft-" + fname;
273  } else {
274  SplitPath(output_file, &dirname, &fname);
275  graft_fname = dirname + "/.cvmfsgraft-" + fname;
276  }
277  fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
278  if (fd < 0) {
279  std::string errmsg = "Unable to open graft file (" + graft_fname + ")";
280  perror(errmsg.c_str());
281  return 1;
282  }
283  } else {
284  fd = 1;
285  }
286  const bool with_suffix = true;
287  std::string graft_contents =
288  "size=" + StringifyInt(processed_size) + "\n" +
289  "checksum=" + file_hash.ToString(with_suffix) + "\n" +
290  "compression=" + zlib::AlgorithmName(compression_alg_) + "\n";
291  if (!chunk_offsets.empty()) {
292  std::vector<std::string> chunk_off_str;
293  chunk_off_str.reserve(chunk_offsets.size());
294  std::vector<std::string> chunk_ck_str;
295  chunk_ck_str.reserve(chunk_offsets.size());
296  for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
297  chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
298  chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
299  }
300  graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
301  graft_contents +=
302  "chunk_checksums=" + JoinStrings(chunk_ck_str, ",") + "\n";
303  }
304  size_t nbytes = graft_contents.size();
305  const char *buf = graft_contents.c_str();
306  retval = SafeWrite(fd, buf, nbytes);
307  if (!retval) {
308  perror("Failed writing to graft file");
309  close(fd);
310  return 1;
311  }
312  if (output_file.size()) {
313  close(fd);
314  } else {
315  return 0;
316  }
317 
318  // Create and truncate the output file.
319  std::string output_fname;
320  if (output_file_is_dir) {
321  output_fname = output_file + "/" + GetFileName(input_file);
322  } else {
323  output_fname = output_file;
324  }
325  fd =
326  open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, input_file_mode);
327  if (fd < 0) {
328  std::string errmsg = "Unable to open output file (" + output_file + ")";
329  perror(errmsg.c_str());
330  return 1;
331  }
332  close(fd);
333 
334  return 0;
335 }
336 
337 int swissknife::CommandGraft::Recurse(const std::string &input_file,
338  const std::string &output_file) {
339  output_file_ = output_file;
340  input_file_ = input_file;
341 
342  FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
345  traverser.Recurse(input_file);
346  return 0;
347 }
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
int Main(const ArgumentList &args)
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
Definition: compression.cc:148
struct stat64 platform_stat64
std::string GetFileName(const std::string &path)
Definition: posix.cc:162
virtual bool Deflate(const bool flush, unsigned char **inbuf, size_t *inbufsize, unsigned char **outbuf, size_t *outbufsize)=0
void Recurse(const std::string &dir_path) const
Definition: fs_traversal.h:112
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:318
void Init(ContextPtr context)
Definition: hash.cc:164
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:248
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
Definition: fs_traversal.h:37
bool SafeWrite(int fd, const void *buf, size_t nbyte)
Definition: posix.cc:1929
std::string AlgorithmName(const zlib::Algorithms alg)
Definition: compression.cc:158
int Publish(const std::string &input_file, const std::string &output_file, bool output_file_is_dir, bool input_file_is_stdin)
int platform_stat(const char *path, platform_stat64 *buf)
const unsigned kZChunk
Definition: compression.h:34
bool DirCallback(const std::string &relative_path, const std::string &dir_name)
bool String2Uint64Parse(const std::string &value, uint64_t *result)
Definition: string.cc:243
static Compressor * Construct(const Algorithms &param)
Definition: plugin.h:188
void Update(const unsigned char *buffer, const unsigned buffer_length, ContextPtr context)
Definition: hash.cc:190
void SplitPath(const std::string &path, std::string *dirname, std::string *filename)
Definition: posix.cc:112
VoidCallback fn_new_file
Definition: fs_traversal.h:47
void Final(ContextPtr context, Any *any_digest)
Definition: hash.cc:221
void FileCallback(const std::string &relative_path, const std::string &file_name)
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
Definition: posix.cc:871
string StringifyInt(const int64_t value)
Definition: string.cc:78
shash::Algorithms hash_alg_
void Publish()
int Recurse(const std::string &input_file, const std::string &output_file)
void * buffer
Definition: hash.h:500
void SetNull()
Definition: hash.h:399
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
Algorithms ParseHashAlgorithm(const string &algorithm_option)
Definition: hash.cc:72
BoolCallback fn_new_dir_prefix
Definition: fs_traversal.h:69
unsigned size
Definition: hash.h:501
int platform_fstat(int filedes, platform_stat64 *buf)
bool ChecksumFdWithChunks(int fd, zlib::Compressor *compressor, uint64_t *file_size, shash::Any *file_hash, std::vector< uint64_t > *chunk_offsets, std::vector< shash::Any > *chunk_checksums)