CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
swissknife_graft.cc
Go to the documentation of this file.
1 
7 #include "swissknife_graft.h"
8 
9 #include <fcntl.h>
10 #include <unistd.h>
11 
12 #include <cstdio>
13 #include <vector>
14 
15 #include "crypto/hash.h"
16 #include "util/fs_traversal.h"
17 #include "util/platform.h"
18 #include "util/posix.h"
19 
// ChecksumFdWithChunks (member of swissknife::CommandGraft): reads `fd` until
// end of input, passes the data through `compressor` and hashes the compressor
// output.  On success *file_size holds the number of bytes read from `fd`,
// *file_hash the resulting content hash and, when chunking is active
// (chunk_size_ > 0), chunk_offsets / chunk_checksums describe the chunks.
// Returns false on a NULL mandatory argument or on a read error.
// NOTE(review): the listing line with the return type and function name
// (line 20) is not part of this view; the text below starts inside the
// parameter list.
21  int fd, zlib::Compressor *compressor, uint64_t *file_size,
22  shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
23  std::vector<shash::Any> *chunk_checksums) {
// compressor, file_size and file_hash are mandatory.
24  if (!compressor || !file_size || !file_hash) {
25  return false;
26  }
27  *file_size = 0;
28  shash::Any chunk_hash(hash_alg_);
29  ssize_t bytes_read;
// in_buf holds data read from fd that the compressor has not consumed yet:
// cur_in_buf points at the first unconsumed byte, avail_in is the number of
// unconsumed bytes and in_buf_size is the size requested from the next read().
30  unsigned char in_buf[zlib::kZChunk];
31  unsigned char *cur_in_buf = in_buf;
32  size_t in_buf_size = zlib::kZChunk;
33  unsigned char out_buf[zlib::kZChunk];
34  size_t avail_in = 0;
35 
36  // Initialize the file and per-chunk checksums
// The hash context buffers come from alloca().
37  shash::ContextPtr file_hash_context(hash_alg_);
38  file_hash_context.buffer = alloca(file_hash_context.size);
39  shash::Init(file_hash_context);
40 
41  shash::ContextPtr chunk_hash_context(hash_alg_);
42  chunk_hash_context.buffer = alloca(chunk_hash_context.size);
43  shash::Init(chunk_hash_context);
44 
// Chunking is active for any non-zero chunk_size_; it needs both output
// vectors.  The first chunk always starts at offset 0.
45  bool do_chunk = chunk_size_ > 0;
46  if (do_chunk) {
47  if (!chunk_offsets || !chunk_checksums) {
48  return false;
49  }
50  chunk_offsets->push_back(0);
51  }
52 
53  bool flush = 0;
54  do {
// Append new data behind the bytes that are still unconsumed.
55  bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
56  if (-1 == bytes_read) {
// `continue` jumps to the loop condition; flush is still false at this point,
// so an interrupted read is simply retried.
57  if (errno == EINTR) {
58  continue;
59  }
60  LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
61  strerror(errno));
62  return false;
63  }
64  *file_size += bytes_read;
65  avail_in += bytes_read;
66 
// A read that returns fewer bytes than requested is taken as end of input.
// NOTE(review): read() may also return short on pipes or terminals before
// end of input, which would end the loop early -- confirm this is acceptable
// for stdin input.
67  flush = (static_cast<size_t>(bytes_read) < in_buf_size);
68 
69  // If possible, make progress on deflate.
70  unsigned char *cur_out_buf = out_buf;
71  size_t avail_out = zlib::kZChunk;
// The return value of Deflate() is not inspected.
// NOTE(review): avail_out is used below as the length of the data in out_buf,
// so Deflate() presumably stores the number of produced bytes there -- confirm.
72  compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
73  &avail_out);
// NOTE(review): listing line 76 is missing from this view; as shown, both
// branches feed the file hash and only the chunked branch also feeds the
// chunk hash.
74  if (do_chunk) {
75  shash::Update(out_buf, avail_out, chunk_hash_context);
77  shash::Update(out_buf, avail_out, file_hash_context);
78  } else {
79  shash::Update(out_buf, avail_out, file_hash_context);
80  }
81 
82  if (!avail_in) {
83  // All bytes are consumed; set the buffer back to the beginning.
84  cur_in_buf = in_buf;
85  in_buf_size = zlib::kZChunk;
86  } else {
// Leftover input stays in place; the next read() may only fill the space
// behind it.
87  in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
88  }
89 
90  // Start a new hash if current one is above threshold
// The threshold is measured in bytes read from fd (*file_size), and a new
// chunk offset is recorded at the current read position.
91  if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
92  shash::Final(chunk_hash_context, &chunk_hash);
93  chunk_offsets->push_back(*file_size);
94  chunk_checksums->push_back(chunk_hash);
95  shash::Init(chunk_hash_context);
96  }
97  } while (!flush);
98 
// Finalize the whole-file hash and the hash of the last (possibly empty) chunk.
99  shash::Final(file_hash_context, file_hash);
100  if (do_chunk) {
101  shash::Final(chunk_hash_context, &chunk_hash);
102  chunk_checksums->push_back(chunk_hash);
103  }
104 
105  // Zero-size chunks are not allowed; except if there is only one chunk
106  if (do_chunk && (chunk_offsets->back() == *file_size)
107  && (chunk_offsets->size() > 1)) {
108  chunk_offsets->pop_back();
109  chunk_checksums->pop_back();
110  }
111 
// Without a requested bulk hash, a chunked result carries a null file hash.
112  if (do_chunk && !generate_bulk_hash_)
113  file_hash->SetNull();
114 
115  // Do not chunk a file if it is under threshold.
// The single chunk's checksum becomes the file hash and both chunk lists are
// returned empty.
116  if (do_chunk && (chunk_offsets->size() == 1)) {
117  *file_hash = (*chunk_checksums)[0];
118  chunk_offsets->clear();
119  chunk_checksums->clear();
120  }
121  return true;
122 }
123 
124 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
125  const std::string &dir_name) {
126  if (!output_file_.size()) {
127  return true;
128  }
129  std::string full_output_path = output_file_ + "/"
130  + (relative_path.size() ? relative_path : ".")
131  + "/" + dir_name;
132  std::string full_input_path = input_file_ + "/"
133  + (relative_path.size() ? relative_path : ".")
134  + "/" + dir_name;
135  platform_stat64 sbuf;
136  if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
137  return false;
138  }
139  return MkdirDeep(full_output_path, sbuf.st_mode);
140 }
141 
142 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
143  const std::string &file_name) {
144  std::string full_input_path = input_file_ + "/"
145  + (relative_path.size() ? relative_path : ".")
146  + "/" + file_name;
147  std::string full_output_path;
148  if (output_file_.size()) {
149  full_output_path = output_file_ + "/"
150  + (relative_path.size() ? relative_path : ".") + "/"
151  + file_name;
152  }
153  Publish(full_input_path, full_output_path, false, false);
154 }
155 
// Body of the command entry point: evaluates the command line options, then
// either recurses into an input directory or grafts a single file / stdin.
// Returns 1 on argument errors, otherwise the result of Recurse() / Publish().
// NOTE(review): the listing line with the function header (line 156) is not
// part of this view.
// Option 'i' is dereferenced without an end() check.
// NOTE(review): presumably the option parser guarantees that 'i' is present
// -- confirm.
157  const std::string &input_file = *args.find('i')->second;
// Option 'o' is optional; an empty output_file means "no output location".
158  const std::string output_file = (args.find('o') == args.end())
159  ? ""
160  : *(args.find('o')->second);
161  verbose_ = args.find('v') != args.end();
162  generate_bulk_hash_ = args.find('b') != args.end();
// Hash algorithm defaults to SHA-1 unless option 'a' is given.
163  hash_alg_ = (args.find('a') == args.end())
164  ? shash::kSha1
165  : shash::ParseHashAlgorithm(*args.find('a')->second);
// NOTE(review): listing lines 167-168 (the default value and the parser call
// for option 'Z') are missing from this view.
166  compression_alg_ = (args.find('Z') == args.end())
169  *args.find('Z')->second);
170 
// Chunk size: option 'c' or kDefaultChunkSize.
171  if (args.find('c') == args.end()) {
172  chunk_size_ = kDefaultChunkSize;
173  } else {
174  std::string chunk_size = *args.find('c')->second;
175  if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
176  LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
177  chunk_size.c_str());
178  return 1;
179  }
180  }
// The value is multiplied by 2^20 without an overflow check.
// NOTE(review): this looks like a conversion from a size given in MB to
// bytes, i.e. the opposite direction of what the trailing comment says
// -- confirm.
181  chunk_size_ *= 1024 * 1024; // Convert to MB.
182 
// The output counts as a directory only if it is set, can be stat'ed and is a
// directory.
183  platform_stat64 sbuf;
184  bool output_file_is_dir = output_file.size()
185  && (0 == platform_stat(output_file.c_str(), &sbuf))
186  && S_ISDIR(sbuf.st_mode);
// Input "-" selects stdin (see the final Publish() call); it is rejected in
// combination with a directory as output.
187  if (output_file_is_dir && (input_file == "-")) {
188  LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
189  output_file.c_str());
190  return 1;
191  }
192 
193  if (input_file != "-") {
194  bool input_file_is_dir = (0 == platform_stat(input_file.c_str(), &sbuf))
195  && S_ISDIR(sbuf.st_mode);
196  if (input_file_is_dir) {
// A directory input needs either no output at all or an existing directory.
// NOTE(review): listing line 198 (presumably the start of the logging call
// that owns the following string literals) is missing from this view.
197  if (!output_file_is_dir && output_file.size()) {
199  "Input (%s) is a directory but output"
200  " (%s) is not\n",
201  input_file.c_str(), output_file.c_str());
202  return 1;
203  }
204  if (verbose_) {
205  LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
206  input_file.c_str());
207  }
208  return Recurse(input_file, output_file);
209  } else {
// Regular (non-directory) input path.
210  return Publish(input_file, output_file, output_file_is_dir, false);
211  }
212  }
// Input is "-": graft the data read from stdin.
213  return Publish(input_file, output_file, output_file_is_dir, true);
214 }
215 
216 int swissknife::CommandGraft::Publish(const std::string &input_file,
217  const std::string &output_file,
218  bool output_file_is_dir,
219  bool input_file_is_stdin) {
220  if (output_file.size() && verbose_) {
221  LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
222  output_file.c_str());
223  } else if (!output_file.size()) {
224  LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
225  }
226  int fd;
227  if (input_file_is_stdin) {
228  fd = 0;
229  } else {
230  fd = open(input_file.c_str(), O_RDONLY);
231  if (fd < 0) {
232  std::string errmsg = "Unable to open input file (" + input_file + ")";
233  perror(errmsg.c_str());
234  return 1;
235  }
236  }
237 
238  // Get input file mode; output file will be set identically.
239  platform_stat64 sbuf;
240  if (-1 == platform_fstat(fd, &sbuf)) {
241  std::string errmsg = "Unable to stat input file (" + input_file + ")";
242  perror(errmsg.c_str());
243  }
244  mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
245 
246  shash::Any file_hash(hash_alg_);
247  uint64_t processed_size;
248  std::vector<uint64_t> chunk_offsets;
249  std::vector<shash::Any> chunk_checksums;
250  zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
251 
252  bool retval = ChecksumFdWithChunks(fd, compressor, &processed_size,
253  &file_hash, &chunk_offsets,
254  &chunk_checksums);
255 
256  if (!input_file_is_stdin) {
257  close(fd);
258  }
259  if (!retval) {
260  std::string errmsg = "Unable to checksum input file (" + input_file + ")";
261  perror(errmsg.c_str());
262  return 1;
263  }
264 
265  // Build the .cvmfsgraft-$filename
266  if (output_file.size()) {
267  std::string dirname, fname;
268  std::string graft_fname;
269  if (output_file_is_dir) {
270  SplitPath(input_file, &dirname, &fname);
271  graft_fname = output_file + "/.cvmfsgraft-" + fname;
272  } else {
273  SplitPath(output_file, &dirname, &fname);
274  graft_fname = dirname + "/.cvmfsgraft-" + fname;
275  }
276  fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
277  if (fd < 0) {
278  std::string errmsg = "Unable to open graft file (" + graft_fname + ")";
279  perror(errmsg.c_str());
280  return 1;
281  }
282  } else {
283  fd = 1;
284  }
285  const bool with_suffix = true;
286  std::string graft_contents = "size=" + StringifyInt(processed_size) + "\n"
287  + "checksum=" + file_hash.ToString(with_suffix)
288  + "\n" + "compression="
289  + zlib::AlgorithmName(compression_alg_) + "\n";
290  if (!chunk_offsets.empty()) {
291  std::vector<std::string> chunk_off_str;
292  chunk_off_str.reserve(chunk_offsets.size());
293  std::vector<std::string> chunk_ck_str;
294  chunk_ck_str.reserve(chunk_offsets.size());
295  for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
296  chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
297  chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
298  }
299  graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
300  graft_contents += "chunk_checksums=" + JoinStrings(chunk_ck_str, ",")
301  + "\n";
302  }
303  size_t nbytes = graft_contents.size();
304  const char *buf = graft_contents.c_str();
305  retval = SafeWrite(fd, buf, nbytes);
306  if (!retval) {
307  perror("Failed writing to graft file");
308  close(fd);
309  return 1;
310  }
311  if (output_file.size()) {
312  close(fd);
313  } else {
314  return 0;
315  }
316 
317  // Create and truncate the output file.
318  std::string output_fname;
319  if (output_file_is_dir) {
320  output_fname = output_file + "/" + GetFileName(input_file);
321  } else {
322  output_fname = output_file;
323  }
324  fd = open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY,
325  input_file_mode);
326  if (fd < 0) {
327  std::string errmsg = "Unable to open output file (" + output_file + ")";
328  perror(errmsg.c_str());
329  return 1;
330  }
331  close(fd);
332 
333  return 0;
334 }
335 
// Grafts a whole directory tree: stores both roots in the members read by
// DirCallback() / FileCallback() and runs a FileSystemTraversal over
// input_file.  Always returns 0; nothing in the statements shown here
// evaluates a result of the traversal.
336 int swissknife::CommandGraft::Recurse(const std::string &input_file,
337  const std::string &output_file) {
338  output_file_ = output_file;
339  input_file_ = input_file;
340 
341  FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
// NOTE(review): listing lines 342-343 are missing from this view; presumably
// they assign the traverser's callback hooks (e.g. fn_new_file and
// fn_new_dir_prefix) -- confirm against the complete file.
344  traverser.Recurse(input_file);
345  return 0;
346 }
int Main(const ArgumentList &args)
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
Definition: compression.cc:153
struct stat64 platform_stat64
NameString GetFileName(const PathString &path)
Definition: shortstring.cc:28
virtual bool Deflate(const bool flush, unsigned char **inbuf, size_t *inbufsize, unsigned char **outbuf, size_t *outbufsize)=0
void Recurse(const std::string &dir_path) const
Definition: fs_traversal.h:110
string JoinStrings(const vector< string > &strings, const string &joint)
Definition: string.cc:356
std::string ToString(const bool with_suffix=false) const
Definition: hash.h:241
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
Definition: fs_traversal.h:36
bool SafeWrite(int fd, const void *buf, size_t nbyte)
Definition: posix.cc:2035
std::string AlgorithmName(const zlib::Algorithms alg)
Definition: compression.cc:163
int Publish(const std::string &input_file, const std::string &output_file, bool output_file_is_dir, bool input_file_is_stdin)
int platform_stat(const char *path, platform_stat64 *buf)
const unsigned kZChunk
Definition: compression.h:34
bool DirCallback(const std::string &relative_path, const std::string &dir_name)
bool String2Uint64Parse(const std::string &value, uint64_t *result)
Definition: string.cc:257
void Init(ContextPtr context)
Definition: hash.cc:166
static Compressor * Construct(const Algorithms &param)
Definition: plugin.h:170
void SplitPath(const std::string &path, std::string *dirname, std::string *filename)
Definition: posix.cc:114
VoidCallback fn_new_file
Definition: fs_traversal.h:46
void FileCallback(const std::string &relative_path, const std::string &file_name)
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
Definition: posix.cc:855
void Final(ContextPtr context, Any *any_digest)
Definition: hash.cc:223
string StringifyInt(const int64_t value)
Definition: string.cc:77
shash::Algorithms hash_alg_
void Publish()
int Recurse(const std::string &input_file, const std::string &output_file)
void * buffer
Definition: hash.h:489
void Update(const unsigned char *buffer, const unsigned buffer_length, ContextPtr context)
Definition: hash.cc:192
void SetNull()
Definition: hash.h:388
std::map< char, SharedPtr< std::string > > ArgumentList
Definition: swissknife.h:72
Algorithms ParseHashAlgorithm(const string &algorithm_option)
Definition: hash.cc:71
BoolCallback fn_new_dir_prefix
Definition: fs_traversal.h:68
unsigned size
Definition: hash.h:490
int platform_fstat(int filedes, platform_stat64 *buf)
bool ChecksumFdWithChunks(int fd, zlib::Compressor *compressor, uint64_t *file_size, shash::Any *file_hash, std::vector< uint64_t > *chunk_offsets, std::vector< shash::Any > *chunk_checksums)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545