GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_graft.cc
Date: 2024-04-21 02:33:16
            Exec    Total    Coverage
Lines:         0      206        0.0%
Branches:      0      158        0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * Process a set of input files and create appropriate graft files.
5 */
6
7 #include "swissknife_graft.h"
8 #include "cvmfs_config.h"
9
10 #include <fcntl.h>
11 #include <unistd.h>
12
13 #include <cstdio>
14 #include <vector>
15
16 #include "crypto/hash.h"
17 #include "util/fs_traversal.h"
18 #include "util/platform.h"
19 #include "util/posix.h"
20
21 bool swissknife::CommandGraft::ChecksumFdWithChunks(
22 int fd, zlib::Compressor *compressor, uint64_t *file_size,
23 shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
24 std::vector<shash::Any> *chunk_checksums) {
25 if (!compressor || !file_size || !file_hash) {
26 return false;
27 }
28 *file_size = 0;
29 shash::Any chunk_hash(hash_alg_);
30 ssize_t bytes_read;
31 unsigned char in_buf[zlib::kZChunk];
32 unsigned char *cur_in_buf = in_buf;
33 size_t in_buf_size = zlib::kZChunk;
34 unsigned char out_buf[zlib::kZChunk];
35 size_t avail_in = 0;
36
37 // Initialize the file and per-chunk checksums
38 shash::ContextPtr file_hash_context(hash_alg_);
39 file_hash_context.buffer = alloca(file_hash_context.size);
40 shash::Init(file_hash_context);
41
42 shash::ContextPtr chunk_hash_context(hash_alg_);
43 chunk_hash_context.buffer = alloca(chunk_hash_context.size);
44 shash::Init(chunk_hash_context);
45
46 bool do_chunk = chunk_size_ > 0;
47 if (do_chunk) {
48 if (!chunk_offsets || !chunk_checksums) {
49 return false;
50 }
51 chunk_offsets->push_back(0);
52 }
53
54 bool flush = false;
55 do {
56 bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
57 if (-1 == bytes_read) {
58 if (errno == EINTR) {
59 continue;
60 }
61 LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
62 strerror(errno));
63 return false;
64 }
65 *file_size += bytes_read;
66 avail_in += bytes_read;
67
68 flush = (static_cast<size_t>(bytes_read) < in_buf_size);
69
70 // If possible, make progress on deflate.
71 unsigned char *cur_out_buf = out_buf;
72 size_t avail_out = zlib::kZChunk;
73 compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
74 &avail_out);
75 if (do_chunk) {
76 shash::Update(out_buf, avail_out, chunk_hash_context);
77 if (generate_bulk_hash_)
78 shash::Update(out_buf, avail_out, file_hash_context);
79 } else {
80 shash::Update(out_buf, avail_out, file_hash_context);
81 }
82
83 if (!avail_in) {
84 // All bytes are consumed; set the buffer back to the beginning.
85 cur_in_buf = in_buf;
86 in_buf_size = zlib::kZChunk;
87 } else {
88 in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
89 }
90
91 // Finalize the current chunk and start a new one once it reaches the size threshold
92 if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
93 shash::Final(chunk_hash_context, &chunk_hash);
94 chunk_offsets->push_back(*file_size);
95 chunk_checksums->push_back(chunk_hash);
96 shash::Init(chunk_hash_context);
97 }
98 } while (!flush);
99
100 shash::Final(file_hash_context, file_hash);
101 if (do_chunk) {
102 shash::Final(chunk_hash_context, &chunk_hash);
103 chunk_checksums->push_back(chunk_hash);
104 }
105
106 // Zero-size chunks are not allowed, except when it is the only chunk.
107 if (do_chunk && (chunk_offsets->back() == *file_size) &&
108 (chunk_offsets->size() > 1))
109 {
110 chunk_offsets->pop_back();
111 chunk_checksums->pop_back();
112 }
113
114 if (do_chunk && !generate_bulk_hash_)
115 file_hash->SetNull();
116
117 // Do not chunk a file if it is under the threshold.
118 if (do_chunk && (chunk_offsets->size() == 1)) {
119 *file_hash = (*chunk_checksums)[0];
120 chunk_offsets->clear();
121 chunk_checksums->clear();
122 }
123 return true;
124 }
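// Illustrative sketch of the chunking behavior above (values are hypothetical,
// for clarity only): with chunking enabled, a chunk boundary is recorded at the
// first read boundary at or past each multiple of chunk_size_ bytes of input.
// A 10 MiB file hashed with a 4 MiB chunk size therefore ends up with
// chunk_offsets of approximately {0, 4 MiB, 8 MiB} and three entries in
// chunk_checksums.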
125
126 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
127 const std::string &dir_name) {
128 if (!output_file_.size()) {
129 return true;
130 }
131 std::string full_output_path = output_file_ + "/" +
132 (relative_path.size() ? relative_path : ".") +
133 "/" + dir_name;
134 std::string full_input_path = input_file_ + "/" +
135 (relative_path.size() ? relative_path : ".") +
136 "/" + dir_name;
137 platform_stat64 sbuf;
138 if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
139 return false;
140 }
141 return MkdirDeep(full_output_path, sbuf.st_mode);
142 }
143
144 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
145 const std::string &file_name) {
146 std::string full_input_path = input_file_ + "/" +
147 (relative_path.size() ? relative_path : ".") +
148 "/" + file_name;
149 std::string full_output_path;
150 if (output_file_.size()) {
151 full_output_path = output_file_ + "/" +
152 (relative_path.size() ? relative_path : ".") + "/" +
153 file_name;
154 }
155 Publish(full_input_path, full_output_path, false, false);
156 }
157
158 int swissknife::CommandGraft::Main(const swissknife::ArgumentList &args) {
159 const std::string &input_file = *args.find('i')->second;
160 const std::string output_file =
161 (args.find('o') == args.end()) ? "" : *(args.find('o')->second);
162 verbose_ = args.find('v') != args.end();
163 generate_bulk_hash_ = args.find('b') != args.end();
164 hash_alg_ = (args.find('a') == args.end())
165 ? shash::kSha1
166 : shash::ParseHashAlgorithm(*args.find('a')->second);
167 compression_alg_ =
168 (args.find('Z') == args.end())
169 ? zlib::kNoCompression
170 : zlib::ParseCompressionAlgorithm(*args.find('Z')->second);
171
172 if (args.find('c') == args.end()) {
173 chunk_size_ = kDefaultChunkSize;
174 } else {
175 std::string chunk_size = *args.find('c')->second;
176 if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
177 LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
178 chunk_size.c_str());
179 return 1;
180 }
181 }
182 chunk_size_ *= 1024 * 1024; // Chunk size is given in MiB; convert to bytes.
183
184 platform_stat64 sbuf;
185 bool output_file_is_dir = output_file.size() &&
186 (0 == platform_stat(output_file.c_str(), &sbuf)) &&
187 S_ISDIR(sbuf.st_mode);
188 if (output_file_is_dir && (input_file == "-")) {
189 LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
190 output_file.c_str());
191 return 1;
192 }
193
194 if (input_file != "-") {
195 bool input_file_is_dir = (0 == platform_stat(input_file.c_str(), &sbuf)) &&
196 S_ISDIR(sbuf.st_mode);
197 if (input_file_is_dir) {
198 if (!output_file_is_dir && output_file.size()) {
199 LogCvmfs(kLogCvmfs, kLogStderr,
200 "Input (%s) is a directory but output"
201 " (%s) is not\n",
202 input_file.c_str(), output_file.c_str());
203 return 1;
204 }
205 if (verbose_) {
206 LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
207 input_file.c_str());
208 }
209 return Recurse(input_file, output_file);
210 } else {
211 return Publish(input_file, output_file, output_file_is_dir, false);
212 }
213 }
214 return Publish(input_file, output_file, output_file_is_dir, true);
215 }
216
217 int swissknife::CommandGraft::Publish(const std::string &input_file,
218 const std::string &output_file,
219 bool output_file_is_dir,
220 bool input_file_is_stdin) {
221 if (output_file.size() && verbose_) {
222 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
223 output_file.c_str());
224 } else if (!output_file.size()) {
225 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
226 }
227 int fd;
228 if (input_file_is_stdin) {
229 fd = 0;
230 } else {
231 fd = open(input_file.c_str(), O_RDONLY);
232 if (fd < 0) {
233 std::string errmsg = "Unable to open input file (" + input_file + ")";
234 perror(errmsg.c_str());
235 return 1;
236 }
237 }
238
239 // Get input file mode; output file will be set identically.
240 platform_stat64 sbuf;
241 if (-1 == platform_fstat(fd, &sbuf)) {
242 std::string errmsg = "Unable to stat input file (" + input_file + ")";
243 perror(errmsg.c_str());
244 }
245 mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
246
247 shash::Any file_hash(hash_alg_);
248 uint64_t processed_size;
249 std::vector<uint64_t> chunk_offsets;
250 std::vector<shash::Any> chunk_checksums;
251 zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
252
253 bool retval =
254 ChecksumFdWithChunks(fd, compressor, &processed_size, &file_hash,
255 &chunk_offsets, &chunk_checksums);
256
257 if (!input_file_is_stdin) {
258 close(fd);
259 }
260 if (!retval) {
261 std::string errmsg = "Unable to checksum input file (" + input_file + ")";
262 perror(errmsg.c_str());
263 return 1;
264 }
265
266 // Build the .cvmfsgraft-$filename
267 if (output_file.size()) {
268 std::string dirname, fname;
269 std::string graft_fname;
270 if (output_file_is_dir) {
271 SplitPath(input_file, &dirname, &fname);
272 graft_fname = output_file + "/.cvmfsgraft-" + fname;
273 } else {
274 SplitPath(output_file, &dirname, &fname);
275 graft_fname = dirname + "/.cvmfsgraft-" + fname;
276 }
277 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
278 if (fd < 0) {
279 std::string errmsg = "Unable to open graft file (" + graft_fname + ")";
280 perror(errmsg.c_str());
281 return 1;
282 }
283 } else {
284 fd = 1;
285 }
286 const bool with_suffix = true;
287 std::string graft_contents =
288 "size=" + StringifyInt(processed_size) + "\n" +
289 "checksum=" + file_hash.ToString(with_suffix) + "\n" +
290 "compression=" + zlib::AlgorithmName(compression_alg_) + "\n";
291 if (!chunk_offsets.empty()) {
292 std::vector<std::string> chunk_off_str;
293 chunk_off_str.reserve(chunk_offsets.size());
294 std::vector<std::string> chunk_ck_str;
295 chunk_ck_str.reserve(chunk_offsets.size());
296 for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
297 chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
298 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
299 }
300 graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
301 graft_contents +=
302 "chunk_checksums=" + JoinStrings(chunk_ck_str, ",") + "\n";
303 }
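// Illustration of the graft file assembled here (placeholder values, not taken
// from the original source):
//   size=10485760
//   checksum=<hex digest of the bulk hash, with algorithm suffix where applicable>
//   compression=<algorithm name>
//   chunk_offsets=0,4194304,8388608
//   chunk_checksums=<digest0>,<digest1>,<digest2>
// The chunk_* lines appear only when the file was actually chunked.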
304 size_t nbytes = graft_contents.size();
305 const char *buf = graft_contents.c_str();
306 retval = SafeWrite(fd, buf, nbytes);
307 if (!retval) {
308 perror("Failed writing to graft file");
309 close(fd);
310 return 1;
311 }
312 if (output_file.size()) {
313 close(fd);
314 } else {
315 return 0;
316 }
317
318 // Create and truncate the output file.
319 std::string output_fname;
320 if (output_file_is_dir) {
321 output_fname = output_file + "/" + GetFileName(input_file);
322 } else {
323 output_fname = output_file;
324 }
325 fd =
326 open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, input_file_mode);
327 if (fd < 0) {
328 std::string errmsg = "Unable to open output file (" + output_file + ")";
329 perror(errmsg.c_str());
330 return 1;
331 }
332 close(fd);
333
334 return 0;
335 }
336
337 int swissknife::CommandGraft::Recurse(const std::string &input_file,
338 const std::string &output_file) {
339 output_file_ = output_file;
340 input_file_ = input_file;
341
342 FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
343 traverser.fn_new_file = &CommandGraft::FileCallback;
344 traverser.fn_new_dir_prefix = &CommandGraft::DirCallback;
345 traverser.Recurse(input_file);
346 return 0;
347 }
348