GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_graft.cc
Date: 2025-07-13 02:35:07
            Exec   Total   Coverage
Lines:         0     214       0.0%
Branches:      0     158       0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * Process a set of input files and create appropriate graft files.
5 */
6
7 #include "swissknife_graft.h"
8
9 #include <fcntl.h>
10 #include <unistd.h>
11
12 #include <cstdio>
13 #include <vector>
14
15 #include "crypto/hash.h"
16 #include "util/fs_traversal.h"
17 #include "util/platform.h"
18 #include "util/posix.h"
19
20 bool swissknife::CommandGraft::ChecksumFdWithChunks(
21 int fd, zlib::Compressor *compressor, uint64_t *file_size,
22 shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
23 std::vector<shash::Any> *chunk_checksums) {
24 if (!compressor || !file_size || !file_hash) {
25 return false;
26 }
27 *file_size = 0;
28 shash::Any chunk_hash(hash_alg_);
29 ssize_t bytes_read;
30 unsigned char in_buf[zlib::kZChunk];
31 unsigned char *cur_in_buf = in_buf;
32 size_t in_buf_size = zlib::kZChunk;
33 unsigned char out_buf[zlib::kZChunk];
34 size_t avail_in = 0;
35
36 // Initialize the file and per-chunk checksums
37 shash::ContextPtr file_hash_context(hash_alg_);
38 file_hash_context.buffer = alloca(file_hash_context.size);
39 shash::Init(file_hash_context);
40
41 shash::ContextPtr chunk_hash_context(hash_alg_);
42 chunk_hash_context.buffer = alloca(chunk_hash_context.size);
43 shash::Init(chunk_hash_context);
44
45 const bool do_chunk = chunk_size_ > 0;
46 if (do_chunk) {
47 if (!chunk_offsets || !chunk_checksums) {
48 return false;
49 }
50 chunk_offsets->push_back(0);
51 }
52
53 bool flush = false;
54 do {
55 bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
56 if (-1 == bytes_read) {
57 if (errno == EINTR) {
58 continue;
59 }
60 LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
61 strerror(errno));
62 return false;
63 }
64 *file_size += bytes_read;
65 avail_in += bytes_read;
66
67 flush = (static_cast<size_t>(bytes_read) < in_buf_size);
68
69 // If possible, make progress on deflate.
70 unsigned char *cur_out_buf = out_buf;
71 size_t avail_out = zlib::kZChunk;
72 compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
73 &avail_out);
74 if (do_chunk) {
75 shash::Update(out_buf, avail_out, chunk_hash_context);
76 if (generate_bulk_hash_)
77 shash::Update(out_buf, avail_out, file_hash_context);
78 } else {
79 shash::Update(out_buf, avail_out, file_hash_context);
80 }
81
82 if (!avail_in) {
83 // All bytes are consumed; set the buffer back to the beginning.
84 cur_in_buf = in_buf;
85 in_buf_size = zlib::kZChunk;
86 } else {
87 in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
88 }
89
90 // Start a new hash if current one is above threshold
91 if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
92 shash::Final(chunk_hash_context, &chunk_hash);
93 chunk_offsets->push_back(*file_size);
94 chunk_checksums->push_back(chunk_hash);
95 shash::Init(chunk_hash_context);
96 }
97 } while (!flush);
98
99 shash::Final(file_hash_context, file_hash);
100 if (do_chunk) {
101 shash::Final(chunk_hash_context, &chunk_hash);
102 chunk_checksums->push_back(chunk_hash);
103 }
104
105 // Zero-size chunks are not allowed, except if there is only one chunk
106 if (do_chunk && (chunk_offsets->back() == *file_size)
107 && (chunk_offsets->size() > 1)) {
108 chunk_offsets->pop_back();
109 chunk_checksums->pop_back();
110 }
111
112 if (do_chunk && !generate_bulk_hash_)
113 file_hash->SetNull();
114
115 // Do not chunk a file if it is under threshold.
116 if (do_chunk && (chunk_offsets->size() == 1)) {
117 *file_hash = (*chunk_checksums)[0];
118 chunk_offsets->clear();
119 chunk_checksums->clear();
120 }
121 return true;
122 }
123
124 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
125 const std::string &dir_name) {
126 if (!output_file_.size()) {
127 return true;
128 }
129 const std::string full_output_path = output_file_ + "/"
130 + (relative_path.size() ? relative_path
131 : ".")
132 + "/" + dir_name;
133 const std::string full_input_path = input_file_ + "/"
134 + (relative_path.size() ? relative_path
135 : ".")
136 + "/" + dir_name;
137 platform_stat64 sbuf;
138 if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
139 return false;
140 }
141 return MkdirDeep(full_output_path, sbuf.st_mode);
142 }
143
144 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
145 const std::string &file_name) {
146 const std::string full_input_path = input_file_ + "/"
147 + (relative_path.size() ? relative_path
148 : ".")
149 + "/" + file_name;
150 std::string full_output_path;
151 if (output_file_.size()) {
152 full_output_path = output_file_ + "/"
153 + (relative_path.size() ? relative_path : ".") + "/"
154 + file_name;
155 }
156 Publish(full_input_path, full_output_path, false, false);
157 }
158
159 int swissknife::CommandGraft::Main(const swissknife::ArgumentList &args) {
160 const std::string &input_file = *args.find('i')->second;
161 const std::string output_file = (args.find('o') == args.end())
162 ? ""
163 : *(args.find('o')->second);
164 verbose_ = args.find('v') != args.end();
165 generate_bulk_hash_ = args.find('b') != args.end();
166 hash_alg_ = (args.find('a') == args.end())
167 ? shash::kSha1
168 : shash::ParseHashAlgorithm(*args.find('a')->second);
169 compression_alg_ = (args.find('Z') == args.end())
170 ? zlib::kNoCompression
171 : zlib::ParseCompressionAlgorithm(
172 *args.find('Z')->second);
173
174 if (args.find('c') == args.end()) {
175 chunk_size_ = kDefaultChunkSize;
176 } else {
177 const std::string chunk_size = *args.find('c')->second;
178 if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
179 LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
180 chunk_size.c_str());
181 return 1;
182 }
183 }
184 chunk_size_ *= 1024 * 1024; // Convert from MB to bytes.
185
186 platform_stat64 sbuf;
187 const bool output_file_is_dir = output_file.size()
188 && (0
189 == platform_stat(output_file.c_str(),
190 &sbuf))
191 && S_ISDIR(sbuf.st_mode);
192 if (output_file_is_dir && (input_file == "-")) {
193 LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
194 output_file.c_str());
195 return 1;
196 }
197
198 if (input_file != "-") {
199 const bool input_file_is_dir = (0
200 == platform_stat(input_file.c_str(), &sbuf))
201 && S_ISDIR(sbuf.st_mode);
202 if (input_file_is_dir) {
203 if (!output_file_is_dir && output_file.size()) {
204 LogCvmfs(kLogCvmfs, kLogStderr,
205 "Input (%s) is a directory but output"
206 " (%s) is not\n",
207 input_file.c_str(), output_file.c_str());
208 return 1;
209 }
210 if (verbose_) {
211 LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
212 input_file.c_str());
213 }
214 return Recurse(input_file, output_file);
215 } else {
216 return Publish(input_file, output_file, output_file_is_dir, false);
217 }
218 }
219 return Publish(input_file, output_file, output_file_is_dir, true);
220 }
221
222 int swissknife::CommandGraft::Publish(const std::string &input_file,
223 const std::string &output_file,
224 bool output_file_is_dir,
225 bool input_file_is_stdin) {
226 if (output_file.size() && verbose_) {
227 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
228 output_file.c_str());
229 } else if (!output_file.size()) {
230 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
231 }
232 int fd;
233 if (input_file_is_stdin) {
234 fd = 0;
235 } else {
236 fd = open(input_file.c_str(), O_RDONLY);
237 if (fd < 0) {
238 const std::string errmsg = "Unable to open input file (" + input_file
239 + ")";
240 perror(errmsg.c_str());
241 return 1;
242 }
243 }
244
245 // Get input file mode; output file will be set identically.
246 platform_stat64 sbuf;
247 if (-1 == platform_fstat(fd, &sbuf)) {
248 const std::string errmsg = "Unable to stat input file (" + input_file + ")";
249 perror(errmsg.c_str());
250 }
251 const mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
252
253 shash::Any file_hash(hash_alg_);
254 uint64_t processed_size;
255 std::vector<uint64_t> chunk_offsets;
256 std::vector<shash::Any> chunk_checksums;
257 zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
258
259 bool retval = ChecksumFdWithChunks(fd, compressor, &processed_size,
260 &file_hash, &chunk_offsets,
261 &chunk_checksums);
262
263 if (!input_file_is_stdin) {
264 close(fd);
265 }
266 if (!retval) {
267 const std::string errmsg = "Unable to checksum input file (" + input_file
268 + ")";
269 perror(errmsg.c_str());
270 return 1;
271 }
272
273 // Build the .cvmfsgraft-$filename
274 if (output_file.size()) {
275 std::string dirname, fname;
276 std::string graft_fname;
277 if (output_file_is_dir) {
278 SplitPath(input_file, &dirname, &fname);
279 graft_fname = output_file + "/.cvmfsgraft-" + fname;
280 } else {
281 SplitPath(output_file, &dirname, &fname);
282 graft_fname = dirname + "/.cvmfsgraft-" + fname;
283 }
284 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
285 if (fd < 0) {
286 const std::string errmsg = "Unable to open graft file (" + graft_fname
287 + ")";
288 perror(errmsg.c_str());
289 return 1;
290 }
291 } else {
292 fd = 1;
293 }
294 const bool with_suffix = true;
295 std::string graft_contents = "size=" + StringifyInt(processed_size) + "\n"
296 + "checksum=" + file_hash.ToString(with_suffix)
297 + "\n" + "compression="
298 + zlib::AlgorithmName(compression_alg_) + "\n";
299 if (!chunk_offsets.empty()) {
300 std::vector<std::string> chunk_off_str;
301 chunk_off_str.reserve(chunk_offsets.size());
302 std::vector<std::string> chunk_ck_str;
303 chunk_ck_str.reserve(chunk_offsets.size());
304 for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
305 chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
306 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
307 }
308 graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
309 graft_contents += "chunk_checksums=" + JoinStrings(chunk_ck_str, ",")
310 + "\n";
311 }
312 const size_t nbytes = graft_contents.size();
313 const char *buf = graft_contents.c_str();
314 retval = SafeWrite(fd, buf, nbytes);
315 if (!retval) {
316 perror("Failed writing to graft file");
317 close(fd);
318 return 1;
319 }
320 if (output_file.size()) {
321 close(fd);
322 } else {
323 return 0;
324 }
325
326 // Create and truncate the output file.
327 std::string output_fname;
328 if (output_file_is_dir) {
329 output_fname = output_file + "/" + GetFileName(input_file);
330 } else {
331 output_fname = output_file;
332 }
333 fd = open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY,
334 input_file_mode);
335 if (fd < 0) {
336 const std::string errmsg = "Unable to open output file (" + output_file
337 + ")";
338 perror(errmsg.c_str());
339 return 1;
340 }
341 close(fd);
342
343 return 0;
344 }
345
346 int swissknife::CommandGraft::Recurse(const std::string &input_file,
347 const std::string &output_file) {
348 output_file_ = output_file;
349 input_file_ = input_file;
350
351 FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
352 traverser.fn_new_file = &CommandGraft::FileCallback;
353 traverser.fn_new_dir_prefix = &CommandGraft::DirCallback;
354 traverser.Recurse(input_file);
355 return 0;
356 }
357
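For reference, the graft file written by Publish() is simply the key=value lines assembled into graft_contents above. A minimal sketch of a resulting .cvmfsgraft-<name> file for a chunked input follows; all concrete values (size, offsets, hash strings) are hypothetical placeholders, and the compression name assumes the default no-compression setting:

    size=104857600
    checksum=<file hash, or the null hash when chunking without -b>
    compression=none
    chunk_offsets=0,25165824,50331648,75497472
    chunk_checksums=<hash-0>,<hash-1>,<hash-2>,<hash-3>

The chunk_offsets and chunk_checksums lines appear only when chunking is in effect, i.e. when ChecksumFdWithChunks() leaves chunk_offsets non-empty.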