GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_graft.cc
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 0 205 0.0%
Branches: 0 158 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * Process a set of input files and create appropriate graft files.
5 */
6
7 #include "swissknife_graft.h"
8
9 #include <fcntl.h>
10 #include <unistd.h>
11
12 #include <cstdio>
13 #include <vector>
14
15 #include "crypto/hash.h"
16 #include "util/fs_traversal.h"
17 #include "util/platform.h"
18 #include "util/posix.h"
19
20 bool swissknife::CommandGraft::ChecksumFdWithChunks(
21 int fd, zlib::Compressor *compressor, uint64_t *file_size,
22 shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
23 std::vector<shash::Any> *chunk_checksums) {
24 if (!compressor || !file_size || !file_hash) {
25 return false;
26 }
27 *file_size = 0;
28 shash::Any chunk_hash(hash_alg_);
29 ssize_t bytes_read;
30 unsigned char in_buf[zlib::kZChunk];
31 unsigned char *cur_in_buf = in_buf;
32 size_t in_buf_size = zlib::kZChunk;
33 unsigned char out_buf[zlib::kZChunk];
34 size_t avail_in = 0;
35
36 // Initialize the file and per-chunk checksums
37 shash::ContextPtr file_hash_context(hash_alg_);
38 file_hash_context.buffer = alloca(file_hash_context.size);
39 shash::Init(file_hash_context);
40
41 shash::ContextPtr chunk_hash_context(hash_alg_);
42 chunk_hash_context.buffer = alloca(chunk_hash_context.size);
43 shash::Init(chunk_hash_context);
44
45 const bool do_chunk = chunk_size_ > 0;
46 if (do_chunk) {
47 if (!chunk_offsets || !chunk_checksums) {
48 return false;
49 }
50 chunk_offsets->push_back(0);
51 }
52
53 bool flush = 0;
54 do {
55 bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
56 if (-1 == bytes_read) {
57 if (errno == EINTR) {
58 continue;
59 }
60 LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
61 strerror(errno));
62 return false;
63 }
64 *file_size += bytes_read;
65 avail_in += bytes_read;
66
67 flush = (static_cast<size_t>(bytes_read) < in_buf_size);
68
69 // If possible, make progress on deflate.
70 unsigned char *cur_out_buf = out_buf;
71 size_t avail_out = zlib::kZChunk;
72 compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
73 &avail_out);
74 if (do_chunk) {
75 shash::Update(out_buf, avail_out, chunk_hash_context);
76 if (generate_bulk_hash_)
77 shash::Update(out_buf, avail_out, file_hash_context);
78 } else {
79 shash::Update(out_buf, avail_out, file_hash_context);
80 }
81
82 if (!avail_in) {
83 // All bytes are consumed; set the buffer back to the beginning.
84 cur_in_buf = in_buf;
85 in_buf_size = zlib::kZChunk;
86 } else {
87 in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
88 }
89
90 // Start a new hash if current one is above threshold
91 if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
92 shash::Final(chunk_hash_context, &chunk_hash);
93 chunk_offsets->push_back(*file_size);
94 chunk_checksums->push_back(chunk_hash);
95 shash::Init(chunk_hash_context);
96 }
97 } while (!flush);
98
99 shash::Final(file_hash_context, file_hash);
100 if (do_chunk) {
101 shash::Final(chunk_hash_context, &chunk_hash);
102 chunk_checksums->push_back(chunk_hash);
103 }
104
105 // Zero-size chunks are not allowed; except if there is only one chunk
106 if (do_chunk && (chunk_offsets->back() == *file_size)
107 && (chunk_offsets->size() > 1)) {
108 chunk_offsets->pop_back();
109 chunk_checksums->pop_back();
110 }
111
112 if (do_chunk && !generate_bulk_hash_)
113 file_hash->SetNull();
114
115 // Do not chunk a file if it is under threshold.
116 if (do_chunk && (chunk_offsets->size() == 1)) {
117 *file_hash = (*chunk_checksums)[0];
118 chunk_offsets->clear();
119 chunk_checksums->clear();
120 }
121 return true;
122 }
123
124 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
125 const std::string &dir_name) {
126 if (!output_file_.size()) {
127 return true;
128 }
129 const std::string full_output_path =
130 output_file_ + "/" + (relative_path.size() ? relative_path : ".") + "/" +
131 dir_name;
132 const std::string full_input_path =
133 input_file_ + "/" + (relative_path.size() ? relative_path : ".") + "/" +
134 dir_name;
135 platform_stat64 sbuf;
136 if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
137 return false;
138 }
139 return MkdirDeep(full_output_path, sbuf.st_mode);
140 }
141
142 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
143 const std::string &file_name) {
144 const std::string full_input_path =
145 input_file_ + "/" + (relative_path.size() ? relative_path : ".") + "/" +
146 file_name;
147 std::string full_output_path;
148 if (output_file_.size()) {
149 full_output_path = output_file_ + "/"
150 + (relative_path.size() ? relative_path : ".") + "/"
151 + file_name;
152 }
153 Publish(full_input_path, full_output_path, false, false);
154 }
155
156 int swissknife::CommandGraft::Main(const swissknife::ArgumentList &args) {
157 const std::string &input_file = *args.find('i')->second;
158 const std::string output_file = (args.find('o') == args.end())
159 ? ""
160 : *(args.find('o')->second);
161 verbose_ = args.find('v') != args.end();
162 generate_bulk_hash_ = args.find('b') != args.end();
163 hash_alg_ = (args.find('a') == args.end())
164 ? shash::kSha1
165 : shash::ParseHashAlgorithm(*args.find('a')->second);
166 compression_alg_ = (args.find('Z') == args.end())
167 ? zlib::kNoCompression
168 : zlib::ParseCompressionAlgorithm(
169 *args.find('Z')->second);
170
171 if (args.find('c') == args.end()) {
172 chunk_size_ = kDefaultChunkSize;
173 } else {
174 const std::string chunk_size = *args.find('c')->second;
175 if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
176 LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
177 chunk_size.c_str());
178 return 1;
179 }
180 }
181 chunk_size_ *= 1024 * 1024; // Convert to MB.
182
183 platform_stat64 sbuf;
184 const bool output_file_is_dir =
185 output_file.size() && (0 == platform_stat(output_file.c_str(), &sbuf)) &&
186 S_ISDIR(sbuf.st_mode);
187 if (output_file_is_dir && (input_file == "-")) {
188 LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
189 output_file.c_str());
190 return 1;
191 }
192
193 if (input_file != "-") {
194 const bool input_file_is_dir =
195 (0 == platform_stat(input_file.c_str(), &sbuf)) &&
196 S_ISDIR(sbuf.st_mode);
197 if (input_file_is_dir) {
198 if (!output_file_is_dir && output_file.size()) {
199 LogCvmfs(kLogCvmfs, kLogStderr,
200 "Input (%s) is a directory but output"
201 " (%s) is not\n",
202 input_file.c_str(), output_file.c_str());
203 return 1;
204 }
205 if (verbose_) {
206 LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
207 input_file.c_str());
208 }
209 return Recurse(input_file, output_file);
210 } else {
211 return Publish(input_file, output_file, output_file_is_dir, false);
212 }
213 }
214 return Publish(input_file, output_file, output_file_is_dir, true);
215 }
216
217 int swissknife::CommandGraft::Publish(const std::string &input_file,
218 const std::string &output_file,
219 bool output_file_is_dir,
220 bool input_file_is_stdin) {
221 if (output_file.size() && verbose_) {
222 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
223 output_file.c_str());
224 } else if (!output_file.size()) {
225 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
226 }
227 int fd;
228 if (input_file_is_stdin) {
229 fd = 0;
230 } else {
231 fd = open(input_file.c_str(), O_RDONLY);
232 if (fd < 0) {
233 const std::string errmsg =
234 "Unable to open input file (" + input_file + ")";
235 perror(errmsg.c_str());
236 return 1;
237 }
238 }
239
240 // Get input file mode; output file will be set identically.
241 platform_stat64 sbuf;
242 if (-1 == platform_fstat(fd, &sbuf)) {
243 const std::string errmsg = "Unable to stat input file (" + input_file + ")";
244 perror(errmsg.c_str());
245 }
246 const mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
247
248 shash::Any file_hash(hash_alg_);
249 uint64_t processed_size;
250 std::vector<uint64_t> chunk_offsets;
251 std::vector<shash::Any> chunk_checksums;
252 zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
253
254 bool retval = ChecksumFdWithChunks(fd, compressor, &processed_size,
255 &file_hash, &chunk_offsets,
256 &chunk_checksums);
257
258 if (!input_file_is_stdin) {
259 close(fd);
260 }
261 if (!retval) {
262 const std::string errmsg =
263 "Unable to checksum input file (" + input_file + ")";
264 perror(errmsg.c_str());
265 return 1;
266 }
267
268 // Build the .cvmfsgraft-$filename
269 if (output_file.size()) {
270 std::string dirname, fname;
271 std::string graft_fname;
272 if (output_file_is_dir) {
273 SplitPath(input_file, &dirname, &fname);
274 graft_fname = output_file + "/.cvmfsgraft-" + fname;
275 } else {
276 SplitPath(output_file, &dirname, &fname);
277 graft_fname = dirname + "/.cvmfsgraft-" + fname;
278 }
279 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
280 if (fd < 0) {
281 const std::string errmsg =
282 "Unable to open graft file (" + graft_fname + ")";
283 perror(errmsg.c_str());
284 return 1;
285 }
286 } else {
287 fd = 1;
288 }
289 const bool with_suffix = true;
290 std::string graft_contents = "size=" + StringifyInt(processed_size) + "\n"
291 + "checksum=" + file_hash.ToString(with_suffix)
292 + "\n" + "compression="
293 + zlib::AlgorithmName(compression_alg_) + "\n";
294 if (!chunk_offsets.empty()) {
295 std::vector<std::string> chunk_off_str;
296 chunk_off_str.reserve(chunk_offsets.size());
297 std::vector<std::string> chunk_ck_str;
298 chunk_ck_str.reserve(chunk_offsets.size());
299 for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
300 chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
301 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
302 }
303 graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
304 graft_contents += "chunk_checksums=" + JoinStrings(chunk_ck_str, ",")
305 + "\n";
306 }
307 const size_t nbytes = graft_contents.size();
308 const char *buf = graft_contents.c_str();
309 retval = SafeWrite(fd, buf, nbytes);
310 if (!retval) {
311 perror("Failed writing to graft file");
312 close(fd);
313 return 1;
314 }
315 if (output_file.size()) {
316 close(fd);
317 } else {
318 return 0;
319 }
320
321 // Create and truncate the output file.
322 std::string output_fname;
323 if (output_file_is_dir) {
324 output_fname = output_file + "/" + GetFileName(input_file);
325 } else {
326 output_fname = output_file;
327 }
328 fd = open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY,
329 input_file_mode);
330 if (fd < 0) {
331 const std::string errmsg =
332 "Unable to open output file (" + output_file + ")";
333 perror(errmsg.c_str());
334 return 1;
335 }
336 close(fd);
337
338 return 0;
339 }
340
341 int swissknife::CommandGraft::Recurse(const std::string &input_file,
342 const std::string &output_file) {
343 output_file_ = output_file;
344 input_file_ = input_file;
345
346 FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
347 traverser.fn_new_file = &CommandGraft::FileCallback;
348 traverser.fn_new_dir_prefix = &CommandGraft::DirCallback;
349 traverser.Recurse(input_file);
350 return 0;
351 }
352