GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/swissknife_graft.cc
Date: 2026-04-26 02:35:59
Exec Total Coverage
Lines: 0 214 0.0%
Branches: 0 158 0.0%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 *
4 * Process a set of input files and create appropriate graft files.
5 */
6
7 #include "swissknife_graft.h"
8
9 #include <fcntl.h>
10 #include <unistd.h>
11
12 #include <cstdio>
13 #include <vector>
14
15 #include "crypto/hash.h"
16 #include "util/fs_traversal.h"
17 #include "util/posix.h"
18
/**
 * Streams the contents of fd (until EOF) through *compressor and hashes the
 * compressor's output.  On success:
 *  - *file_size holds the number of uncompressed bytes read from fd,
 *  - *file_hash holds the hash of the full (compressed) stream, and
 *  - if chunking is enabled (chunk_size_ > 0), *chunk_offsets /
 *    *chunk_checksums hold the uncompressed start offset and hash of each
 *    chunk.  If the file fits in a single chunk, the chunk lists are cleared
 *    and the chunk hash is promoted to *file_hash instead.
 *
 * Returns false if a required output pointer is NULL or a read fails.
 */
bool swissknife::CommandGraft::ChecksumFdWithChunks(
    int fd, zlib::Compressor *compressor, uint64_t *file_size,
    shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
    std::vector<shash::Any> *chunk_checksums) {
  if (!compressor || !file_size || !file_hash) {
    return false;
  }
  *file_size = 0;
  shash::Any chunk_hash(hash_alg_);
  ssize_t bytes_read;
  unsigned char in_buf[zlib::kZChunk];
  unsigned char *cur_in_buf = in_buf;   // next free position in in_buf
  size_t in_buf_size = zlib::kZChunk;   // free space available at cur_in_buf
  unsigned char out_buf[zlib::kZChunk];
  size_t avail_in = 0;                  // bytes buffered but not yet deflated

  // Initialize the file and per-chunk checksums.  The digest contexts live
  // on the stack (alloca) for the duration of this call only.
  shash::ContextPtr file_hash_context(hash_alg_);
  file_hash_context.buffer = alloca(file_hash_context.size);
  shash::Init(file_hash_context);

  shash::ContextPtr chunk_hash_context(hash_alg_);
  chunk_hash_context.buffer = alloca(chunk_hash_context.size);
  shash::Init(chunk_hash_context);

  const bool do_chunk = chunk_size_ > 0;
  if (do_chunk) {
    // Chunking requires both output vectors; the first chunk starts at 0.
    if (!chunk_offsets || !chunk_checksums) {
      return false;
    }
    chunk_offsets->push_back(0);
  }

  bool flush = 0;
  do {
    bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
    if (-1 == bytes_read) {
      // Retry reads interrupted by a signal; fail on any other error.
      if (errno == EINTR) {
        continue;
      }
      LogCvmfs(kLogCvmfs, kLogStderr, "Failure when reading file: %s",
               strerror(errno));
      return false;
    }
    *file_size += bytes_read;
    avail_in += bytes_read;

    // A short read is taken as end-of-input and triggers the final deflate
    // flush.  NOTE(review): pipes/sockets may legitimately return short
    // reads before EOF, which would truncate the stream here -- confirm
    // callers only pass regular files or stdin with this semantics.
    flush = (static_cast<size_t>(bytes_read) < in_buf_size);

    // If possible, make progress on deflate.
    unsigned char *cur_out_buf = out_buf;
    size_t avail_out = zlib::kZChunk;
    compressor->Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
                        &avail_out);
    // Hash whatever the compressor produced this round.  With chunking on,
    // the bulk file hash is only maintained when explicitly requested.
    if (do_chunk) {
      shash::Update(out_buf, avail_out, chunk_hash_context);
      if (generate_bulk_hash_)
        shash::Update(out_buf, avail_out, file_hash_context);
    } else {
      shash::Update(out_buf, avail_out, file_hash_context);
    }

    if (!avail_in) {
      // All bytes are consumed; set the buffer back to the beginning.
      cur_in_buf = in_buf;
      in_buf_size = zlib::kZChunk;
    } else {
      // Deflate left unconsumed input behind; only the tail of in_buf past
      // the leftover bytes is free for the next read.
      in_buf_size = zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
    }

    // Start a new hash if current one is above threshold.  Chunk boundaries
    // are measured in uncompressed bytes (*file_size).
    if (do_chunk && (*file_size - chunk_offsets->back() >= chunk_size_)) {
      shash::Final(chunk_hash_context, &chunk_hash);
      chunk_offsets->push_back(*file_size);
      chunk_checksums->push_back(chunk_hash);
      shash::Init(chunk_hash_context);
    }
  } while (!flush);

  // Finalize the bulk hash and the trailing (possibly partial) chunk.
  shash::Final(file_hash_context, file_hash);
  if (do_chunk) {
    shash::Final(chunk_hash_context, &chunk_hash);
    chunk_checksums->push_back(chunk_hash);
  }

  // Zero-size chunks are not allowed; except if there is only one chunk
  if (do_chunk && (chunk_offsets->back() == *file_size)
      && (chunk_offsets->size() > 1)) {
    chunk_offsets->pop_back();
    chunk_checksums->pop_back();
  }

  // Without an explicit bulk hash request, chunked files carry no file hash.
  if (do_chunk && !generate_bulk_hash_)
    file_hash->SetNull();

  // Do not chunk a file if it is under threshold.
  if (do_chunk && (chunk_offsets->size() == 1)) {
    *file_hash = (*chunk_checksums)[0];
    chunk_offsets->clear();
    chunk_checksums->clear();
  }
  return true;
}
122
123 bool swissknife::CommandGraft::DirCallback(const std::string &relative_path,
124 const std::string &dir_name) {
125 if (!output_file_.size()) {
126 return true;
127 }
128 const std::string full_output_path = output_file_ + "/"
129 + (relative_path.size() ? relative_path
130 : ".")
131 + "/" + dir_name;
132 const std::string full_input_path = input_file_ + "/"
133 + (relative_path.size() ? relative_path
134 : ".")
135 + "/" + dir_name;
136 platform_stat64 sbuf;
137 if (-1 == platform_stat(full_input_path.c_str(), &sbuf)) {
138 return false;
139 }
140 return MkdirDeep(full_output_path, sbuf.st_mode);
141 }
142
143 void swissknife::CommandGraft::FileCallback(const std::string &relative_path,
144 const std::string &file_name) {
145 const std::string full_input_path = input_file_ + "/"
146 + (relative_path.size() ? relative_path
147 : ".")
148 + "/" + file_name;
149 std::string full_output_path;
150 if (output_file_.size()) {
151 full_output_path = output_file_ + "/"
152 + (relative_path.size() ? relative_path : ".") + "/"
153 + file_name;
154 }
155 Publish(full_input_path, full_output_path, false, false);
156 }
157
/**
 * Entry point of the graft command.  Parses the argument list
 * (-i input, -o output, -v verbose, -b bulk hash, -a hash algorithm,
 * -Z compression, -c chunk size in MB), then dispatches to Recurse() for
 * directory input or Publish() for a single file / stdin.
 * Returns 0 on success, 1 on usage or I/O errors.
 */
int swissknife::CommandGraft::Main(const swissknife::ArgumentList &args) {
  // 'i' is a mandatory argument -- presumably enforced by the swissknife
  // framework before Main() is invoked; verify against the command table.
  const std::string &input_file = *args.find('i')->second;
  const std::string output_file = (args.find('o') == args.end())
                                      ? ""
                                      : *(args.find('o')->second);
  verbose_ = args.find('v') != args.end();
  generate_bulk_hash_ = args.find('b') != args.end();
  hash_alg_ = (args.find('a') == args.end())
                  ? shash::kSha1
                  : shash::ParseHashAlgorithm(*args.find('a')->second);
  compression_alg_ = (args.find('Z') == args.end())
                         ? zlib::kNoCompression
                         : zlib::ParseCompressionAlgorithm(
                               *args.find('Z')->second);

  if (args.find('c') == args.end()) {
    chunk_size_ = kDefaultChunkSize;
  } else {
    const std::string chunk_size = *args.find('c')->second;
    if (!String2Uint64Parse(chunk_size, &chunk_size_)) {
      LogCvmfs(kLogCvmfs, kLogStderr, "Unable to parse chunk size: %s",
               chunk_size.c_str());
      return 1;
    }
  }
  chunk_size_ *= 1024 * 1024;  // Chunk size is given in MB; convert to bytes.

  platform_stat64 sbuf;
  const bool output_file_is_dir = output_file.size()
                                  && (0
                                      == platform_stat(output_file.c_str(),
                                                       &sbuf))
                                  && S_ISDIR(sbuf.st_mode);
  // Stdin input cannot be grafted into a directory (no file name to use).
  if (output_file_is_dir && (input_file == "-")) {
    LogCvmfs(kLogCvmfs, kLogStderr, "Output file (%s): Is a directory\n",
             output_file.c_str());
    return 1;
  }

  if (input_file != "-") {
    const bool input_file_is_dir = (0
                                    == platform_stat(input_file.c_str(), &sbuf))
                                   && S_ISDIR(sbuf.st_mode);
    if (input_file_is_dir) {
      // A directory input needs a directory output (or no output at all).
      if (!output_file_is_dir && output_file.size()) {
        LogCvmfs(kLogCvmfs, kLogStderr,
                 "Input (%s) is a directory but output"
                 " (%s) is not\n",
                 input_file.c_str(), output_file.c_str());
        return 1;
      }
      if (verbose_) {
        LogCvmfs(kLogCvmfs, kLogStderr, "Recursing into directory %s\n",
                 input_file.c_str());
      }
      return Recurse(input_file, output_file);
    } else {
      return Publish(input_file, output_file, output_file_is_dir, false);
    }
  }
  return Publish(input_file, output_file, output_file_is_dir, true);
}
220
221 int swissknife::CommandGraft::Publish(const std::string &input_file,
222 const std::string &output_file,
223 bool output_file_is_dir,
224 bool input_file_is_stdin) {
225 if (output_file.size() && verbose_) {
226 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s to %s", input_file.c_str(),
227 output_file.c_str());
228 } else if (!output_file.size()) {
229 LogCvmfs(kLogCvmfs, kLogStdout, "Grafting %s", input_file.c_str());
230 }
231 int fd;
232 if (input_file_is_stdin) {
233 fd = 0;
234 } else {
235 fd = open(input_file.c_str(), O_RDONLY);
236 if (fd < 0) {
237 const std::string errmsg = "Unable to open input file (" + input_file
238 + ")";
239 perror(errmsg.c_str());
240 return 1;
241 }
242 }
243
244 // Get input file mode; output file will be set identically.
245 platform_stat64 sbuf;
246 if (-1 == platform_fstat(fd, &sbuf)) {
247 const std::string errmsg = "Unable to stat input file (" + input_file + ")";
248 perror(errmsg.c_str());
249 }
250 const mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
251
252 shash::Any file_hash(hash_alg_);
253 uint64_t processed_size;
254 std::vector<uint64_t> chunk_offsets;
255 std::vector<shash::Any> chunk_checksums;
256 zlib::Compressor *compressor = zlib::Compressor::Construct(compression_alg_);
257
258 bool retval = ChecksumFdWithChunks(fd, compressor, &processed_size,
259 &file_hash, &chunk_offsets,
260 &chunk_checksums);
261
262 if (!input_file_is_stdin) {
263 close(fd);
264 }
265 if (!retval) {
266 const std::string errmsg = "Unable to checksum input file (" + input_file
267 + ")";
268 perror(errmsg.c_str());
269 return 1;
270 }
271
272 // Build the .cvmfsgraft-$filename
273 if (output_file.size()) {
274 std::string dirname, fname;
275 std::string graft_fname;
276 if (output_file_is_dir) {
277 SplitPath(input_file, &dirname, &fname);
278 graft_fname = output_file + "/.cvmfsgraft-" + fname;
279 } else {
280 SplitPath(output_file, &dirname, &fname);
281 graft_fname = dirname + "/.cvmfsgraft-" + fname;
282 }
283 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
284 if (fd < 0) {
285 const std::string errmsg = "Unable to open graft file (" + graft_fname
286 + ")";
287 perror(errmsg.c_str());
288 return 1;
289 }
290 } else {
291 fd = 1;
292 }
293 const bool with_suffix = true;
294 std::string graft_contents = "size=" + StringifyInt(processed_size) + "\n"
295 + "checksum=" + file_hash.ToString(with_suffix)
296 + "\n" + "compression="
297 + zlib::AlgorithmName(compression_alg_) + "\n";
298 if (!chunk_offsets.empty()) {
299 std::vector<std::string> chunk_off_str;
300 chunk_off_str.reserve(chunk_offsets.size());
301 std::vector<std::string> chunk_ck_str;
302 chunk_ck_str.reserve(chunk_offsets.size());
303 for (unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
304 chunk_off_str.push_back(StringifyInt(chunk_offsets[idx]));
305 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
306 }
307 graft_contents += "chunk_offsets=" + JoinStrings(chunk_off_str, ",") + "\n";
308 graft_contents += "chunk_checksums=" + JoinStrings(chunk_ck_str, ",")
309 + "\n";
310 }
311 const size_t nbytes = graft_contents.size();
312 const char *buf = graft_contents.c_str();
313 retval = SafeWrite(fd, buf, nbytes);
314 if (!retval) {
315 perror("Failed writing to graft file");
316 close(fd);
317 return 1;
318 }
319 if (output_file.size()) {
320 close(fd);
321 } else {
322 return 0;
323 }
324
325 // Create and truncate the output file.
326 std::string output_fname;
327 if (output_file_is_dir) {
328 output_fname = output_file + "/" + GetFileName(input_file);
329 } else {
330 output_fname = output_file;
331 }
332 fd = open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY,
333 input_file_mode);
334 if (fd < 0) {
335 const std::string errmsg = "Unable to open output file (" + output_file
336 + ")";
337 perror(errmsg.c_str());
338 return 1;
339 }
340 close(fd);
341
342 return 0;
343 }
344
345 int swissknife::CommandGraft::Recurse(const std::string &input_file,
346 const std::string &output_file) {
347 output_file_ = output_file;
348 input_file_ = input_file;
349
350 FileSystemTraversal<CommandGraft> traverser(this, input_file, true);
351 traverser.fn_new_file = &CommandGraft::FileCallback;
352 traverser.fn_new_dir_prefix = &CommandGraft::DirCallback;
353 traverser.Recurse(input_file);
354 return 0;
355 }
356