23 shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
24 std::vector<shash::Any> *chunk_checksums) {
25 if (!compressor || !file_size || !file_hash) {
32 unsigned char *cur_in_buf = in_buf;
39 file_hash_context.
buffer = alloca(file_hash_context.
size);
43 chunk_hash_context.
buffer = alloca(chunk_hash_context.
size);
48 if (!chunk_offsets || !chunk_checksums) {
51 chunk_offsets->push_back(0);
56 bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
57 if (-1 == bytes_read) {
65 *file_size += bytes_read;
66 avail_in += bytes_read;
68 flush = (
static_cast<size_t>(bytes_read) < in_buf_size);
71 unsigned char *cur_out_buf = out_buf;
73 compressor->
Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
88 in_buf_size =
zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
92 if (do_chunk && (*file_size - chunk_offsets->back() >=
chunk_size_)) {
94 chunk_offsets->push_back(*file_size);
95 chunk_checksums->push_back(chunk_hash);
103 chunk_checksums->push_back(chunk_hash);
107 if (do_chunk && (chunk_offsets->back() == *file_size) &&
108 (chunk_offsets->size() > 1))
110 chunk_offsets->pop_back();
111 chunk_checksums->pop_back();
118 if (do_chunk && (chunk_offsets->size() == 1)) {
119 *file_hash = (*chunk_checksums)[0];
120 chunk_offsets->clear();
121 chunk_checksums->clear();
127 const std::string &dir_name) {
128 if (!output_file_.size()) {
131 std::string full_output_path = output_file_ +
"/" +
132 (relative_path.size() ? relative_path :
".") +
134 std::string full_input_path = input_file_ +
"/" +
135 (relative_path.size() ? relative_path :
".") +
141 return MkdirDeep(full_output_path, sbuf.st_mode);
145 const std::string &file_name) {
146 std::string full_input_path = input_file_ +
"/" +
147 (relative_path.size() ? relative_path :
".") +
149 std::string full_output_path;
150 if (output_file_.size()) {
151 full_output_path = output_file_ +
"/" +
152 (relative_path.size() ? relative_path :
".") +
"/" +
155 Publish(full_input_path, full_output_path,
false,
false);
159 const std::string &input_file = *args.find(
'i')->second;
160 const std::string output_file =
161 (args.find(
'o') == args.end()) ?
"" : *(args.find(
'o')->second);
162 verbose_ = args.find(
'v') != args.end();
163 generate_bulk_hash_ = args.find(
'b') != args.end();
164 hash_alg_ = (args.find(
'a') == args.end())
168 (args.find(
'Z') == args.end())
172 if (args.find(
'c') == args.end()) {
173 chunk_size_ = kDefaultChunkSize;
175 std::string chunk_size = *args.find(
'c')->second;
182 chunk_size_ *= 1024 * 1024;
185 bool output_file_is_dir = output_file.size() &&
187 S_ISDIR(sbuf.st_mode);
188 if (output_file_is_dir && (input_file ==
"-")) {
190 output_file.c_str());
194 if (input_file !=
"-") {
195 bool input_file_is_dir = (0 ==
platform_stat(input_file.c_str(), &sbuf)) &&
196 S_ISDIR(sbuf.st_mode);
197 if (input_file_is_dir) {
198 if (!output_file_is_dir && output_file.size()) {
200 "Input (%s) is a directory but output"
202 input_file.c_str(), output_file.c_str());
209 return Recurse(input_file, output_file);
211 return Publish(input_file, output_file, output_file_is_dir,
false);
214 return Publish(input_file, output_file, output_file_is_dir,
true);
218 const std::string &output_file,
219 bool output_file_is_dir,
220 bool input_file_is_stdin) {
221 if (output_file.size() && verbose_) {
223 output_file.c_str());
224 }
else if (!output_file.size()) {
228 if (input_file_is_stdin) {
231 fd = open(input_file.c_str(), O_RDONLY);
233 std::string errmsg =
"Unable to open input file (" + input_file +
")";
234 perror(errmsg.c_str());
242 std::string errmsg =
"Unable to stat input file (" + input_file +
")";
243 perror(errmsg.c_str());
245 mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
248 uint64_t processed_size;
249 std::vector<uint64_t> chunk_offsets;
250 std::vector<shash::Any> chunk_checksums;
254 ChecksumFdWithChunks(fd, compressor, &processed_size, &file_hash,
255 &chunk_offsets, &chunk_checksums);
257 if (!input_file_is_stdin) {
261 std::string errmsg =
"Unable to checksum input file (" + input_file +
")";
262 perror(errmsg.c_str());
267 if (output_file.size()) {
268 std::string dirname, fname;
269 std::string graft_fname;
270 if (output_file_is_dir) {
272 graft_fname = output_file +
"/.cvmfsgraft-" + fname;
274 SplitPath(output_file, &dirname, &fname);
275 graft_fname = dirname +
"/.cvmfsgraft-" + fname;
277 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
279 std::string errmsg =
"Unable to open graft file (" + graft_fname +
")";
280 perror(errmsg.c_str());
286 const bool with_suffix =
true;
287 std::string graft_contents =
289 "checksum=" + file_hash.
ToString(with_suffix) +
"\n" +
291 if (!chunk_offsets.empty()) {
292 std::vector<std::string> chunk_off_str;
293 chunk_off_str.reserve(chunk_offsets.size());
294 std::vector<std::string> chunk_ck_str;
295 chunk_ck_str.reserve(chunk_offsets.size());
296 for (
unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
297 chunk_off_str.push_back(
StringifyInt(chunk_offsets[idx]));
298 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
300 graft_contents +=
"chunk_offsets=" +
JoinStrings(chunk_off_str,
",") +
"\n";
302 "chunk_checksums=" +
JoinStrings(chunk_ck_str,
",") +
"\n";
304 size_t nbytes = graft_contents.size();
305 const char *buf = graft_contents.c_str();
308 perror(
"Failed writing to graft file");
312 if (output_file.size()) {
319 std::string output_fname;
320 if (output_file_is_dir) {
321 output_fname = output_file +
"/" +
GetFileName(input_file);
323 output_fname = output_file;
326 open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, input_file_mode);
328 std::string errmsg =
"Unable to open output file (" + output_file +
")";
329 perror(errmsg.c_str());
338 const std::string &output_file) {
339 output_file_ = output_file;
340 input_file_ = input_file;
int Main(const ArgumentList &args)
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
NameString GetFileName(const PathString &path)
virtual bool Deflate(const bool flush, unsigned char **inbuf, size_t *inbufsize, unsigned char **outbuf, size_t *outbufsize)=0
void Recurse(const std::string &dir_path) const
string JoinStrings(const vector< string > &strings, const string &joint)
std::string ToString(const bool with_suffix=false) const
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
bool SafeWrite(int fd, const void *buf, size_t nbyte)
std::string AlgorithmName(const zlib::Algorithms alg)
int Publish(const std::string &input_file, const std::string &output_file, bool output_file_is_dir, bool input_file_is_stdin)
bool DirCallback(const std::string &relative_path, const std::string &dir_name)
bool String2Uint64Parse(const std::string &value, uint64_t *result)
void Init(ContextPtr context)
static Compressor * Construct(const Algorithms ¶m)
void SplitPath(const std::string &path, std::string *dirname, std::string *filename)
void FileCallback(const std::string &relative_path, const std::string &file_name)
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
void Final(ContextPtr context, Any *any_digest)
string StringifyInt(const int64_t value)
shash::Algorithms hash_alg_
int Recurse(const std::string &input_file, const std::string &output_file)
void Update(const unsigned char *buffer, const unsigned buffer_length, ContextPtr context)
std::map< char, SharedPtr< std::string > > ArgumentList
Algorithms ParseHashAlgorithm(const string &algorithm_option)
BoolCallback fn_new_dir_prefix
bool ChecksumFdWithChunks(int fd, zlib::Compressor *compressor, uint64_t *file_size, shash::Any *file_hash, std::vector< uint64_t > *chunk_offsets, std::vector< shash::Any > *chunk_checksums)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)