22 shash::Any *file_hash, std::vector<uint64_t> *chunk_offsets,
23 std::vector<shash::Any> *chunk_checksums) {
24 if (!compressor || !file_size || !file_hash) {
31 unsigned char *cur_in_buf = in_buf;
38 file_hash_context.
buffer = alloca(file_hash_context.
size);
42 chunk_hash_context.
buffer = alloca(chunk_hash_context.
size);
47 if (!chunk_offsets || !chunk_checksums) {
50 chunk_offsets->push_back(0);
55 bytes_read = read(fd, cur_in_buf + avail_in, in_buf_size);
56 if (-1 == bytes_read) {
64 *file_size += bytes_read;
65 avail_in += bytes_read;
67 flush = (
static_cast<size_t>(bytes_read) < in_buf_size);
70 unsigned char *cur_out_buf = out_buf;
72 compressor->
Deflate(flush, &cur_in_buf, &avail_in, &cur_out_buf,
87 in_buf_size =
zlib::kZChunk - (cur_in_buf - in_buf) - avail_in;
91 if (do_chunk && (*file_size - chunk_offsets->back() >=
chunk_size_)) {
93 chunk_offsets->push_back(*file_size);
94 chunk_checksums->push_back(chunk_hash);
102 chunk_checksums->push_back(chunk_hash);
106 if (do_chunk && (chunk_offsets->back() == *file_size)
107 && (chunk_offsets->size() > 1)) {
108 chunk_offsets->pop_back();
109 chunk_checksums->pop_back();
116 if (do_chunk && (chunk_offsets->size() == 1)) {
117 *file_hash = (*chunk_checksums)[0];
118 chunk_offsets->clear();
119 chunk_checksums->clear();
125 const std::string &dir_name) {
126 if (!output_file_.size()) {
129 std::string full_output_path = output_file_ +
"/"
130 + (relative_path.size() ? relative_path :
".")
132 std::string full_input_path = input_file_ +
"/"
133 + (relative_path.size() ? relative_path :
".")
139 return MkdirDeep(full_output_path, sbuf.st_mode);
143 const std::string &file_name) {
144 std::string full_input_path = input_file_ +
"/"
145 + (relative_path.size() ? relative_path :
".")
147 std::string full_output_path;
148 if (output_file_.size()) {
149 full_output_path = output_file_ +
"/"
150 + (relative_path.size() ? relative_path :
".") +
"/"
153 Publish(full_input_path, full_output_path,
false,
false);
157 const std::string &input_file = *args.find(
'i')->second;
158 const std::string output_file = (args.find(
'o') == args.end())
160 : *(args.find(
'o')->second);
161 verbose_ = args.find(
'v') != args.end();
162 generate_bulk_hash_ = args.find(
'b') != args.end();
163 hash_alg_ = (args.find(
'a') == args.end())
166 compression_alg_ = (args.find(
'Z') == args.end())
169 *args.find(
'Z')->second);
171 if (args.find(
'c') == args.end()) {
172 chunk_size_ = kDefaultChunkSize;
174 std::string chunk_size = *args.find(
'c')->second;
181 chunk_size_ *= 1024 * 1024;
184 bool output_file_is_dir = output_file.size()
186 && S_ISDIR(sbuf.st_mode);
187 if (output_file_is_dir && (input_file ==
"-")) {
189 output_file.c_str());
193 if (input_file !=
"-") {
194 bool input_file_is_dir = (0 ==
platform_stat(input_file.c_str(), &sbuf))
195 && S_ISDIR(sbuf.st_mode);
196 if (input_file_is_dir) {
197 if (!output_file_is_dir && output_file.size()) {
199 "Input (%s) is a directory but output"
201 input_file.c_str(), output_file.c_str());
208 return Recurse(input_file, output_file);
210 return Publish(input_file, output_file, output_file_is_dir,
false);
213 return Publish(input_file, output_file, output_file_is_dir,
true);
217 const std::string &output_file,
218 bool output_file_is_dir,
219 bool input_file_is_stdin) {
220 if (output_file.size() && verbose_) {
222 output_file.c_str());
223 }
else if (!output_file.size()) {
227 if (input_file_is_stdin) {
230 fd = open(input_file.c_str(), O_RDONLY);
232 std::string errmsg =
"Unable to open input file (" + input_file +
")";
233 perror(errmsg.c_str());
241 std::string errmsg =
"Unable to stat input file (" + input_file +
")";
242 perror(errmsg.c_str());
244 mode_t input_file_mode = input_file_is_stdin ? 0644 : sbuf.st_mode;
247 uint64_t processed_size;
248 std::vector<uint64_t> chunk_offsets;
249 std::vector<shash::Any> chunk_checksums;
252 bool retval = ChecksumFdWithChunks(fd, compressor, &processed_size,
253 &file_hash, &chunk_offsets,
256 if (!input_file_is_stdin) {
260 std::string errmsg =
"Unable to checksum input file (" + input_file +
")";
261 perror(errmsg.c_str());
266 if (output_file.size()) {
267 std::string dirname, fname;
268 std::string graft_fname;
269 if (output_file_is_dir) {
271 graft_fname = output_file +
"/.cvmfsgraft-" + fname;
273 SplitPath(output_file, &dirname, &fname);
274 graft_fname = dirname +
"/.cvmfsgraft-" + fname;
276 fd = open(graft_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
278 std::string errmsg =
"Unable to open graft file (" + graft_fname +
")";
279 perror(errmsg.c_str());
285 const bool with_suffix =
true;
286 std::string graft_contents =
"size=" +
StringifyInt(processed_size) +
"\n"
287 +
"checksum=" + file_hash.
ToString(with_suffix)
288 +
"\n" +
"compression="
290 if (!chunk_offsets.empty()) {
291 std::vector<std::string> chunk_off_str;
292 chunk_off_str.reserve(chunk_offsets.size());
293 std::vector<std::string> chunk_ck_str;
294 chunk_ck_str.reserve(chunk_offsets.size());
295 for (
unsigned idx = 0; idx < chunk_offsets.size(); idx++) {
296 chunk_off_str.push_back(
StringifyInt(chunk_offsets[idx]));
297 chunk_ck_str.push_back(chunk_checksums[idx].ToStringWithSuffix());
299 graft_contents +=
"chunk_offsets=" +
JoinStrings(chunk_off_str,
",") +
"\n";
300 graft_contents +=
"chunk_checksums=" +
JoinStrings(chunk_ck_str,
",")
303 size_t nbytes = graft_contents.size();
304 const char *buf = graft_contents.c_str();
307 perror(
"Failed writing to graft file");
311 if (output_file.size()) {
318 std::string output_fname;
319 if (output_file_is_dir) {
320 output_fname = output_file +
"/" +
GetFileName(input_file);
322 output_fname = output_file;
324 fd = open(output_fname.c_str(), O_CREAT | O_TRUNC | O_WRONLY,
327 std::string errmsg =
"Unable to open output file (" + output_file +
")";
328 perror(errmsg.c_str());
337 const std::string &output_file) {
338 output_file_ = output_file;
339 input_file_ = input_file;
int Main(const ArgumentList &args)
Algorithms ParseCompressionAlgorithm(const std::string &algorithm_option)
NameString GetFileName(const PathString &path)
virtual bool Deflate(const bool flush, unsigned char **inbuf, size_t *inbufsize, unsigned char **outbuf, size_t *outbufsize)=0
void Recurse(const std::string &dir_path) const
string JoinStrings(const vector< string > &strings, const string &joint)
std::string ToString(const bool with_suffix=false) const
A simple recursion engine to abstract the recursion of directories. It provides several callback hook...
bool SafeWrite(int fd, const void *buf, size_t nbyte)
std::string AlgorithmName(const zlib::Algorithms alg)
int Publish(const std::string &input_file, const std::string &output_file, bool output_file_is_dir, bool input_file_is_stdin)
bool DirCallback(const std::string &relative_path, const std::string &dir_name)
bool String2Uint64Parse(const std::string &value, uint64_t *result)
void Init(ContextPtr context)
static Compressor * Construct(const Algorithms ¶m)
void SplitPath(const std::string &path, std::string *dirname, std::string *filename)
void FileCallback(const std::string &relative_path, const std::string &file_name)
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
void Final(ContextPtr context, Any *any_digest)
string StringifyInt(const int64_t value)
shash::Algorithms hash_alg_
int Recurse(const std::string &input_file, const std::string &output_file)
void Update(const unsigned char *buffer, const unsigned buffer_length, ContextPtr context)
std::map< char, SharedPtr< std::string > > ArgumentList
Algorithms ParseHashAlgorithm(const string &algorithm_option)
BoolCallback fn_new_dir_prefix
bool ChecksumFdWithChunks(int fd, zlib::Compressor *compressor, uint64_t *file_size, shash::Any *file_hash, std::vector< uint64_t > *chunk_offsets, std::vector< shash::Any > *chunk_checksums)
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)