/**
 * This file is part of the CernVM file system
 */
4 |
|
|
|
5 |
|
|
#include "sync_item.h" |
6 |
|
|
|
7 |
|
|
|
8 |
|
|
#if !defined(__APPLE__) |
9 |
|
|
#include <sys/sysmacros.h> |
10 |
|
|
#endif // __APPLE__ |
11 |
|
|
|
12 |
|
|
#include <cerrno> |
13 |
|
|
#include <vector> |
14 |
|
|
|
15 |
|
|
#include "duplex_libarchive.h" |
16 |
|
|
#include "ingestion/ingestion_source.h" |
17 |
|
|
#include "sync_mediator.h" |
18 |
|
|
#include "sync_union.h" |
19 |
|
|
|
20 |
|
|
using namespace std; // NOLINT |
21 |
|
|
|
22 |
|
|
namespace publish { |
23 |
|
|
|
24 |
|
|
/**
 * Default constructor.  Creates an item that is not attached to any union
 * engine: both type fields get the zero value of SyncItemType, the graft size
 * is set to -1, all boolean flags are cleared and no chunk list is allocated.
 */
SyncItem::SyncItem()
  : rdonly_type_(static_cast<SyncItemType>(0))
  , graft_size_(-1)
  , scratch_type_(static_cast<SyncItemType>(0))
  , union_engine_(NULL)
  , whiteout_(false)
  , opaque_(false)
  , masked_hardlink_(false)
  , has_catalog_marker_(false)
  , valid_graft_(false)
  , graft_marker_present_(false)
  , external_data_(false)
  , graft_chunklist_(NULL)
  , compression_algorithm_(zlib::kZlibDefault)
{ }
38 |
|
|
|
39 |
|
|
/**
 * Creates an item for the entry `filename` located in `relative_parent_path`.
 *
 * @param relative_parent_path  stored as relative_parent_path_
 * @param filename              stored as filename_
 * @param union_engine          the union engine used to resolve paths
 * @param entry_type            initial value of the scratch type
 *
 * The read-only type starts as kItemUnknown, the graft size as -1, all flags
 * are cleared and the content hash algorithm is set to shash::kAny.
 */
SyncItem::SyncItem(const std::string &relative_parent_path,
                   const std::string &filename,
                   const SyncUnion *union_engine,
                   const SyncItemType entry_type)
  : rdonly_type_(kItemUnknown)
  , graft_size_(-1)
  , scratch_type_(entry_type)
  , filename_(filename)
  , union_engine_(union_engine)
  , whiteout_(false)
  , opaque_(false)
  , masked_hardlink_(false)
  , has_catalog_marker_(false)
  , valid_graft_(false)
  , graft_marker_present_(false)
  , external_data_(false)
  , relative_parent_path_(relative_parent_path)
  , graft_chunklist_(NULL)
  , compression_algorithm_(zlib::kZlibDefault)
{
  // Cannot be set from the initializer list: it is a field of a member.
  content_hash_.algorithm = shash::kAny;
}
60 |
|
|
|
61 |
|
|
/**
 * Releases the chunk list that CheckGraft() may have allocated.  Deleting a
 * NULL pointer is a no-op, so no check is required.
 */
SyncItem::~SyncItem() {
  delete graft_chunklist_;
}
64 |
|
|
|
65 |
|
|
|
66 |
|
|
/**
 * Translates a stat result into a SyncItemType.
 *
 * @param stat  the stat information to classify
 * @return the type reported by stat.GetSyncItemType()
 *
 * If the type is kItemUnknown a warning with the st_mode and the stored errno
 * is printed and the process is aborted.
 */
SyncItemType SyncItem::GetGenericFiletype(const SyncItem::EntryStat &stat) const
{
  const SyncItemType item_type = stat.GetSyncItemType();
  if (item_type != kItemUnknown) {
    return item_type;
  }

  std::string message = "'" + GetRelativePath() +
                        "' has an unsupported file type ";
  message += "(st_mode: " + StringifyInt(stat.stat.st_mode);
  message += " errno: " + StringifyInt(stat.error_code) + ")";
  PrintWarning(message);
  abort();
}
77 |
|
|
|
78 |
|
|
|
79 |
|
|
/**
 * Determines the type of this entry in the read-only branch.
 *
 * @return kItemNew if the stat of the read-only path failed with ENOENT or
 *         ENOTDIR, otherwise the result of GetGenericFiletype().
 */
SyncItemType SyncItem::GetRdOnlyFiletype() const {
  StatRdOnly();

  // The file might not exist in the read-only branch, or a regular file could
  // have been replaced by a directory in the read/write branch, like:
  //   rdonly:
  //     /foo/bar/regular_file   <-- ENOTDIR when asking for (.../is_dir_now)
  //   r/w:
  //     /foo/bar/regular_file/
  //     /foo/bar/regular_file/is_dir_now
  const bool absent_in_rdonly = (rdonly_stat_.error_code == ENOENT) ||
                                (rdonly_stat_.error_code == ENOTDIR);
  if (absent_in_rdonly) {
    return kItemNew;
  }
  return GetGenericFiletype(rdonly_stat_);
}
92 |
|
|
|
93 |
|
|
|
94 |
|
|
/**
 * Determines the type of this entry in the scratch area.
 *
 * @return the result of GetGenericFiletype() for the scratch stat
 *
 * A failed stat is fatal: a warning containing the errno is printed and the
 * process is aborted.
 */
SyncItemType SyncItemNative::GetScratchFiletype() const {
  StatScratch();

  if (scratch_stat_.error_code == 0) {
    return GetGenericFiletype(scratch_stat_);
  }

  std::string message = "Failed to stat() '" + GetRelativePath() +
                        "' in scratch. ";
  message += "(errno: " + StringifyInt(scratch_stat_.error_code) + ")";
  PrintWarning(message);
  abort();
}
104 |
|
|
|
105 |
|
|
/**
 * Determines the type of this entry in the union volume.
 *
 * @return kItemUnknown if the union stat failed with ENOENT or ENOTDIR,
 *         otherwise the result of GetGenericFiletype().
 */
SyncItemType SyncItem::GetUnionFiletype() const {
  StatUnion();

  const bool absent_in_union = (union_stat_.error_code == ENOENT) ||
                               (union_stat_.error_code == ENOTDIR);
  if (absent_in_union) {
    return kItemUnknown;
  }
  return GetGenericFiletype(union_stat_);
}
111 |
|
|
|
112 |
|
|
bool SyncItemNative::IsType(const SyncItemType expected_type) const { |
113 |
|
|
if (filename().substr(0, 12) == ".cvmfsgraft-") { |
114 |
|
|
scratch_type_ = kItemMarker; |
115 |
|
|
} else if (scratch_type_ == kItemUnknown) { |
116 |
|
|
scratch_type_ = GetScratchFiletype(); |
117 |
|
|
} |
118 |
|
|
return scratch_type_ == expected_type; |
119 |
|
|
} |
120 |
|
|
|
121 |
|
|
void SyncItem::MarkAsWhiteout(const std::string &actual_filename) { |
122 |
|
|
StatScratch(true); |
123 |
|
|
// Mark the file as whiteout entry and strip the whiteout prefix |
124 |
|
|
whiteout_ = true; |
125 |
|
|
filename_ = actual_filename; |
126 |
|
|
|
127 |
|
|
// Find the entry in the repository |
128 |
|
|
StatRdOnly(true); // <== refreshing the stat (filename might have changed) |
129 |
|
|
|
130 |
|
|
const SyncItemType deleted_type = (rdonly_stat_.error_code == 0) |
131 |
|
|
? GetRdOnlyFiletype() |
132 |
|
|
: kItemUnknown; |
133 |
|
|
|
134 |
|
|
rdonly_type_ = deleted_type; |
135 |
|
|
scratch_type_ = deleted_type; |
136 |
|
|
|
137 |
|
|
if (deleted_type == kItemUnknown) { |
138 |
|
|
// Marking a SyncItem as 'whiteout' but no file to be removed found: This |
139 |
|
|
// should not happen (actually AUFS prevents users from creating whiteouts) |
140 |
|
|
// but can be provoked through an AUFS 'bug' (see test 593 or CVM-880). |
141 |
|
|
// --> Warn the user, continue with kItemUnknown and cross your fingers! |
142 |
|
|
PrintWarning("'" + GetRelativePath() + "' should be deleted, but was not " |
143 |
|
|
"found in repository."); |
144 |
|
|
} |
145 |
|
|
} |
146 |
|
|
|
147 |
|
|
|
148 |
|
|
/**
 * Flags this item as an opaque directory.  The item is asserted to be a
 * directory.
 */
void SyncItem::MarkAsOpaqueDirectory() {
  assert(IsDirectory());
  opaque_ = true;
}
152 |
|
|
|
153 |
|
|
|
154 |
|
|
unsigned int SyncItem::GetRdOnlyLinkcount() const { |
155 |
|
|
StatRdOnly(); |
156 |
|
|
return rdonly_stat_.stat.st_nlink; |
157 |
|
|
} |
158 |
|
|
|
159 |
|
|
|
160 |
|
|
uint64_t SyncItem::GetRdOnlyInode() const { |
161 |
|
|
StatRdOnly(); |
162 |
|
|
return rdonly_stat_.stat.st_ino; |
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
|
166 |
|
|
unsigned int SyncItem::GetUnionLinkcount() const { |
167 |
|
|
StatUnion(); |
168 |
|
|
return union_stat_.stat.st_nlink; |
169 |
|
|
} |
170 |
|
|
|
171 |
|
|
|
172 |
|
|
uint64_t SyncItem::GetUnionInode() const { |
173 |
|
|
StatUnion(); |
174 |
|
|
return union_stat_.stat.st_ino; |
175 |
|
|
} |
176 |
|
|
|
177 |
|
|
uint64_t SyncItem::GetScratchSize() const { |
178 |
|
|
StatScratch(); |
179 |
|
|
return scratch_stat_.stat.st_size; |
180 |
|
|
} |
181 |
|
|
|
182 |
|
|
uint64_t SyncItem::GetRdOnlySize() const { |
183 |
|
|
StatRdOnly(); |
184 |
|
|
return rdonly_stat_.stat.st_size; |
185 |
|
|
} |
186 |
|
|
|
187 |
|
|
/**
 * @return a newly allocated FileIngestionSource for the union path of this
 *         item.  The object is created with `new`.
 */
IngestionSource *SyncItemNative::CreateIngestionSource() const {
  const std::string union_path = GetUnionPath();
  return new FileIngestionSource(union_path);
}
190 |
|
|
|
191 |
|
|
void SyncItem::StatGeneric(const string &path, |
192 |
|
|
EntryStat *info, |
193 |
|
|
const bool refresh) { |
194 |
|
|
if (info->obtained && !refresh) return; |
195 |
|
|
int retval = platform_lstat(path.c_str(), &info->stat); |
196 |
|
|
info->error_code = (retval != 0) ? errno : 0; |
197 |
|
|
info->obtained = true; |
198 |
|
|
} |
199 |
|
|
|
200 |
|
|
|
201 |
|
|
/**
 * Builds a catalog directory entry from the union volume stat of this item.
 *
 * Mode, uid, gid and mtime are taken from the union stat.  The size is the
 * graft size if one is set (> -1), otherwise the st_size of the union stat.
 * For symlinks the link target is read with readlink(); for character and
 * block devices the size field is overwritten with the device number.
 *
 * @return the populated directory entry
 */
catalog::DirectoryEntryBase SyncItemNative::CreateBasicCatalogDirent() const {
  catalog::DirectoryEntryBase dirent;

  // inode and parent inode is determined at runtime of client
  dirent.inode_ = catalog::DirectoryEntry::kInvalidInode;

  // this might mask the actual link count in case hardlinks are not supported
  // (i.e. on setups using OverlayFS)
  if (HasHardlinks()) {
    dirent.linkcount_ = GetUnionStat().st_nlink;
  } else {
    dirent.linkcount_ = 1;
  }

  dirent.mode_ = GetUnionStat().st_mode;
  dirent.uid_ = GetUnionStat().st_uid;
  dirent.gid_ = GetUnionStat().st_gid;
  if (graft_size_ > -1) {
    dirent.size_ = graft_size_;
  } else {
    dirent.size_ = GetUnionStat().st_size;
  }
  dirent.mtime_ = GetUnionStat().st_mtime;
  dirent.checksum_ = GetContentHash();
  dirent.is_external_file_ = IsExternalData();
  dirent.compression_algorithm_ = GetCompressionAlgorithm();

  const std::string entry_name = filename();
  dirent.name_.Assign(entry_name.data(), entry_name.length());

  if (IsSymlink()) {
    char slnk[PATH_MAX+1];
    const std::string union_path = GetUnionPath();
    const ssize_t length = readlink(union_path.c_str(), slnk, PATH_MAX);
    assert(length >= 0);
    dirent.symlink_.Assign(slnk, length);
  }

  const bool is_device = IsCharacterDevice() || IsBlockDevice();
  if (is_device) {
    // Devices carry their device number in the size field
    dirent.size_ = makedev(GetRdevMajor(), GetRdevMinor());
  }

  return dirent;
}
237 |
|
|
|
238 |
|
|
|
239 |
|
|
std::string SyncItem::GetRdOnlyPath() const { |
240 |
|
|
const string relative_path = GetRelativePath().empty() ? |
241 |
|
|
"" : "/" + GetRelativePath(); |
242 |
|
|
return union_engine_->rdonly_path() + relative_path; |
243 |
|
|
} |
244 |
|
|
|
245 |
|
|
std::string SyncItem::GetUnionPath() const { |
246 |
|
|
const string relative_path = GetRelativePath().empty() ? |
247 |
|
|
"" : "/" + GetRelativePath(); |
248 |
|
|
return union_engine_->union_path() + relative_path; |
249 |
|
|
} |
250 |
|
|
|
251 |
|
|
std::string SyncItem::GetScratchPath() const { |
252 |
|
|
const string relative_path = GetRelativePath().empty() ? |
253 |
|
|
"" : "/" + GetRelativePath(); |
254 |
|
|
return union_engine_->scratch_path() + relative_path; |
255 |
|
|
// return union_engine_->scratch_path() + filename(); |
256 |
|
|
} |
257 |
|
|
|
258 |
|
|
void SyncItem::CheckMarkerFiles() { |
259 |
|
|
if (IsRegularFile()) { |
260 |
|
|
CheckGraft(); |
261 |
|
|
} else if (IsDirectory()) { |
262 |
|
|
CheckCatalogMarker(); |
263 |
|
|
} |
264 |
|
|
} |
265 |
|
|
|
266 |
|
|
/**
 * Records in has_catalog_marker_ whether a ".cvmfscatalog" file exists below
 * the union path of this item.
 */
void SyncItem::CheckCatalogMarker() {
  const std::string marker_path = GetUnionPath() + "/.cvmfscatalog";
  has_catalog_marker_ = FileExists(marker_path);
}
269 |
|
|
|
270 |
|
|
|
271 |
|
|
std::string SyncItem::GetGraftMarkerPath() const { |
272 |
|
|
return union_engine_->scratch_path() + "/" + |
273 |
|
|
((relative_parent_path_.empty()) ? |
274 |
|
|
".cvmfsgraft-" + filename_ : |
275 |
|
|
relative_parent_path_ + (filename_.empty() ? "" : |
276 |
|
|
("/.cvmfsgraft-" + filename_))); |
277 |
|
|
} |
278 |
|
|
|
279 |
|
|
void SyncItem::CheckGraft() { |
280 |
|
|
valid_graft_ = false; |
281 |
|
|
bool found_checksum = false; |
282 |
|
|
std::string checksum_type; |
283 |
|
|
std::string checksum_value; |
284 |
|
|
std::string graftfile = GetGraftMarkerPath(); |
285 |
|
|
LogCvmfs(kLogFsTraversal, kLogDebug, "Checking potential graft path %s.", |
286 |
|
|
graftfile.c_str()); |
287 |
|
|
FILE *fp = fopen(graftfile.c_str(), "r"); |
288 |
|
|
if (fp == NULL) { |
289 |
|
|
// This sync item can be a file from a removed directory tree on overlayfs. |
290 |
|
|
// In this case, the entire tree is missing on the scratch directory and |
291 |
|
|
// the errno is ENOTDIR. |
292 |
|
|
if ((errno != ENOENT) && (errno != ENOTDIR)) { |
293 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to open graft file " |
294 |
|
|
"(%s): %s (errno=%d)", |
295 |
|
|
graftfile.c_str(), strerror(errno), errno); |
296 |
|
|
} |
297 |
|
|
return; |
298 |
|
|
} |
299 |
|
|
graft_marker_present_ = true; |
300 |
|
|
valid_graft_ = true; |
301 |
|
|
std::string line; |
302 |
|
|
std::vector<std::string> contents; |
303 |
|
|
|
304 |
|
|
std::vector<off_t> chunk_offsets; |
305 |
|
|
std::vector<shash::Any> chunk_checksums; |
306 |
|
|
|
307 |
|
|
while (GetLineFile(fp, &line)) { |
308 |
|
|
std::string trimmed_line = Trim(line); |
309 |
|
|
|
310 |
|
|
if (!trimmed_line.size()) {continue;} |
311 |
|
|
if (trimmed_line[0] == '#') {continue;} |
312 |
|
|
|
313 |
|
|
std::vector<std::string> info = SplitString(trimmed_line, '=', 2); |
314 |
|
|
|
315 |
|
|
if (info.size() != 2) { |
316 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid line in graft file: %s", |
317 |
|
|
trimmed_line.c_str()); |
318 |
|
|
} |
319 |
|
|
info[0] = Trim(info[0]); |
320 |
|
|
info[1] = Trim(info[1]); |
321 |
|
|
if (info[0] == "size") { |
322 |
|
|
uint64_t tmp_size; |
323 |
|
|
if (!String2Uint64Parse(info[1], &tmp_size)) { |
324 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Failed to parse value of %s " |
325 |
|
|
"to integer: %s (errno=%d)", trimmed_line.c_str(), |
326 |
|
|
strerror(errno), errno); |
327 |
|
|
continue; |
328 |
|
|
} |
329 |
|
|
graft_size_ = tmp_size; |
330 |
|
|
} else if (info[0] == "checksum") { |
331 |
|
|
std::string hash_str = info[1]; |
332 |
|
|
shash::HexPtr hashP(hash_str); |
333 |
|
|
if (hashP.IsValid()) { |
334 |
|
|
content_hash_ = shash::MkFromHexPtr(hashP); |
335 |
|
|
found_checksum = true; |
336 |
|
|
} else { |
337 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid checksum value: %s.", |
338 |
|
|
info[1].c_str()); |
339 |
|
|
} |
340 |
|
|
continue; |
341 |
|
|
} else if (info[0] == "chunk_offsets") { |
342 |
|
|
std::vector<std::string> offsets = SplitString(info[1], ','); |
343 |
|
|
for (std::vector<std::string>::const_iterator it = offsets.begin(); |
344 |
|
|
it != offsets.end(); it++) |
345 |
|
|
{ |
346 |
|
|
uint64_t val; |
347 |
|
|
if (!String2Uint64Parse(*it, &val)) { |
348 |
|
|
valid_graft_ = false; |
349 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk offset: %s.", |
350 |
|
|
it->c_str()); |
351 |
|
|
break; |
352 |
|
|
} |
353 |
|
|
chunk_offsets.push_back(val); |
354 |
|
|
} |
355 |
|
|
} else if (info[0] == "chunk_checksums") { |
356 |
|
|
std::vector<std::string> csums = SplitString(info[1], ','); |
357 |
|
|
for (std::vector<std::string>::const_iterator it = csums.begin(); |
358 |
|
|
it != csums.end(); it++) |
359 |
|
|
{ |
360 |
|
|
shash::HexPtr hashP(*it); |
361 |
|
|
if (hashP.IsValid()) { |
362 |
|
|
chunk_checksums.push_back(shash::MkFromHexPtr(hashP)); |
363 |
|
|
} else { |
364 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Invalid chunk checksum " |
365 |
|
|
"value: %s.", it->c_str()); |
366 |
|
|
valid_graft_ = false; |
367 |
|
|
break; |
368 |
|
|
} |
369 |
|
|
} |
370 |
|
|
} |
371 |
|
|
} |
372 |
|
|
if (!feof(fp)) { |
373 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Unable to read from catalog " |
374 |
|
|
"marker (%s): %s (errno=%d)", |
375 |
|
|
graftfile.c_str(), strerror(errno), errno); |
376 |
|
|
} |
377 |
|
|
fclose(fp); |
378 |
|
|
valid_graft_ = valid_graft_ && (graft_size_ > -1) && found_checksum |
379 |
|
|
&& (chunk_checksums.size() == chunk_offsets.size()); |
380 |
|
|
|
381 |
|
|
if (!valid_graft_ || chunk_offsets.empty()) |
382 |
|
|
return; |
383 |
|
|
|
384 |
|
|
// Parse chunks |
385 |
|
|
graft_chunklist_ = new FileChunkList(chunk_offsets.size()); |
386 |
|
|
off_t last_offset = chunk_offsets[0]; |
387 |
|
|
if (last_offset != 0) { |
388 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "First chunk offset must be 0" |
389 |
|
|
" (in graft marker %s).", graftfile.c_str()); |
390 |
|
|
valid_graft_ = false; |
391 |
|
|
} |
392 |
|
|
for (unsigned idx = 1; idx < chunk_offsets.size(); idx++) { |
393 |
|
|
off_t cur_offset = chunk_offsets[idx]; |
394 |
|
|
if (last_offset >= cur_offset) { |
395 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Chunk offsets must be sorted " |
396 |
|
|
"in strictly increasing order (in graft marker %s).", |
397 |
|
|
graftfile.c_str()); |
398 |
|
|
valid_graft_ = false; |
399 |
|
|
break; |
400 |
|
|
} |
401 |
|
|
size_t cur_size = cur_offset - last_offset; |
402 |
|
|
graft_chunklist_->PushBack(FileChunk(chunk_checksums[idx - 1], |
403 |
|
|
last_offset, |
404 |
|
|
cur_size)); |
405 |
|
|
last_offset = cur_offset; |
406 |
|
|
} |
407 |
|
|
if (graft_size_ <= last_offset) { |
408 |
|
|
LogCvmfs(kLogFsTraversal, kLogWarning, "Last offset must be strictly " |
409 |
|
|
"less than total file size (in graft marker %s).", |
410 |
|
|
graftfile.c_str()); |
411 |
|
|
valid_graft_ = false; |
412 |
|
|
} |
413 |
|
|
graft_chunklist_->PushBack(FileChunk(chunk_checksums.back(), |
414 |
|
|
last_offset, |
415 |
|
|
graft_size_ - last_offset)); |
416 |
|
|
} |
417 |
|
|
|
418 |
|
|
} // namespace publish |