16 namespace swissknife {
// Appends the command's parameter descriptions to `r`: 'r' and 'o' are
// registered as mandatory; 'n', 'k', 'l', 'h' and '@' as optional.
// NOTE(review): the enclosing function header lies outside this excerpt.
20 r.push_back(Parameter::Mandatory(
21 'r',
"repository URL (absolute local path or remote URL)"));
22 r.push_back(Parameter::Mandatory(
'o',
"output database file"));
23 r.push_back(Parameter::Optional(
'n',
"fully qualified repository name"));
24 r.push_back(Parameter::Optional(
'k',
"repository master key(s) / dir"));
25 r.push_back(Parameter::Optional(
'l',
"temporary directory"));
26 r.push_back(Parameter::Optional(
'h',
"root hash (other than trunk)"));
27 r.push_back(Parameter::Optional(
'@',
"proxy url"));
// Reads the parsed arguments, sets up the temporary database path, the
// download manager and the signature manager, calls Run() and maps its
// result to the return value (0 on success, 1 otherwise).
// NOTE(review): the function header and several statements are not part of
// this excerpt.
// 'r' and 'o' are dereferenced without a presence check (they are registered
// as mandatory parameters).
33 const std::string &repo_url = *args.find(
'r')->second;
34 db_path_ = *args.find(
'o')->second;
// Optional arguments 'n' and 'k' fall back to an empty string when absent.
35 const std::string &repo_name =
36 (args.count(
'n') > 0) ? *args.find(
'n')->second :
"";
37 std::string repo_keys =
38 (args.count(
'k') > 0) ? *args.find(
'k')->second :
"";
// The temporary directory defaults to "/tmp" when 'l' is not given.
41 const std::string &tmp_dir =
42 (args.count(
'l') > 0) ? *args.find(
'l')->second :
"/tmp";
43 if (args.count(
'h') > 0) {
// tmp_db_path_ is the prefix CatalogCallback() uses for its numbered ".db"
// files.
48 tmp_db_path_ = tmp_dir +
"/cvmfs_filestats/";
49 atomic_init32(&num_downloaded_);
53 const bool follow_redirects =
false;
// An absent '@' argument yields an empty proxy string.
54 const string proxy = (args.count(
'@') > 0) ? *args.find(
'@')->second :
"";
55 if (!this->InitDownloadManager(follow_redirects, proxy) ||
56 !this->InitSignatureManager(repo_keys)) {
// Run() is invoked from two places; the surrounding branch conditions are not
// visible in this excerpt.
67 success = Run(&fetcher);
70 success = Run(&fetcher);
73 return (success) ? 0 : 1;
// Drives one statistics run for the given object fetcher.
// Visible steps: initialize finished_ with atomic_init32, unlink abs_path,
// prepare the database statements, start MainProcessing on
// thread_processing_, then increment finished_, join the thread and destroy
// the statements.
// NOTE(review): the statements between these steps are not part of this
// excerpt.
76 template <
class ObjectFetcherT>
77 bool CommandFileStats::Run(ObjectFetcherT *object_fetcher)
79 atomic_init32(&finished_);
82 unlink(abs_path.c_str());
84 db_->InitStatements();
// The processing thread receives `this` as its data pointer.
93 pthread_create(&thread_processing_, NULL, MainProcessing,
this);
// MainProcessing() polls finished_; incrementing it lets its loop end once
// processed has caught up with downloaded. The join waits for that.
97 atomic_inc32(&finished_);
98 pthread_join(thread_processing_, NULL);
100 db_->DestroyStatements();
// Builds the path of the next numbered database file,
// tmp_db_path_ + (num_downloaded_ + 1) + ".db", and afterwards increments
// num_downloaded_.
// NOTE(review): the parameter list and the statement that uses out_path are
// not part of this excerpt.
105 void CommandFileStats::CatalogCallback(
107 int32_t num = atomic_read32(&num_downloaded_);
108 string out_path = tmp_db_path_ +
StringifyInt(num + 1) +
".db";
// The counter is raised only after out_path has been built.
110 atomic_inc32(&num_downloaded_);
// Thread entry point passed to pthread_create() in Run().
// Keeps looping while repo_stats->finished_ reads 0 or while
// processed < downloaded, and re-reads finished_ inside the loop.
// NOTE(review): repo_stats is presumably `data` cast back to the command
// object; the cast and the updates of processed/downloaded are not visible.
113 void *CommandFileStats::MainProcessing(
void *data) {
117 int32_t fin = atomic_read32(&repo_stats->
finished_);
120 while (fin == 0 || processed < downloaded) {
121 if (processed < downloaded) {
129 fin = atomic_read32(&repo_stats->
finished_);
// Imports one catalog database (db_path) into the statistics database.
// Visible steps: store a catalogs row using the result of
// "SELECT count(*) FROM catalog;", walk the catalog rows to record symlinks,
// files and objects, then walk the chunks table ordered by md5path so that
// consecutive chunks of the same path are attached to one chunked-file entry.
// Row ids and md5path halves are held in int64_t so that values returned as
// int64_t / sqlite3_int64 are not narrowed to int.
// NOTE(review): several statements of this function are not part of this
// excerpt.
138 void CommandFileStats::ProcessCatalog(
string db_path) {
147 "SELECT count(*) FROM catalog;");
149 int64_t cur_catalog_id = db_->StoreCatalog(catalog_count->
RetrieveInt64(0),
151 delete catalog_count;
155 "SELECT hash, size, flags, symlink FROM catalog;");
158 "SELECT md5path_1, md5path_2, size, hash FROM chunks "
159 "ORDER BY md5path_1 ASC, md5path_2 ASC;");
167 catalog::SqlDirent::kFlagLink) {
169 db_->StoreSymlink(symlink_length);
171 catalog::SqlDirent::kFlagFile)
174 catalog::SqlDirent::kFlagFileChunk)
176 int64_t object_id = db_->StoreObject(hash, num_bytes, size);
177 db_->StoreFile(cur_catalog_id, object_id);
181 db_->StoreObject(hash, num_bytes, size);
// A change in (md5path_1, md5path_2) between consecutive rows starts a new
// chunked file; the ORDER BY in the chunks query keeps equal paths adjacent.
186 int64_t old_md5path_1 = 0, old_md5path_2 = 0;
187 int64_t md5path_1 = 0, md5path_2 = 0;
189 while (chunks_list->FetchRow()) {
190 md5path_1 = chunks_list->RetrieveInt64(0);
191 md5path_2 = chunks_list->RetrieveInt64(1);
192 if (md5path_1 != old_md5path_1 || md5path_2 != old_md5path_2) {
193 cur_file_id = db_->StoreChunkedFile(cur_catalog_id);
195 const void *hash = chunks_list->RetrieveBlob(3);
196 int num_bytes = chunks_list->RetrieveBytes(3);
197 int64_t
size = chunks_list->RetrieveInt64(2);
198 db_->StoreChunk(hash, num_bytes, size, cur_file_id);
199 old_md5path_1 = md5path_1;
200 old_md5path_2 = md5path_2;
// Definitions of the FileStatsDatabase static members for the latest schema
// and schema revision; both are initialized to 1.
208 float FileStatsDatabase::kLatestSchema = 1;
209 unsigned FileStatsDatabase::kLatestSchemaRevision = 1;
// Creates the schema of an empty statistics database.
// Visible statements create the tables catalogs, objects, files,
// files_objects and symlinks, plus the indexes idx_object_hash (objects.hash),
// idx_file_id and idx_object_id (both on files_objects).
// files.catalog_id references catalogs; files_objects references files and
// objects.
// NOTE(review): parts of the CREATE TABLE statements and the return value are
// not part of this excerpt.
211 bool FileStatsDatabase::CreateEmptyDatabase() {
214 "CREATE TABLE catalogs ("
215 "catalog_id INTEGER PRIMARY KEY,"
216 "num_entries INTEGER,"
220 "CREATE TABLE objects ("
221 "object_id INTEGER PRIMARY KEY,"
226 "CREATE INDEX idx_object_hash "
227 "ON objects (hash);").
Execute();
229 "CREATE TABLE files ("
230 "file_id INTEGER PRIMARY KEY,"
231 "catalog_id INTEGER,"
232 "FOREIGN KEY (catalog_id) REFERENCES catalogs (catalog_id)"
235 "CREATE TABLE files_objects ("
238 "FOREIGN KEY (file_id) REFERENCES files (file_id),"
239 "FOREIGN KEY (object_id) REFERENCES objects (object_id));").
Execute();
241 "CREATE INDEX idx_file_id ON files_objects (file_id);").
Execute();
243 "CREATE INDEX idx_object_id ON files_objects (object_id);").
Execute();
245 "CREATE TABLE symlinks ("
// Allocates the SQL statement objects used by the Store*() methods with
// `new sqlite::Sql(sqlite_db(), ...)`: inserts into catalogs, objects, files,
// files_objects and symlinks, and a lookup of object_id by hash.
// The objects are released again in DestroyStatements().
// NOTE(review): some of the assignment lines are not part of this excerpt.
250 void FileStatsDatabase::InitStatements() {
251 query_insert_catalog =
new sqlite::Sql(sqlite_db(),
252 "INSERT INTO catalogs (num_entries, file_size) VALUES (:num, :size);");
254 "INSERT INTO objects (hash, size) VALUES (:hash, :size);");
256 "INSERT INTO files (catalog_id) VALUES (:catalog);");
257 query_insert_file_object =
new sqlite::Sql(sqlite_db(),
258 "INSERT INTO files_objects (file_id, object_id) VALUES (:file, :object);");
259 query_insert_symlink =
new sqlite::Sql(sqlite_db(),
260 "INSERT INTO symlinks (length) VALUES(:length);");
262 "SELECT object_id FROM objects WHERE hash = :hash;");
// Deletes the six statement objects: the five insert statements and the
// object lookup statement.
// The pointers are not reset here, so they must not be used again before
// they are reassigned.
265 void FileStatsDatabase::DestroyStatements() {
266 delete query_insert_catalog;
267 delete query_insert_object;
268 delete query_insert_file;
269 delete query_insert_file_object;
270 delete query_insert_symlink;
271 delete query_lookup_object;
// Inserts one row into catalogs: num_entries is bound to parameter 1 and
// file_size to parameter 2.
// Returns the value of sqlite3_last_insert_rowid() after the insert.
// NOTE(review): the declaration of file_size is not part of this excerpt.
274 int64_t FileStatsDatabase::StoreCatalog(int64_t num_entries,
276 query_insert_catalog->Reset();
277 query_insert_catalog->BindInt64(1, num_entries);
278 query_insert_catalog->BindInt64(2, file_size);
279 query_insert_catalog->Execute();
280 return sqlite3_last_insert_rowid(sqlite_db());
// Inserts a files row for catalog_id and links it to object_id through a
// files_objects row.
// The new row id is kept as int64_t: sqlite3_last_insert_rowid() yields a
// 64-bit value, which an int would narrow.
// NOTE(review): the return statement is not part of this excerpt.
283 int64_t FileStatsDatabase::StoreFile(int64_t catalog_id, int64_t object_id) {
284 query_insert_file->Reset();
285 query_insert_file->BindInt64(1, catalog_id);
286 query_insert_file->Execute();
// Must be read before the next insert, which changes the last insert rowid.
287 const int64_t file_id = sqlite3_last_insert_rowid(sqlite_db());
289 query_insert_file_object->Reset();
290 query_insert_file_object->BindInt64(1, file_id);
291 query_insert_file_object->BindInt64(2, object_id);
292 query_insert_file_object->Execute();
// Inserts a files row for catalog_id without linking any object to it.
// Returns the value of sqlite3_last_insert_rowid() after the insert, which
// ProcessCatalog() passes to StoreChunk() as file_id.
296 int64_t FileStatsDatabase::StoreChunkedFile(int64_t catalog_id) {
297 query_insert_file->Reset();
298 query_insert_file->BindInt64(1, catalog_id);
299 query_insert_file->Execute();
300 return sqlite3_last_insert_rowid(sqlite_db());
// Stores (or looks up) the object for one chunk via StoreObject() and links
// it to file_id through a files_objects row.
// Returns the value of sqlite3_last_insert_rowid() after the files_objects
// insert.
// The object id is kept as int64_t, matching StoreObject()'s return type, so
// it is not narrowed to int.
303 int64_t FileStatsDatabase::StoreChunk(
const void *hash,
int hash_size,
304 int64_t
size, int64_t file_id) {
305 const int64_t object_id = StoreObject(hash, hash_size, size);
307 query_insert_file_object->Reset();
308 query_insert_file_object->BindInt64(1, file_id);
309 query_insert_file_object->BindInt64(2, object_id);
310 query_insert_file_object->Execute();
311 return sqlite3_last_insert_rowid(sqlite_db());
// Returns the object_id for the given hash, inserting a new objects row
// (hash, size) when the lookup finds none.
// The lookup path reads the id with RetrieveInt64() so that it matches the
// int64_t return type and the insert path, which returns the 64-bit
// sqlite3_last_insert_rowid().
// NOTE(review): the declaration of size is not part of this excerpt.
314 int64_t FileStatsDatabase::StoreObject(
const void *hash,
int hash_size,
316 query_lookup_object->Reset();
317 query_lookup_object->BindBlob(1, hash, hash_size);
// An existing row means the object was stored before; reuse its id.
318 if (query_lookup_object->FetchRow()) {
319 return query_lookup_object->RetrieveInt64(0);
321 query_insert_object->Reset();
322 query_insert_object->BindBlob(1, hash, hash_size);
323 query_insert_object->BindInt64(2, size);
324 query_insert_object->Execute();
325 return sqlite3_last_insert_rowid(sqlite_db());
// Inserts one row into symlinks with the given length bound to parameter 1.
// Returns the value of sqlite3_last_insert_rowid() after the insert.
329 int64_t FileStatsDatabase::StoreSymlink(int64_t length) {
330 query_insert_symlink->Reset();
331 query_insert_symlink->BindInt64(1, length);
332 query_insert_symlink->Execute();
333 return sqlite3_last_insert_rowid(sqlite_db());
CallbackPtr RegisterListener(typename BoundClosure< CatalogTraversalData< ObjectFetcherT::CatalogTN >, DelegateT, ClosureDataT >::CallbackMethod method, DelegateT *delegate, ClosureDataT data)
std::string database_path() const
static Publisher * Create(const SettingsPublisher &settings)
const void * RetrieveBlob(const int idx_column) const
std::vector< Parameter > ParameterList
string JoinStrings(const vector< string > &strings, const string &joint)
bool IsHttpUrl(const std::string &path)
bool BeginTransaction() const
ObjectFetcherT * object_fetcher
assert((mem||(size==0))&&"Out Of Memory")
static const int kFlagFileChunk
static const int kFlagFile
bool CopyPath2Path(const string &src, const string &dest)
static DerivedT * Open(const std::string &filename, const OpenMode open_mode)
atomic_int32 num_downloaded_
static const int kFlagLink
std::string GetAbsolutePath(const std::string &path)
const char kSuffixCatalog
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
sqlite3_int64 RetrieveInt64(const int idx_column) const
string StringifyInt(const int64_t value)
sqlite3 * sqlite_db() const
bool CommitTransaction() const
bool DirectoryExists(const std::string &path)
bool Traverse(const TraversalType type=Base::kBreadthFirst)
void ProcessCatalog(string db_path)
std::map< char, SharedPtr< std::string > > ArgumentList
Any MkFromHexPtr(const HexPtr hex, const char suffix)
int64_t GetFileSize(const std::string &path)
int RetrieveInt(const int idx_column) const
std::vector< std::string > FindFilesBySuffix(const std::string &dir, const std::string &suffix)
int RetrieveBytes(const int idx_column) const
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)