CernVM-FS  2.10.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
statistics_database.cc
Go to the documentation of this file.
1 
5 #include "statistics_database.h"
6 
7 #include "util/exception.h"
8 
9 
12 
13 // Changelog
14 // 1 --> 2: (Sep 4 2019)
15 // * change column name `finished_time` -> `finish_time`
16 // in publish_statistics table
17 // * add column `revision` to publish_statistics table
18 // * change column name `duplicated_files` -> `chunks_duplicated`
19 // in publish_statistics table
20 // * add column `chunks_added` to publish_statistics table
21 // * add column `symlinks_added` to publish_statistics table
22 // * add column `symlinks_removed` to publish_statistics table
23 // * add column `symlinks_changed` to publish_statistics table
24 // * change column name `finished_time` -> `finish_time`
25 // in gc_statistics table
26 // 2 --> 3: (Jan 14 2020)
27 // * add `success` column to publish_statistics table (1 for success
28 // 0 for fail)
29 // * add `success` column to gc_statistics table (1 for success
30 // 0 for fail)
31 
// Definition of the static member StatisticsDatabase::instances, starting at 0.
// NOTE(review): presumably a count of StatisticsDatabase objects; no code that
// modifies it is visible in this listing -- confirm against the full source.
33 unsigned int StatisticsDatabase::instances = 0;
35 
36 
37 namespace {
38 
39 struct PublishStats {
40  std::string revision;
41  std::string files_added;
42  std::string files_removed;
43  std::string files_changed;
44  std::string chunks_added;
45  std::string chunks_duplicated;
46  std::string catalogs_added;
47  std::string dirs_added;
48  std::string dirs_removed;
49  std::string dirs_changed;
50  std::string symlinks_added;
51  std::string symlinks_removed;
52  std::string symlinks_changed;
53  std::string bytes_added;
54  std::string bytes_removed;
55  std::string bytes_uploaded;
57 
58  explicit PublishStats(const perf::Statistics *statistics):
59  revision(statistics->
60  Lookup("publish.revision")->ToString()),
61  files_added(statistics->
62  Lookup("publish.n_files_added")->ToString()),
63  files_removed(statistics->
64  Lookup("publish.n_files_removed")->ToString()),
65  files_changed(statistics->
66  Lookup("publish.n_files_changed")->ToString()),
67  chunks_added(statistics->
68  Lookup("publish.n_chunks_added")->ToString()),
69  chunks_duplicated(statistics->
70  Lookup("publish.n_chunks_duplicated")->ToString()),
71  catalogs_added(statistics->
72  Lookup("publish.n_catalogs_added")->ToString()),
73  dirs_added(statistics->
74  Lookup("publish.n_directories_added")->ToString()),
75  dirs_removed(statistics->
76  Lookup("publish.n_directories_removed")->ToString()),
77  dirs_changed(statistics->
78  Lookup("publish.n_directories_changed")->ToString()),
79  symlinks_added(statistics->
80  Lookup("publish.n_symlinks_added")->ToString()),
81  symlinks_removed(statistics->
82  Lookup("publish.n_symlinks_removed")->ToString()),
83  symlinks_changed(statistics->
84  Lookup("publish.n_symlinks_changed")->ToString()),
85  bytes_added(statistics->
86  Lookup("publish.sz_added_bytes")->ToString()),
87  bytes_removed(statistics->
88  Lookup("publish.sz_removed_bytes")->ToString()),
89  bytes_uploaded(statistics->
90  Lookup("publish.sz_uploaded_bytes")->ToString()),
91  catalog_bytes_uploaded(statistics->
92  Lookup("publish.sz_uploaded_catalog_bytes")->ToString()) {
93  }
94 };
95 
96 
97 struct GcStats {
98  std::string n_preserved_catalogs;
99  std::string n_condemned_catalogs;
100  std::string n_condemned_objects;
101  std::string sz_condemned_bytes;
102 
103  explicit GcStats(const perf::Statistics *statistics) {
104  perf::Counter *c = NULL;
105  c = statistics->Lookup("gc.n_preserved_catalogs");
106  n_preserved_catalogs = c ? c->ToString() : "0";
107  c = statistics->Lookup("gc.n_condemned_catalogs");
108  n_condemned_catalogs = c ? c->ToString() : "0";
109  c = statistics->Lookup("gc.n_condemned_objects");
110  n_condemned_objects = c ? c->ToString() : "0";
111  c = statistics->Lookup("gc.sz_condemned_bytes");
112  sz_condemned_bytes = c ? c->ToString() : "0";
113  }
114 };
115 
116 
123 std::string PrepareStatementIntoPublish(const perf::Statistics *statistics,
124  const std::string &start_time,
125  const std::string &finish_time,
126  const bool success) {
127  struct PublishStats stats = PublishStats(statistics);
128  std::string insert_statement =
129  "INSERT INTO publish_statistics ("
130  "start_time,"
131  "finish_time,"
132  "revision,"
133  "files_added,"
134  "files_removed,"
135  "files_changed,"
136  "chunks_added,"
137  "chunks_duplicated,"
138  "catalogs_added,"
139  "directories_added,"
140  "directories_removed,"
141  "directories_changed,"
142  "symlinks_added,"
143  "symlinks_removed,"
144  "symlinks_changed,"
145  "sz_bytes_added,"
146  "sz_bytes_removed,"
147  "sz_bytes_uploaded,"
148  "sz_catalog_bytes_uploaded,"
149  "success)"
150  " VALUES("
151  "'"+start_time+"',"+
152  "'"+finish_time+"',"+
153  stats.revision+"," +
154  stats.files_added+"," +
155  stats.files_removed +","+
156  stats.files_changed + "," +
157  stats.chunks_added + "," +
158  stats.chunks_duplicated + "," +
159  stats.catalogs_added + "," +
160  stats.dirs_added + "," +
161  stats.dirs_removed + "," +
162  stats.dirs_changed + "," +
163  stats.symlinks_added + "," +
164  stats.symlinks_removed + "," +
165  stats.symlinks_changed + "," +
166  stats.bytes_added + "," +
167  stats.bytes_removed + "," +
168  stats.bytes_uploaded + "," +
169  stats.catalog_bytes_uploaded + "," +
170  (success ? "1" : "0") + ");";
171  return insert_statement;
172 }
173 
174 
184 std::string PrepareStatementIntoGc(const perf::Statistics *statistics,
185  const std::string &start_time,
186  const std::string &finish_time,
187  const std::string &repo_name,
188  const bool success) {
189  struct GcStats stats = GcStats(statistics);
190  std::string insert_statement = "";
191  if (StatisticsDatabase::GcExtendedStats(repo_name)) {
192  insert_statement =
193  "INSERT INTO gc_statistics ("
194  "start_time,"
195  "finish_time,"
196  "n_preserved_catalogs,"
197  "n_condemned_catalogs,"
198  "n_condemned_objects,"
199  "sz_condemned_bytes,"
200  "success)"
201  " VALUES("
202  "'" + start_time + "'," +
203  "'" + finish_time + "'," +
204  stats.n_preserved_catalogs + "," +
205  stats.n_condemned_catalogs + ","+
206  stats.n_condemned_objects + "," +
207  stats.sz_condemned_bytes + "," +
208  (success ? "1" : "0") + ");";
209  } else {
210  // insert values except sz_condemned_bytes
211  insert_statement =
212  "INSERT INTO gc_statistics ("
213  "start_time,"
214  "finish_time,"
215  "n_preserved_catalogs,"
216  "n_condemned_catalogs,"
217  "n_condemned_objects,"
218  "success)"
219  " VALUES("
220  "'" + start_time + "'," +
221  "'" + finish_time + "'," +
222  stats.n_preserved_catalogs + "," +
223  stats.n_condemned_catalogs + "," +
224  stats.n_condemned_objects + "," +
225  (success ? "1" : "0") + ");";
226  }
227  return insert_statement;
228 }
229 
230 } // namespace
231 
232 
235  bool ret1 = sqlite::Sql(sqlite_db(),
236  "CREATE TABLE publish_statistics ("
237  "publish_id INTEGER PRIMARY KEY,"
238  "start_time TEXT,"
239  "finish_time TEXT,"
240  "revision INTEGER,"
241  "files_added INTEGER,"
242  "files_removed INTEGER,"
243  "files_changed INTEGER,"
244  "chunks_added INTEGER,"
245  "chunks_duplicated INTEGER,"
246  "catalogs_added INTEGER,"
247  "directories_added INTEGER,"
248  "directories_removed INTEGER,"
249  "directories_changed INTEGER,"
250  "symlinks_added INTEGER,"
251  "symlinks_removed INTEGER,"
252  "symlinks_changed INTEGER,"
253  "sz_bytes_added INTEGER,"
254  "sz_bytes_removed INTEGER,"
255  "sz_bytes_uploaded INTEGER,"
256  "sz_catalog_bytes_uploaded INTEGER,"
257  "success INTEGER);").Execute();
258  bool ret2 = sqlite::Sql(sqlite_db(),
259  "CREATE TABLE gc_statistics ("
260  "gc_id INTEGER PRIMARY KEY,"
261  "start_time TEXT,"
262  "finish_time TEXT,"
263  "n_preserved_catalogs INTEGER,"
264  "n_condemned_catalogs INTEGER,"
265  "n_condemned_objects INTEGER,"
266  "sz_condemned_bytes INTEGER,"
267  "success INTEGER);").Execute();
268  return ret1 & ret2;
269 }
270 
271 
274  return (schema_version() > kLatestCompatibleSchema - 0.1 &&
276 }
277 
278 
282  (schema_revision() == 1)) {
283  LogCvmfs(kLogCvmfs, kLogDebug, "upgrading schema revision (1 --> 2) of "
284  "statistics database");
285 
286  sqlite::Sql publish_upgrade2_1(this->sqlite_db(), "ALTER TABLE "
287  "publish_statistics RENAME COLUMN finished_time TO finish_time;");
288  sqlite::Sql publish_upgrade2_2(this->sqlite_db(), "ALTER TABLE "
289  "publish_statistics ADD revision INTEGER;");
290  sqlite::Sql publish_upgrade2_3(this->sqlite_db(), "ALTER TABLE "
291  "publish_statistics RENAME COLUMN duplicated_files TO chunks_duplicated;");
292  sqlite::Sql publish_upgrade2_4(this->sqlite_db(), "ALTER TABLE "
293  "publish_statistics ADD chunks_added INTEGER;");
294  sqlite::Sql publish_upgrade2_5(this->sqlite_db(), "ALTER TABLE "
295  "publish_statistics ADD symlinks_added INTEGER;");
296  sqlite::Sql publish_upgrade2_6(this->sqlite_db(), "ALTER TABLE "
297  "publish_statistics ADD symlinks_removed INTEGER;");
298  sqlite::Sql publish_upgrade2_7(this->sqlite_db(), "ALTER TABLE "
299  "publish_statistics ADD symlinks_changed INTEGER;");
300  sqlite::Sql publish_upgrade2_8(this->sqlite_db(), "ALTER TABLE "
301  "publish_statistics ADD catalogs_added INTEGER;");
302  sqlite::Sql publish_upgrade2_9(this->sqlite_db(), "ALTER TABLE "
303  "publish_statistics ADD sz_catalog_bytes_uploaded INTEGER;");
304 
305  if (!publish_upgrade2_1.Execute() ||
306  !publish_upgrade2_2.Execute() ||
307  !publish_upgrade2_3.Execute() ||
308  !publish_upgrade2_4.Execute() ||
309  !publish_upgrade2_5.Execute() ||
310  !publish_upgrade2_6.Execute() ||
311  !publish_upgrade2_7.Execute() ||
312  !publish_upgrade2_8.Execute() ||
313  !publish_upgrade2_9.Execute()) {
314  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade publish_statistics"
315  " table of statistics database");
316  return false;
317  }
318 
319  sqlite::Sql gc_upgrade2_1(this->sqlite_db(), "ALTER TABLE gc_statistics"
320  " RENAME COLUMN finished_time TO finish_time;");
321 
322  if (!gc_upgrade2_1.Execute()) {
323  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade gc_statistics"
324  " table of statistics database");
325  return false;
326  }
327 
329  if (!StoreSchemaRevision()) {
330  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade schema revision"
331  " of statistics database");
332  return false;
333  }
334  }
336  (schema_revision() == 2)) {
337  LogCvmfs(kLogCvmfs, kLogDebug, "upgrading schema revision (2 --> 3) of "
338  "statistics database");
339 
340  sqlite::Sql publish_upgrade3_1(this->sqlite_db(), "ALTER TABLE "
341  "publish_statistics ADD success INTEGER;");
342 
343  if (!publish_upgrade3_1.Execute()) {
344  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade publish_statistics"
345  " table of statistics database");
346  return false;
347  }
348 
349  sqlite::Sql gc_upgrade3_1(this->sqlite_db(), "ALTER TABLE gc_statistics"
350  " ADD success INTEGER;");
351 
352  if (!gc_upgrade3_1.Execute()) {
353  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade gc_statistics"
354  " table of statistics database");
355  return false;
356  }
357 
359  if (!StoreSchemaRevision()) {
360  LogCvmfs(kLogCvmfs, kLogSyslogErr, "failed to upgrade schema revision"
361  " of statistics database");
362  return false;
363  }
364  }
365  return true;
366 }
367 
368 
370  ++compact_calls;
371  return !compacting_fails;
372 }
373 
374 
377 }
378 
379 
381  const std::string repo_name)
382 {
383  StatisticsDatabase *db;
384  std::string db_file_path;
385  uint32_t days_to_keep;
386  GetDBParams(repo_name, &db_file_path, &days_to_keep);
387  if (FileExists(db_file_path)) {
388  db = StatisticsDatabase::Open(db_file_path, kOpenReadWrite);
389  if (db == NULL) {
390  PANIC(kLogSyslogErr, "Couldn't create StatisticsDatabase object!");
391  } else if (db->GetProperty<std::string>("repo_name") != repo_name) {
392  PANIC(kLogSyslogErr, "'repo_name' property of the statistics database %s "
393  "is incorrect. Please fix the database.", db_file_path.c_str());
394  }
395  if (!db->Prune(days_to_keep)) {
396  LogCvmfs(kLogCvmfs, kLogSyslogErr, "Failed to prune statistics database");
397  }
398  } else {
399  db = StatisticsDatabase::Create(db_file_path);
400  if (db == NULL) {
401  PANIC(kLogSyslogErr, "Couldn't create StatisticsDatabase object!");
402  // insert repo_name into properties table
403  } else if (!db->SetProperty("repo_name", repo_name)) {
405  "Couldn't insert repo_name into properties table!");
406  }
407  }
408  db->repo_name_ = repo_name;
409  return db;
410 }
411 
412 
414  const perf::Statistics *statistics,
415  const std::string &start_time,
416  const bool success)
417 {
418  std::string finish_time = GetGMTimestamp();
419  std::string statement = PrepareStatementIntoPublish(statistics, start_time,
420  finish_time, success);
421  return StoreEntry(statement);
422 }
423 
424 
426  const perf::Statistics *statistics,
427  const std::string &start_time,
428  const bool success)
429 {
430  std::string finish_time = GetGMTimestamp();
431  std::string statement =
432  PrepareStatementIntoGc(statistics, start_time, finish_time,
433  repo_name_, success);
434  return StoreEntry(statement);
435 }
436 
437 
// Executes the given SQL statement against this database.
//
// @param insert_statement  complete SQL text to run (the callers visible in
//                          this file pass the INSERT statements built by
//                          PrepareStatementIntoPublish / PrepareStatementIntoGc)
// @return false if Execute() fails; true otherwise, after logging the database
//         file name with the kLogStdout mask
//
// NOTE(review): in this listing the line that opens the call taking the
// "Couldn't store statistics ..." arguments is absent (presumably a LogCvmfs
// call -- restore it from the upstream source).
438 bool StatisticsDatabase::StoreEntry(const std::string &insert_statement) {
439  sqlite::Sql insert(this->sqlite_db(), insert_statement);
440 
441  if (!insert.Execute()) {
443  "Couldn't store statistics in %s: insert.Execute failed!",
444  this->filename().c_str());
445  return false;
446  }
447 
448  LogCvmfs(kLogCvmfs, kLogStdout, "Statistics stored at: %s",
449  this->filename().c_str());
450  return true;
451 }
452 
453 
// Removes old rows from publish_statistics and gc_statistics and then calls
// Vacuum() on the database.
//
// A row is deleted when julianday('now','start of day') minus
// julianday(start_time), as evaluated by SQLite, is greater than `days`.
//
// @param days  age threshold in days; 0 means "do nothing" and returns true
//              right away
// @return true if days is 0, or if both DELETE statements and Vacuum()
//         succeed; false otherwise
//
// NOTE(review): in this listing the lines that open the two calls taking the
// "Couldn't prune statistics DB ..." arguments are absent (presumably LogCvmfs
// calls -- restore them from the upstream source).
454 bool StatisticsDatabase::Prune(uint32_t days) {
455  if (days == 0) return true;
456 
457  std::string publish_stmt =
458  "DELETE FROM publish_statistics WHERE "
459  "julianday('now','start of day')-julianday(start_time) > " +
460  StringifyUint(days) + ";";
461 
462  std::string gc_stmt =
463  "DELETE FROM gc_statistics WHERE "
464  "julianday('now','start of day')-julianday(start_time) > " +
465  StringifyUint(days) + ";";
466 
467  sqlite::Sql publish_sql(this->sqlite_db(), publish_stmt);
468  sqlite::Sql gc_sql(this->sqlite_db(), gc_stmt);
// Short-circuit: if the publish DELETE fails, the gc DELETE is not executed.
469  if (!publish_sql.Execute() || !gc_sql.Execute()) {
471  "Couldn't prune statistics DB %s: SQL Execute() failed!",
472  this->filename().c_str());
473  return false;
474  }
475  if (!this->Vacuum()) {
477  "Couldn't prune statistics DB %s: Vacuum() failed!",
478  this->filename().c_str());
479  return false;
480  }
481 
482  return true;
483 }
484 
485 
487  std::string local_path)
488 {
489  if (local_path == "") {
490  local_path = this->filename();
491  }
492 
493  spooler->WaitForUpload();
494  unsigned errors_before = spooler->GetNumberOfErrors();
495  spooler->Mkdir("stats");
496  spooler->RemoveAsync("stats/stats.db");
497  spooler->WaitForUpload();
498  spooler->Upload(local_path, "stats/stats.db");
499  spooler->WaitForUpload();
500  unsigned errors_after = spooler->GetNumberOfErrors();
501 
502  if (errors_before != errors_after) {
504  "Could not upload statistics DB file into storage backend");
505  return false;
506  }
507  return true;
508 }
509 
510 
512  upload::AbstractUploader *uploader,
513  std::string local_path)
514 {
515  if (local_path == "") {
516  local_path = this->filename();
517  }
518 
519  uploader->WaitForUpload();
520  unsigned errors_before = uploader->GetNumberOfErrors();
521 
522  uploader->RemoveAsync("stats/stats.db");
523  uploader->WaitForUpload();
524  uploader->UploadFile(local_path, "stats/stats.db");
525  uploader->WaitForUpload();
526  unsigned errors_after = uploader->GetNumberOfErrors();
527  return errors_before == errors_after;
528 }
529 
530 
// Determines where the statistics database of a repository is located and for
// how many days its entries are kept.
//
// The values come from /etc/cvmfs/repositories.d/<repo_name>/server.conf:
//  * If that file cannot be parsed, both outputs receive their defaults.
//  * CVMFS_STATISTICS_DB selects the path. The default
//    /var/spool/cvmfs/<repo_name>/stats.db is used when the parameter is not
//    set or when MkdirDeep() fails for the parent directory of the given path.
//  * CVMFS_STATS_DB_DAYS_TO_KEEP selects the retention; kDefaultDaysToKeep is
//    used when the parameter is not set.
//
// @param repo_name     repository name used to build both file paths
// @param path          out: location of the statistics database file
// @param days_to_keep  out: retention period in days
//
// NOTE(review): in this listing the lines that open the calls taking the
// message arguments below (presumably LogCvmfs calls), and the last argument
// of the final one, are absent. They are left untouched here; restore them
// from the upstream source.
531 void StatisticsDatabase::GetDBParams(const std::string &repo_name,
532  std::string *path,
533  uint32_t *days_to_keep)
534 {
535  // default location
536  const std::string db_default_path =
537  "/var/spool/cvmfs/" + repo_name + "/stats.db";
538  const std::string repo_config_file =
539  "/etc/cvmfs/repositories.d/" + repo_name + "/server.conf";
540  SimpleOptionsParser parser;
541 
542  if (!parser.TryParsePath(repo_config_file)) {
544  "Could not parse repository configuration: %s.",
545  repo_config_file.c_str());
546  *path = db_default_path;
547  *days_to_keep = kDefaultDaysToKeep;
548  return;
549  }
550 
551  std::string statistics_db = "";
552  if (!parser.GetValue("CVMFS_STATISTICS_DB", &statistics_db)) {
554  "Parameter %s was not set in the repository configuration file. "
555  "Using default value: %s",
556  "CVMFS_STATISTICS_DB", db_default_path.c_str());
557  *path = db_default_path;
558  } else {
559  std::string dirname = GetParentPath(statistics_db);
560  int mode = S_IRUSR | S_IWUSR | S_IXUSR |
561  S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; // 755
562  if (!MkdirDeep(dirname, mode, true)) {
564  "Couldn't write statistics at the specified path %s.",
565  statistics_db.c_str());
566  *path = db_default_path;
567  } else {
568  *path = statistics_db;
569  }
570  }
571 
572  std::string days_to_keep_str = "";
// The value must be read into days_to_keep_str, the variable converted below.
// Reading it into statistics_db left days_to_keep_str empty, so the configured
// retention was never used.
573  if (!parser.GetValue("CVMFS_STATS_DB_DAYS_TO_KEEP", &days_to_keep_str)) {
575  "Parameter %s was not set in the repository configuration file. "
576  "Using default value: %s",
577  "CVMFS_STATS_DB_DAYS_TO_KEEP",
579  *days_to_keep = kDefaultDaysToKeep;
580  } else {
581  *days_to_keep = static_cast<uint32_t> (String2Uint64(days_to_keep_str));
582  }
583 }
584 
585 
// Tells whether extended garbage collection statistics are switched on for a
// repository.
//
// Reads /etc/cvmfs/repositories.d/<repo_name>/server.conf and evaluates the
// CVMFS_EXTENDED_GC_STATS parameter with SimpleOptionsParser::IsOn().
//
// @param repo_name  repository name used to build the configuration file path
// @return true only if the file can be parsed, the parameter is set, and
//         IsOn() accepts its value; false in every other case
//
// NOTE(review): in this listing the lines that open the two calls taking the
// message arguments below are absent (presumably LogCvmfs calls -- restore
// them from the upstream source).
592 bool StatisticsDatabase::GcExtendedStats(const std::string &repo_name) {
593  SimpleOptionsParser parser;
594  std::string param_value = "";
595  const std::string repo_config_file =
596  "/etc/cvmfs/repositories.d/" + repo_name + "/server.conf";
597 
598  if (!parser.TryParsePath(repo_config_file)) {
600  "Could not parse repository configuration: %s.",
601  repo_config_file.c_str());
602  return false;
603  }
604  if (!parser.GetValue("CVMFS_EXTENDED_GC_STATS", &param_value)) {
606  "Parameter %s was not set in the repository configuration file. "
607  "condemned_bytes were not counted.",
608  "CVMFS_EXTENDED_GC_STATS");
609  } else if (parser.IsOn(param_value)) {
610  return true;
611  }
// Reached when the parameter is unset or its value is not "on".
612  return false;
613 }
614 
615 
// Constructs the database object: forwards filename and open_mode to the
// sqlite::Database<StatisticsDatabase> base class and sets the four call
// counters (create_empty_db_calls, check_compatibility_calls,
// live_upgrade_calls, compact_calls) to 0.
//
// @param filename   database file, passed on to the base class
// @param open_mode  open mode, passed on to the base class
//
// NOTE(review): the single statement of the constructor body (listing line
// 622) is absent in this listing; restore it from the upstream source.
616 StatisticsDatabase::StatisticsDatabase(const std::string &filename,
617  const OpenMode open_mode) :
618  sqlite::Database<StatisticsDatabase>(filename, open_mode),
619  create_empty_db_calls(0), check_compatibility_calls(0),
620  live_upgrade_calls(0), compact_calls(0)
621 {
623 }
624 
// Retention period in days that GetDBParams() reports when the repository
// configuration cannot be parsed or does not set CVMFS_STATS_DB_DAYS_TO_KEEP.
625 const uint32_t StatisticsDatabase::kDefaultDaysToKeep = 365;
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
virtual void WaitForUpload() const
StatisticsDatabase(const std::string &filename, const OpenMode open_mode)
std::string ToString()
Definition: statistics.cc:24
bool Execute()
Definition: sql.cc:42
std::string GetGMTimestamp(const std::string &format)
Definition: string.cc:585
std::string PrepareStatementIntoGc(const perf::Statistics *statistics, const std::string &start_time, const std::string &finish_time, const std::string &repo_name, const bool success)
const std::string & filename() const
Definition: sql.h:148
bool UploadStatistics(upload::Spooler *spooler, std::string local_path="")
bool StoreGCStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
#define PANIC(...)
Definition: exception.h:26
static unsigned int instances
bool IsOn(const std::string &param_value) const
Definition: options.cc:409
unsigned int live_upgrade_calls
bool Prune(uint32_t days)
std::string GetParentPath(const std::string &path)
Definition: posix.cc:131
std::string StringifyUint(const uint64_t value)
Definition: string.cc:84
void set_schema_revision(const unsigned rev)
Definition: sql.h:253
std::string PrepareStatementIntoPublish(const perf::Statistics *statistics, const std::string &start_time, const std::string &finish_time, const bool success)
static StatisticsDatabase * Open(const std::string &filename, const OpenMode open_mode)
T GetProperty(const std::string &key) const
Definition: sql_impl.h:303
unsigned schema_revision() const
Definition: sql.h:150
static bool compacting_fails
bool FileExists(const std::string &path)
Definition: posix.cc:816
Counter * Lookup(const std::string &name) const
Definition: statistics.cc:62
bool TryParsePath(const std::string &config_file)
Definition: options.cc:110
static const float kLatestCompatibleSchema
bool MkdirDeep(const std::string &path, const mode_t mode, bool verify_writable)
Definition: posix.cc:871
unsigned int create_empty_db_calls
static float kLatestSchema
sqlite3 * sqlite_db() const
Definition: sql.h:147
unsigned int check_compatibility_calls
static unsigned kLatestSchemaRevision
bool GetValue(const std::string &key, std::string *value) const
Definition: options.cc:376
virtual unsigned int GetNumberOfErrors() const =0
static StatisticsDatabase * OpenStandardDB(const std::string repo_name)
bool SetProperty(const std::string &key, const T value)
Definition: sql_impl.h:323
static bool GcExtendedStats(const std::string &repo_name)
bool StoreEntry(const std::string &insert_statement)
void UploadFile(const std::string &local_path, const std::string &remote_path, const CallbackTN *callback=NULL)
bool StorePublishStatistics(const perf::Statistics *statistics, const std::string &start_time, const bool success)
static const uint32_t kDefaultDaysToKeep
bool IsEqualSchema(const float value, const float compare) const
Definition: sql.h:131
uint64_t String2Uint64(const string &value)
Definition: string.cc:228
unsigned int compact_calls
void RemoveAsync(const std::string &file_to_delete)
static void GetDBParams(const std::string &repo_name, std::string *path, uint32_t *days_to_keep)
static StatisticsDatabase * Create(const std::string &filename)
bool CompactDatabase() const