CernVM-FS  2.13.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector.cc
Go to the documentation of this file.
1 
4 #include "garbage_collector.h"
5 
6 #include <dirent.h>
7 #include <errno.h>
8 
9 #include <cstdio>
10 #include <cstring>
11 #include <map>
12 #include <string>
13 
14 #include "helpers.h"
16 #include "util/logging.h"
17 #include "util/posix.h"
18 #include "util/smalloc.h"
19 
32  *posix_ctx = reinterpret_cast<struct fs_traversal_posix_context *>(
33  ctx->ctx);
34  int res2 = mkdir((std::string(ctx->data) + POSIX_GARBAGE_DIR).c_str(), 0700);
35  assert(res2 == 0 || errno == EEXIST);
36  std::string gc_path = std::string(ctx->data) + POSIX_GARBAGE_DIR
38  if (FileExists(gc_path)) {
39  FILE *gc_flagged_file = fopen(gc_path.c_str(), "r");
40  assert(gc_flagged_file != NULL);
41  while (true) {
42  ino_t cur_ino;
43  size_t read = fread(&cur_ino, sizeof(ino_t), 1, gc_flagged_file);
44  if (read == 1) {
45  posix_ctx->gc_flagged[cur_ino] = true;
46  } else {
47  assert(feof(gc_flagged_file) != 0);
48  break;
49  }
50  }
51  int res = fclose(gc_flagged_file);
52  assert(res == 0);
53  }
54 }
55 
63  *posix_ctx = reinterpret_cast<struct fs_traversal_posix_context *>(
64  ctx->ctx);
65  std::string gc_path = std::string(ctx->data) + POSIX_GARBAGE_DIR
67  FILE *gc_flagged_file = fopen(gc_path.c_str(), "w");
68  for (std::map<ino_t, bool>::const_iterator it = posix_ctx->gc_flagged.begin();
69  it != posix_ctx->gc_flagged.end();
70  it++) {
71  if (it->second) {
72  fwrite(&(it->first), sizeof(ino_t), 1, gc_flagged_file);
73  }
74  }
75  fclose(gc_flagged_file);
76 }
77 
87 void *PosixGcMainWorker(void *data) {
88  struct posix_gc_thread
89  *thread_context = reinterpret_cast<struct posix_gc_thread *>(data);
91  *posix_ctx = reinterpret_cast<struct fs_traversal_posix_context *>(
92  thread_context->ctx->ctx);
93  int64_t files_removed = 0;
94  int64_t bytes_removed = 0;
95  // Build path array
96  int offset = strlen(thread_context->ctx->data) + 1;
97  // used for both path building and stat calls (therefore +257)
98  char dir_path[offset + kDigitsPerDirLevel * kDirLevels + kDirLevels + 257];
99  snprintf(dir_path, offset, "%s", thread_context->ctx->data);
100  dir_path[offset - 1] = '/';
101  dir_path[offset + kDigitsPerDirLevel * kDirLevels + kDirLevels + 257] = '\0';
102  char dir_name_template[6];
103  snprintf(dir_name_template,
104  sizeof(dir_name_template),
105  "%%%02ux/",
107  const unsigned directory_mask = (1 << (kDigitsPerDirLevel * 4)) - 1;
108  const unsigned max_dir_name = (1 << kDigitsPerDirLevel * 4);
109  const unsigned max_val = (1 << (kDirLevels * kDigitsPerDirLevel * 4));
110  for (unsigned i =
111  thread_context->thread_num * max_dir_name * (kDirLevels - 1);
112  i < max_val;
113  i += thread_context->thread_total * +(max_val / max_dir_name)) {
114  // Iterate over paths of current subdirectory...
115  for (unsigned j = i; j < i + (max_val / max_dir_name); j++) {
116  // For every subdirectory chain (described by j)
117  unsigned path_pos = offset;
118  for (int level = kDirLevels - 1; level >= 0; level--) {
119  const unsigned cur_dir = (j >> (level * kDigitsPerDirLevel * 4))
120  & directory_mask;
121  snprintf(dir_path + path_pos, kDigitsPerDirLevel + 2, dir_name_template,
122  cur_dir);
123  path_pos += kDigitsPerDirLevel + 1;
124  }
125  dir_path[path_pos] = '\0';
126  // Calculated path - now garbage collection...
127  DIR *cur_dir_ent = opendir(dir_path);
128  assert(cur_dir_ent != NULL);
129  struct stat stat_buf;
130  struct dirent *de;
131  while ((de = readdir(cur_dir_ent)) != NULL) {
132  if (posix_ctx->gc_flagged.count(de->d_ino) > 0
133  && posix_ctx->gc_flagged[de->d_ino]) {
134  snprintf(dir_path + path_pos, sizeof(dir_path) - path_pos, "%s",
135  de->d_name);
136  stat(dir_path, &stat_buf);
137  if (stat_buf.st_nlink == 1) {
138  files_removed++;
139  bytes_removed += stat_buf.st_size;
140  int res = unlink(dir_path);
141  assert(res == 0);
142  posix_ctx->gc_flagged.erase(de->d_ino);
143  }
144  }
145  }
146  closedir(cur_dir_ent);
147  }
148  }
149  thread_context->stat->Lookup(POSIX_GC_STAT_FILES_REMOVED)
150  ->Xadd(files_removed);
151  thread_context->stat->Lookup(POSIX_GC_STAT_BYTES_REMOVED)
152  ->Xadd(bytes_removed);
153  return NULL;
154 }
155 
163  *posix_ctx = reinterpret_cast<struct fs_traversal_posix_context *>(
164  ctx->ctx);
165 
166  int thread_total = posix_ctx->num_threads;
167  struct posix_gc_thread
168  *thread_contexts = reinterpret_cast<struct posix_gc_thread *>(
169  smalloc(sizeof(struct posix_gc_thread) * thread_total));
170 
171  perf::Statistics *gc_statistics = new perf::Statistics();
172  gc_statistics->Register(
174  "Number of deduplicated files removed by Garbage Collector");
175  gc_statistics->Register(POSIX_GC_STAT_BYTES_REMOVED,
176  "Sum of sizes of removed files");
177 
178  if (thread_total > 1) {
179  pthread_t *workers = reinterpret_cast<pthread_t *>(
180  smalloc(sizeof(pthread_t) * thread_total));
181  for (int i = 0; i < thread_total; i++) {
182  thread_contexts[i].thread_total = thread_total;
183  thread_contexts[i].thread_num = i;
184  thread_contexts[i].ctx = ctx;
185  thread_contexts[i].stat = gc_statistics;
186  int retval = pthread_create(&workers[i], NULL, PosixGcMainWorker,
187  &thread_contexts[i]);
188  assert(retval == 0);
189  }
190 
191  for (int i = 0; i < thread_total; i++) {
192  pthread_join(workers[i], NULL);
193  }
194  free(workers);
195  } else {
196  thread_contexts[0].thread_total = thread_total;
197  thread_contexts[0].thread_num = 0;
198  thread_contexts[0].ctx = ctx;
199  thread_contexts[0].stat = gc_statistics;
200  PosixGcMainWorker(thread_contexts);
201  }
203  gc_statistics->PrintList(perf::Statistics::kPrintHeader).c_str());
204  free(thread_contexts);
205  delete gc_statistics;
206  return 0;
207 }
#define POSIX_GARBAGE_DIR
Definition: helpers.h:16
std::map< ino_t, bool > gc_flagged
Definition: helpers.h:26
Counter * Register(const std::string &name, const std::string &desc)
Definition: statistics.cc:163
struct cvmcache_context * ctx
void * PosixGcMainWorker(void *data)
#define POSIX_GARBAGE_FLAGGED_FILE
Definition: helpers.h:17
void FinalizeGarbageCollection(struct fs_traversal_context *ctx)
assert((mem||(size==0))&&"Out Of Memory")
void InitializeGarbageCollection(struct fs_traversal_context *ctx)
bool FileExists(const std::string &path)
Definition: posix.cc:803
Counter * Lookup(const std::string &name) const
Definition: statistics.cc:63
perf::Statistics * stat
int RunGarbageCollection(struct fs_traversal_context *ctx)
const unsigned kDigitsPerDirLevel
Definition: helpers.h:22
const unsigned kDirLevels
Definition: helpers.h:21
#define POSIX_GC_STAT_BYTES_REMOVED
Definition: helpers.h:19
int64_t Xadd(const int64_t delta)
Definition: statistics.h:34
std::string PrintList(const PrintOptions print_options)
Definition: statistics.cc:80
struct fs_traversal_context * ctx
#define POSIX_GC_STAT_FILES_REMOVED
Definition: helpers.h:18
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:545