CernVM-FS  2.12.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector.cc
Go to the documentation of this file.
1 
4 #include "garbage_collector.h"
5 
6 #include <dirent.h>
7 #include <errno.h>
8 
9 #include <cstdio>
10 #include <cstring>
11 
12 #include <map>
13 #include <string>
14 
15 #include "helpers.h"
17 #include "util/logging.h"
18 #include "util/posix.h"
19 #include "util/smalloc.h"
20 
32  struct fs_traversal_posix_context *posix_ctx
33  = reinterpret_cast<struct fs_traversal_posix_context *>(ctx->ctx);
34  int res2 = mkdir((std::string(ctx->data)+POSIX_GARBAGE_DIR).c_str(), 0700);
35  assert(res2 == 0 || errno == EEXIST);
36  std::string gc_path = std::string(ctx->data)
38  if (FileExists(gc_path)) {
39  FILE *gc_flagged_file = fopen(gc_path.c_str(), "r");
40  assert(gc_flagged_file != NULL);
41  while (true) {
42  ino_t cur_ino;
43  size_t read = fread(&cur_ino, sizeof(ino_t), 1, gc_flagged_file);
44  if (read == 1) {
45  posix_ctx->gc_flagged[cur_ino] = true;
46  } else {
47  assert(feof(gc_flagged_file) != 0);
48  break;
49  }
50  }
51  int res = fclose(gc_flagged_file);
52  assert(res == 0);
53  }
54 }
55 
62  struct fs_traversal_posix_context *posix_ctx
63  = reinterpret_cast<struct fs_traversal_posix_context*>(ctx->ctx);
64  std::string gc_path = std::string(ctx->data)
66  FILE *gc_flagged_file = fopen(gc_path.c_str(), "w");
67  for (
68  std::map<ino_t, bool>::const_iterator it = posix_ctx->gc_flagged.begin();
69  it != posix_ctx->gc_flagged.end();
70  it++) {
71  if (it->second) {
72  fwrite(&(it->first), sizeof(ino_t), 1, gc_flagged_file);
73  }
74  }
75  fclose(gc_flagged_file);
76 }
77 
87 void *PosixGcMainWorker(void *data) {
88  struct posix_gc_thread *thread_context
89  = reinterpret_cast<struct posix_gc_thread *>(data);
90  struct fs_traversal_posix_context *posix_ctx
91  = reinterpret_cast<struct fs_traversal_posix_context *>(
92  thread_context->ctx->ctx);
93  int64_t files_removed = 0;
94  int64_t bytes_removed = 0;
95  // Build path array
96  int offset = strlen(thread_context->ctx->data)+1;
97  // used for both path building and stat calls (therefore +257)
98  char dir_path[offset+kDigitsPerDirLevel*kDirLevels+kDirLevels+257];
99  snprintf(dir_path, offset, "%s", thread_context->ctx->data);
100  dir_path[offset-1]='/';
101  dir_path[offset+kDigitsPerDirLevel*kDirLevels+kDirLevels+257] = '\0';
102  char dir_name_template[6];
103  snprintf(dir_name_template,
104  sizeof(dir_name_template),
105  "%%%02ux/",
107  const unsigned directory_mask = (1 << (kDigitsPerDirLevel*4)) -1;
108  const unsigned max_dir_name = (1 << kDigitsPerDirLevel*4);
109  const unsigned max_val = (1 << (kDirLevels*kDigitsPerDirLevel*4));
110  for (unsigned i = thread_context->thread_num*max_dir_name*(kDirLevels-1);
111  i < max_val;
112  i+=thread_context->thread_total*+(max_val/max_dir_name)) {
113  // Iterate over paths of current subdirectory...
114  for (unsigned j = i; j < i+(max_val/max_dir_name); j++) {
115  // For every subdirectory chain (described by j)
116  unsigned path_pos = offset;
117  for (int level = kDirLevels-1;
118  level >= 0;
119  level--) {
120  const unsigned cur_dir
121  = (j >> (level*kDigitsPerDirLevel*4)) & directory_mask;
122  snprintf(dir_path+path_pos,
123  kDigitsPerDirLevel+2, dir_name_template, cur_dir);
124  path_pos+=kDigitsPerDirLevel+1;
125  }
126  dir_path[path_pos]='\0';
127  // Calculated path - now garbage collection...
128  DIR *cur_dir_ent = opendir(dir_path);
129  assert(cur_dir_ent != NULL);
130  struct stat stat_buf;
131  struct dirent *de;
132  while ((de = readdir(cur_dir_ent)) != NULL) {
133  if (posix_ctx->gc_flagged.count(de->d_ino) > 0
134  && posix_ctx->gc_flagged[de->d_ino]) {
135  snprintf(dir_path+path_pos, sizeof(dir_path)-path_pos, "%s",
136  de->d_name);
137  stat(dir_path, &stat_buf);
138  if (stat_buf.st_nlink == 1) {
139  files_removed++;
140  bytes_removed+=stat_buf.st_size;
141  int res = unlink(dir_path);
142  assert(res == 0);
143  posix_ctx->gc_flagged.erase(de->d_ino);
144  }
145  }
146  }
147  closedir(cur_dir_ent);
148  }
149  }
150  thread_context->stat
151  ->Lookup(POSIX_GC_STAT_FILES_REMOVED)->Xadd(files_removed);
152  thread_context->stat
153  ->Lookup(POSIX_GC_STAT_BYTES_REMOVED)->Xadd(bytes_removed);
154  return NULL;
155 }
156 
163  struct fs_traversal_posix_context *posix_ctx
164  = reinterpret_cast<struct fs_traversal_posix_context *>(ctx->ctx);
165 
166  int thread_total = posix_ctx->num_threads;
167  struct posix_gc_thread *thread_contexts
168  = reinterpret_cast<struct posix_gc_thread *>(
169  smalloc(sizeof(struct posix_gc_thread) * thread_total));
170 
171  perf::Statistics *gc_statistics = new perf::Statistics();
172  gc_statistics->Register(POSIX_GC_STAT_FILES_REMOVED,
173  "Number of deduplicated files removed by Garbage Collector");
174  gc_statistics->Register(POSIX_GC_STAT_BYTES_REMOVED,
175  "Sum of sizes of removed files");
176 
177  if (thread_total > 1) {
178  pthread_t *workers
179  = reinterpret_cast<pthread_t *>(smalloc(sizeof(pthread_t) * thread_total));
180  for (int i = 0; i < thread_total; i++) {
181  thread_contexts[i].thread_total = thread_total;
182  thread_contexts[i].thread_num = i;
183  thread_contexts[i].ctx = ctx;
184  thread_contexts[i].stat = gc_statistics;
185  int retval = pthread_create(&workers[i], NULL,
186  PosixGcMainWorker, &thread_contexts[i]);
187  assert(retval == 0);
188  }
189 
190  for (int i = 0; i < thread_total; i++) {
191  pthread_join(workers[i], NULL);
192  }
193  free(workers);
194  } else {
195  thread_contexts[0].thread_total = thread_total;
196  thread_contexts[0].thread_num = 0;
197  thread_contexts[0].ctx = ctx;
198  thread_contexts[0].stat = gc_statistics;
199  PosixGcMainWorker(thread_contexts);
200  }
202  "%s", gc_statistics->PrintList(perf::Statistics::kPrintHeader).c_str());
203  free(thread_contexts);
204  delete gc_statistics;
205  return 0;
206 }
#define POSIX_GARBAGE_DIR
Definition: helpers.h:16
std::map< ino_t, bool > gc_flagged
Definition: helpers.h:26
Counter * Register(const std::string &name, const std::string &desc)
Definition: statistics.cc:160
struct cvmcache_context * ctx
void * PosixGcMainWorker(void *data)
#define POSIX_GARBAGE_FLAGGED_FILE
Definition: helpers.h:17
void FinalizeGarbageCollection(struct fs_traversal_context *ctx)
assert((mem||(size==0))&&"Out Of Memory")
void InitializeGarbageCollection(struct fs_traversal_context *ctx)
bool FileExists(const std::string &path)
Definition: posix.cc:802
Counter * Lookup(const std::string &name) const
Definition: statistics.cc:62
perf::Statistics * stat
int RunGarbageCollection(struct fs_traversal_context *ctx)
const unsigned kDigitsPerDirLevel
Definition: helpers.h:22
const unsigned kDirLevels
Definition: helpers.h:21
#define POSIX_GC_STAT_BYTES_REMOVED
Definition: helpers.h:19
int64_t Xadd(const int64_t delta)
Definition: statistics.h:34
std::string PrintList(const PrintOptions print_options)
Definition: statistics.cc:79
struct fs_traversal_context * ctx
#define POSIX_GC_STAT_FILES_REMOVED
Definition: helpers.h:18
CVMFS_EXPORT void LogCvmfs(const LogSource source, const int mask, const char *format,...)
Definition: logging.cc:528