CernVM-FS  2.9.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
garbage_collector.cc
Go to the documentation of this file.
1 
4 #include "garbage_collector.h"
5 
6 #include <dirent.h>
7 #include <errno.h>
8 #include <stdio.h>
9 
10 #include <map>
11 #include <string>
12 
13 #include "helpers.h"
14 #include "logging.h"
16 #include "smalloc.h"
17 #include "util/posix.h"
18 
// Body of the routine that restores the garbage collector's state from disk.
// NOTE(review): the signature line (listing line 29) did not survive in this
// rendered listing.  The cross-reference index at the end of the page lists
// `void InitializeGarbageCollection(struct fs_traversal_context *ctx)`, which
// is presumably the function this body belongs to -- confirm against the
// original file.
//
// Steps visible here:
//  1. Create the directory <ctx->data> + POSIX_GARBAGE_DIR with mode 0700;
//     an already existing directory (EEXIST) is accepted.
//  2. If a file exists at gc_path, read it as a sequence of raw ino_t values
//     (sizeof(ino_t) bytes each) and set posix_ctx->gc_flagged[inode] = true
//     for every value read.
// Every error check in this body is an assert(), so none of them remain when
// the file is compiled with NDEBUG defined.
30  struct fs_traversal_posix_context *posix_ctx
31  = reinterpret_cast<struct fs_traversal_posix_context *>(ctx->ctx);
32  int res2 = mkdir((std::string(ctx->data)+POSIX_GARBAGE_DIR).c_str(), 0700);
33  assert(res2 == 0 || errno == EEXIST);
// NOTE(review): listing line 35 is missing, so the remainder of the following
// expression (whatever is appended to ctx->data, and the terminating ';') is
// not visible here.
34  std::string gc_path = std::string(ctx->data)
36  if (FileExists(gc_path)) {
37  FILE *gc_flagged_file = fopen(gc_path.c_str(), "r");
38  assert(gc_flagged_file != NULL);
// Consume one inode number per iteration until fread() comes up short.
39  while (true) {
40  ino_t cur_ino;
41  size_t read = fread(&cur_ino, sizeof(ino_t), 1, gc_flagged_file);
42  if (read == 1) {
43  posix_ctx->gc_flagged[cur_ino] = true;
44  } else {
// A short read is only accepted at end-of-file; a read error trips the assert.
45  assert(feof(gc_flagged_file) != 0);
46  break;
47  }
48  }
49  int res = fclose(gc_flagged_file);
50  assert(res == 0);
51  }
52 }
53 
// Body of the routine that persists the garbage collector's state to disk.
// NOTE(review): the signature line (listing line 59) did not survive in this
// rendered listing.  The cross-reference index at the end of the page lists
// `void FinalizeGarbageCollection(struct fs_traversal_context *ctx)`, which
// is presumably the function this body belongs to -- confirm against the
// original file.
//
// Opens the file at gc_path for writing (truncating any previous content) and
// appends the key of every posix_ctx->gc_flagged entry whose value is true as
// a raw ino_t (sizeof(ino_t) bytes).  Entries mapped to false are not written.
60  struct fs_traversal_posix_context *posix_ctx
61  = reinterpret_cast<struct fs_traversal_posix_context*>(ctx->ctx);
// NOTE(review): listing line 63 is missing, so the remainder of the following
// expression (whatever is appended to ctx->data, and the terminating ';') is
// not visible here.
62  std::string gc_path = std::string(ctx->data)
// NOTE(review): the fopen() result is used without a NULL check, and the
// return values of fwrite() and fclose() below are ignored, so a failure to
// open or write the file is not detected in this body.
64  FILE *gc_flagged_file = fopen(gc_path.c_str(), "w");
65  for (
66  std::map<ino_t, bool>::const_iterator it = posix_ctx->gc_flagged.begin();
67  it != posix_ctx->gc_flagged.end();
68  it++) {
69  if (it->second) {
70  fwrite(&(it->first), sizeof(ino_t), 1, gc_flagged_file);
71  }
72  }
73  fclose(gc_flagged_file);
74 }
75 
85 void *PosixGcMainWorker(void *data) {
86  struct posix_gc_thread *thread_context
87  = reinterpret_cast<struct posix_gc_thread *>(data);
88  struct fs_traversal_posix_context *posix_ctx
89  = reinterpret_cast<struct fs_traversal_posix_context *>(
90  thread_context->ctx->ctx);
91  int64_t files_removed = 0;
92  int64_t bytes_removed = 0;
93  // Build path array
94  int offset = strlen(thread_context->ctx->data)+1;
95  // used for both path building and stat calls (therefore +257)
96  char dir_path[offset+kDigitsPerDirLevel*kDirLevels+kDirLevels+257];
97  snprintf(dir_path, offset, "%s", thread_context->ctx->data);
98  dir_path[offset-1]='/';
99  dir_path[offset+kDigitsPerDirLevel*kDirLevels+kDirLevels+257] = '\0';
100  char dir_name_template[6];
101  snprintf(dir_name_template,
102  sizeof(dir_name_template),
103  "%%%02ux/",
105  const unsigned directory_mask = (1 << (kDigitsPerDirLevel*4)) -1;
106  const unsigned max_dir_name = (1 << kDigitsPerDirLevel*4);
107  const unsigned max_val = (1 << (kDirLevels*kDigitsPerDirLevel*4));
108  for (unsigned i = thread_context->thread_num*max_dir_name*(kDirLevels-1);
109  i < max_val;
110  i+=thread_context->thread_total*+(max_val/max_dir_name)) {
111  // Iterate over paths of current subdirectory...
112  for (unsigned j = i; j < i+(max_val/max_dir_name); j++) {
113  // For every subdirectory chain (described by j)
114  unsigned path_pos = offset;
115  for (int level = kDirLevels-1;
116  level >= 0;
117  level--) {
118  const unsigned cur_dir
119  = (j >> (level*kDigitsPerDirLevel*4)) & directory_mask;
120  snprintf(dir_path+path_pos,
121  kDigitsPerDirLevel+2, dir_name_template, cur_dir);
122  path_pos+=kDigitsPerDirLevel+1;
123  }
124  dir_path[path_pos]='\0';
125  // Calculated path - now garbage collection...
126  DIR *cur_dir_ent = opendir(dir_path);
127  assert(cur_dir_ent != NULL);
128  struct stat stat_buf;
129  struct dirent *de;
130  while ((de = readdir(cur_dir_ent)) != NULL) {
131  if (posix_ctx->gc_flagged.count(de->d_ino) > 0
132  && posix_ctx->gc_flagged[de->d_ino]) {
133  snprintf(dir_path+path_pos, sizeof(dir_path)-path_pos, "%s",
134  de->d_name);
135  stat(dir_path, &stat_buf);
136  if (stat_buf.st_nlink == 1) {
137  files_removed++;
138  bytes_removed+=stat_buf.st_size;
139  int res = unlink(dir_path);
140  assert(res == 0);
141  posix_ctx->gc_flagged.erase(de->d_ino);
142  }
143  }
144  }
145  closedir(cur_dir_ent);
146  }
147  }
148  thread_context->stat
149  ->Lookup(POSIX_GC_STAT_FILES_REMOVED)->Xadd(files_removed);
150  thread_context->stat
151  ->Lookup(POSIX_GC_STAT_BYTES_REMOVED)->Xadd(bytes_removed);
152  return NULL;
153 }
154 
// Body of the garbage-collection driver.
// NOTE(review): the signature line (listing line 160) did not survive in this
// rendered listing.  The cross-reference index at the end of the page lists
// `int RunGarbageCollection(struct fs_traversal_context *ctx)`, which is
// presumably the function this body belongs to -- confirm against the
// original file.
//
// Allocates one posix_gc_thread record per configured thread, registers the
// two counters, and runs PosixGcMainWorker either on thread_total pthreads
// (when thread_total > 1) or inline on the calling thread (otherwise).
// Afterwards it releases the records and the statistics object and returns 0
// unconditionally.
161  struct fs_traversal_posix_context *posix_ctx
162  = reinterpret_cast<struct fs_traversal_posix_context *>(ctx->ctx);
163 
164  int thread_total = posix_ctx->num_threads;
165  struct posix_gc_thread *thread_contexts
166  = reinterpret_cast<struct posix_gc_thread *>(
167  smalloc(sizeof(struct posix_gc_thread) * thread_total));
168 
// One statistics object is created here and handed to every worker.
169  perf::Statistics *gc_statistics = new perf::Statistics();
170  gc_statistics->Register(POSIX_GC_STAT_FILES_REMOVED,
171  "Number of deduplicated files removed by Garbage Collector");
172  gc_statistics->Register(POSIX_GC_STAT_BYTES_REMOVED,
173  "Sum of sizes of removed files");
174 
175  if (thread_total > 1) {
176  pthread_t *workers
177  = reinterpret_cast<pthread_t *>(smalloc(sizeof(pthread_t) * thread_total));
// Each worker gets its own record (index and total) but the same ctx pointer
// and the same statistics object.
178  for (int i = 0; i < thread_total; i++) {
179  thread_contexts[i].thread_total = thread_total;
180  thread_contexts[i].thread_num = i;
181  thread_contexts[i].ctx = ctx;
182  thread_contexts[i].stat = gc_statistics;
183  int retval = pthread_create(&workers[i], NULL,
184  PosixGcMainWorker, &thread_contexts[i]);
185  assert(retval == 0);
186  }
187 
// Wait for every worker; the pthread_join() return value is not checked.
188  for (int i = 0; i < thread_total; i++) {
189  pthread_join(workers[i], NULL);
190  }
191  free(workers);
192  } else {
// Single-threaded case: run the worker directly on the calling thread.
// NOTE(review): this branch is also taken when thread_total is 0 or negative;
// thread_contexts[0] is then written although the allocation above was sized
// by thread_total -- confirm that num_threads is validated before this point.
193  thread_contexts[0].thread_total = thread_total;
194  thread_contexts[0].thread_num = 0;
195  thread_contexts[0].ctx = ctx;
196  thread_contexts[0].stat = gc_statistics;
197  PosixGcMainWorker(thread_contexts);
198  }
// NOTE(review): listing line 199 is missing, so the start of the call whose
// trailing arguments appear on the next line is not visible here.  Those
// arguments print the collected statistics, including a header, through a
// "%s" format.
200  "%s", gc_statistics->PrintList(perf::Statistics::kPrintHeader).c_str());
201  free(thread_contexts);
202  delete gc_statistics;
203  return 0;
204 }
#define POSIX_GARBAGE_DIR
Definition: helpers.h:16
#define LogCvmfs(source, mask,...)
Definition: logging.h:20
std::map< ino_t, bool > gc_flagged
Definition: helpers.h:26
Counter * Register(const std::string &name, const std::string &desc)
Definition: statistics.cc:139
struct cvmcache_context * ctx
void * PosixGcMainWorker(void *data)
#define POSIX_GARBAGE_FLAGGED_FILE
Definition: helpers.h:17
void FinalizeGarbageCollection(struct fs_traversal_context *ctx)
assert((mem||(size==0))&&"Out Of Memory")
void InitializeGarbageCollection(struct fs_traversal_context *ctx)
bool FileExists(const std::string &path)
Definition: posix.cc:816
Counter * Lookup(const std::string &name) const
Definition: statistics.cc:62
perf::Statistics * stat
int RunGarbageCollection(struct fs_traversal_context *ctx)
const unsigned kDigitsPerDirLevel
Definition: helpers.h:22
const unsigned kDirLevels
Definition: helpers.h:21
#define POSIX_GC_STAT_BYTES_REMOVED
Definition: helpers.h:19
int64_t Xadd(const int64_t delta)
Definition: statistics.h:34
std::string PrintList(const PrintOptions print_options)
Definition: statistics.cc:79
struct fs_traversal_context * ctx
#define POSIX_GC_STAT_FILES_REMOVED
Definition: helpers.h:18