GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/catalog_balancer.h Lines: 15 15 100.0 %
Date: 2019-02-03 02:48:13 Branches: 9 10 90.0 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 */
4
5
#ifndef CVMFS_CATALOG_BALANCER_H_
6
#define CVMFS_CATALOG_BALANCER_H_
7
8
#include <string>
9
#include <vector>
10
11
#include "catalog_mgr.h"
12
#include "directory_entry.h"
13
14
15
namespace catalog {
16
17
/**
18
 * This class is in charge of "balancing" a catalog embedded in a
19
 * WritableCatalogManager. The process of balancing consists of keeping the
20
 * number of entries in each catalog between a maximum and minimum threshold so
21
 * that all of them can be easily manipulable. This way there won't be catalogs
22
 * that take a lot of time being downloaded, or their SQL operations will take
23
 * a reasonable amount of time.
24
 *
25
 * The CatalogBalancer uses the following WritableCatalogManager attributes:
26
 * - max_weight_: maximum number of entries in a catalog
27
 * - min_weight_: minimum number of entries in a catalog
28
 * - balance_weight_: maximum number of entries during the balancing process
29
 *
30
 * The balancing process is done traversing the entire catalog tree in
31
 * postorder, so that the deepest catalogs are analyzed before their parents.
32
 * Each catalog is then analyzed individually, existing three different
33
 * scenarios:
34
 *
35
 * a) The number of entries of the catalog is between max_weight_ and
36
 * min_weight_: nothing happens, the catalog remains untouched.
37
 *
38
 * b) The number of entries of the catalog is greater than max_weight_: the
39
 * catalog gets split in smaller catalogs. One of the new generated catalogs
40
 * will be mounted in the original mount point.
41
 *
42
 * c) The number of entries of the catalog is lesser than min_weight_: the
43
 * catalog gets merged with its father (except the root catalog, obviously).
44
 */
45
template <class CatalogMgrT>
46
class CatalogBalancer {
47
 public:
48
  typedef typename CatalogMgrT::catalog_t catalog_t;
49
1
  explicit CatalogBalancer(CatalogMgrT *catalog_mgr)
50
1
    : catalog_mgr_(catalog_mgr) { }
51
52
  /**
53
   * This method balances a catalog. A catalog is considered overflowed if
54
   * its weight (the number of entries in the catalog database) is greater than
55
   * the stablish threshold defined by the max_weight_ variable. If a catalog is
56
   * overflowed it will be split in a number of new catalogs, meeting the
57
   * following properties:
58
   * - There will be a catalog in the same place where the overflowed catalog
59
   * was at the beginning of the process.
60
   * - New generated catalogs will have a maximum weight defined by the
61
   * attribute balance_weight_, which will always be lesser than max_weight_.
62
   * - The number of new generated catalogs is unknown.
63
   *
64
   * A catalog is considered underflowed if its weight is lesser than the
65
   * minimum threshold, defined by the min_weight_ variable. If a catalog is
66
   * underflowed it will be merged with the father catalog. However, it is
67
   * remarkable that such an operation can provoke an overflow in the father,
68
   * which will be threaten separately.
69
   *
70
   * If a catalog is not overflowed or underflowed no changes will be applied
71
   * to it.
72
   *
73
   * For a WritableCatalogManager to be balanced it requires the is_balanced_
74
   * flag to be set to true.
75
   *
76
   * @param catalog the catalog to be balanced, or NULL to balance all catalogs
77
   * in the WritableCatalogManager
78
   */
79
  void Balance(catalog_t *catalog);
80
81
 private:
82
 /**
83
  * A VirtualNode is the abstract representation of an entry in a catalog.
84
  * It is used by the CatalogBalancer to "virtually" represent the
85
  * file-system tree of a concrete catalog and spot the nodes where
86
  * a new catalog should be created.
87
  *
88
  * One of its main functions is to keep track of the current weight of a
89
  * file or directory, i.e., the number of entries it contains. Concretely:
90
  * - Normal files and symlinks: it is always one.
91
  * - Normal directories: one plus the weight of each node it contains.
92
  * - Directories which are catalog mount points: it is always one
93
  */
94
36
  struct VirtualNode {
95
    std::vector<VirtualNode> children;
96
    unsigned weight;
97
    DirectoryEntry dirent;
98
    std::string path;
99
    bool is_new_nested_catalog;
100
101
   /**
102
    * Extracts not only the direct children of this VirtualNode, but
103
    * recursively all the VirtualNodes of this catalog. When a VirtualNode that
104
    * is the root of a nested catalog is created, it won't be expanded. In order
105
    * to actually expand that node it will be necessary to manually call this
106
    * method on it.
107
    *
108
    * @param catalog_mgr catalog manager that contains the file system tree
109
    */
110
    void ExtractChildren(CatalogMgrT *catalog_mgr);
111
    void FixWeight();
112
1
    VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr)
113
      : children(), weight(1), dirent(), path(path),
114
1
        is_new_nested_catalog(false) {
115
1
      catalog_mgr->LookupPath(path, kLookupSole, &dirent);
116
1
    }
117
5
    VirtualNode(const std::string &path, const DirectoryEntry &dirent,
118
                CatalogMgrT *catalog_mgr)
119
      : children(), weight(1), dirent(dirent), path(path),
120
5
        is_new_nested_catalog(false) {
121

5
      if (!IsCatalog() && IsDirectory())
122
2
        ExtractChildren(catalog_mgr);
123
5
    }
124
13
    bool IsDirectory() { return dirent.IsDirectory(); }
125
13
    bool IsCatalog() { return is_new_nested_catalog ||
126

13
        dirent.IsNestedCatalogMountpoint(); }
127
  };
128
  typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t;
129
130
  void PartitionOptimally(VirtualNode *virtual_node);
131
  void AddCatalogMarker(std::string path);
132
  DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name,
133
                                                 uid_t uid,
134
                                                 gid_t gid);
135
  static VirtualNode *MaxChild(VirtualNode *virtual_node);
136
  void AddCatalog(VirtualNode *child_node);
137
138
  CatalogMgrT *catalog_mgr_;
139
};
140
141
}  // namespace catalog
142
143
#include "catalog_balancer_impl.h"
144
145
#endif  // CVMFS_CATALOG_BALANCER_H_
146