GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/catalog_balancer.h
Date: 2024-04-28 02:33:07
Exec Total Coverage
Lines: 16 16 100.0%
Branches: 15 22 68.2%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_CATALOG_BALANCER_H_
6 #define CVMFS_CATALOG_BALANCER_H_
7
8 #include <string>
9 #include <vector>
10
11 #include "catalog_mgr.h"
12 #include "directory_entry.h"
13
14
15 namespace catalog {
16
17 /**
18 * This class is in charge of "balancing" a catalog embedded in a
19 * WritableCatalogManager. The process of balancing consists of keeping the
20 * number of entries in each catalog between a maximum and minimum threshold so
21 * that all of them can be easily manipulable. This way there won't be catalogs
22 * that take a lot of time being downloaded, or their SQL operations will take
23 * a reasonable amount of time.
24 *
25 * The CatalogBalancer uses the following WritableCatalogManager attributes:
26 * - max_weight_: maximum number of entries in a catalog
27 * - min_weight_: minimum number of entries in a catalog
28 * - balance_weight_: maximum number of entries during the balancing process
29 *
30 * The balancing process is done traversing the entire catalog tree in
31 * postorder, so that the deepest catalogs are analyzed before their parents.
32 * Each catalog is then analyzed individually, existing three different
33 * scenarios:
34 *
35 * a) The number of entries of the catalog is between max_weight_ and
36 * min_weight_: nothing happens, the catalog remains untouched.
37 *
38 * b) The number of entries of the catalog is greater than max_weight_: the
39 * catalog gets split in smaller catalogs. One of the new generated catalogs
40 * will be mounted in the original mount point.
41 *
42 * c) The number of entries of the catalog is lesser than min_weight_: the
43 * catalog gets merged with its father (except the root catalog, obviously).
44 */
45 template <class CatalogMgrT>
46 class CatalogBalancer {
47 public:
48 typedef typename CatalogMgrT::catalog_t catalog_t;
49 1 explicit CatalogBalancer(CatalogMgrT *catalog_mgr)
50 1 : catalog_mgr_(catalog_mgr) { }
51
52 /**
53 * This method balances a catalog. A catalog is considered overflowed if
54 * its weight (the number of entries in the catalog database) is greater than
55 * the stablish threshold defined by the max_weight_ variable. If a catalog is
56 * overflowed it will be split in a number of new catalogs, meeting the
57 * following properties:
58 * - There will be a catalog in the same place where the overflowed catalog
59 * was at the beginning of the process.
60 * - New generated catalogs will have a maximum weight defined by the
61 * attribute balance_weight_, which will always be lesser than max_weight_.
62 * - The number of new generated catalogs is unknown.
63 *
64 * A catalog is considered underflowed if its weight is lesser than the
65 * minimum threshold, defined by the min_weight_ variable. If a catalog is
66 * underflowed it will be merged with the father catalog. However, it is
67 * remarkable that such an operation can provoke an overflow in the father,
68 * which will be threaten separately.
69 *
70 * If a catalog is not overflowed or underflowed no changes will be applied
71 * to it.
72 *
73 * For a WritableCatalogManager to be balanced it requires the is_balanced_
74 * flag to be set to true.
75 *
76 * @param catalog the catalog to be balanced, or NULL to balance all catalogs
77 * in the WritableCatalogManager
78 */
79 void Balance(catalog_t *catalog);
80
81 private:
82 /**
83 * A VirtualNode is the abstract representation of an entry in a catalog.
84 * It is used by the CatalogBalancer to "virtually" represent the
85 * file-system tree of a concrete catalog and spot the nodes where
86 * a new catalog should be created.
87 *
88 * One of its main functions is to keep track of the current weight of a
89 * file or directory, i.e., the number of entries it contains. Concretely:
90 * - Normal files and symlinks: it is always one.
91 * - Normal directories: one plus the weight of each node it contains.
92 * - Directories which are catalog mount points: it is always one
93 */
94 struct VirtualNode {
95 std::vector<VirtualNode> children;
96 unsigned weight;
97 DirectoryEntry dirent;
98 std::string path;
99 bool is_new_nested_catalog;
100
101 /**
102 * Extracts not only the direct children of this VirtualNode, but
103 * recursively all the VirtualNodes of this catalog. When a VirtualNode that
104 * is the root of a nested catalog is created, it won't be expanded. In order
105 * to actually expand that node it will be necessary to manually call this
106 * method on it.
107 *
108 * @param catalog_mgr catalog manager that contains the file system tree
109 */
110 void ExtractChildren(CatalogMgrT *catalog_mgr);
111 void FixWeight();
112 1 VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr)
113
2/4
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1 times.
✗ Branch 6 not taken.
1 : children(), weight(1), dirent(), path(path),
114 1 is_new_nested_catalog(false) {
115
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 catalog_mgr->LookupPath(path, kLookupDefault, &dirent);
116 1 }
117 6 VirtualNode(const std::string &path, const DirectoryEntry &dirent,
118 CatalogMgrT *catalog_mgr)
119
2/4
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 6 times.
✗ Branch 6 not taken.
6 : children(), weight(1), dirent(dirent), path(path),
120 6 is_new_nested_catalog(false) {
121
6/6
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 4 times.
6 if (!IsCatalog() && IsDirectory())
122
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 ExtractChildren(catalog_mgr);
123 6 }
124 14 bool IsDirectory() { return dirent.IsDirectory(); }
125
3/4
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 10 times.
30 bool IsCatalog() { return is_new_nested_catalog ||
126 30 dirent.IsNestedCatalogMountpoint(); }
127 };
128 typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t;
129
130 void PartitionOptimally(VirtualNode *virtual_node);
131 void AddCatalogMarker(std::string path);
132 DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name,
133 uid_t uid,
134 gid_t gid);
135 static VirtualNode *MaxChild(VirtualNode *virtual_node);
136 void AddCatalog(VirtualNode *child_node);
137
138 CatalogMgrT *catalog_mgr_;
139 };
140
141 } // namespace catalog
142
143 #include "catalog_balancer_impl.h"
144
145 #endif // CVMFS_CATALOG_BALANCER_H_
146
147