GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/catalog_balancer.h
Date: 2025-06-22 02:36:02
Exec Total Coverage
Lines: 22 22 100.0%
Branches: 15 22 68.2%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5 #ifndef CVMFS_CATALOG_BALANCER_H_
6 #define CVMFS_CATALOG_BALANCER_H_
7
8 #include <string>
9 #include <vector>
10
11 #include "catalog_mgr.h"
12 #include "directory_entry.h"
13
14
15 namespace catalog {
16
17 /**
18 * This class is in charge of "balancing" a catalog embedded in a
19 * WritableCatalogManager. The process of balancing consists of keeping the
20 * number of entries in each catalog between a maximum and minimum threshold so
21 * that all of them can be easily manipulable. This way there won't be catalogs
22 * that take a lot of time being downloaded, or their SQL operations will take
23 * a reasonable amount of time.
24 *
25 * The CatalogBalancer uses the following WritableCatalogManager attributes:
26 * - max_weight_: maximum number of entries in a catalog
27 * - min_weight_: minimum number of entries in a catalog
28 * - balance_weight_: maximum number of entries during the balancing process
29 *
30 * The balancing process is done traversing the entire catalog tree in
31 * postorder, so that the deepest catalogs are analyzed before their parents.
32 * Each catalog is then analyzed individually, existing three different
33 * scenarios:
34 *
35 * a) The number of entries of the catalog is between max_weight_ and
36 * min_weight_: nothing happens, the catalog remains untouched.
37 *
38 * b) The number of entries of the catalog is greater than max_weight_: the
39 * catalog gets split in smaller catalogs. One of the new generated catalogs
40 * will be mounted in the original mount point.
41 *
42 * c) The number of entries of the catalog is lesser than min_weight_: the
43 * catalog gets merged with its father (except the root catalog, obviously).
44 */
45 template<class CatalogMgrT>
46 class CatalogBalancer {
47 public:
48 typedef typename CatalogMgrT::catalog_t catalog_t;
49 9 explicit CatalogBalancer(CatalogMgrT *catalog_mgr)
50 9 : catalog_mgr_(catalog_mgr) { }
51
52 /**
53 * This method balances a catalog. A catalog is considered overflowed if
54 * its weight (the number of entries in the catalog database) is greater than
55 * the stablish threshold defined by the max_weight_ variable. If a catalog is
56 * overflowed it will be split in a number of new catalogs, meeting the
57 * following properties:
58 * - There will be a catalog in the same place where the overflowed catalog
59 * was at the beginning of the process.
60 * - New generated catalogs will have a maximum weight defined by the
61 * attribute balance_weight_, which will always be lesser than max_weight_.
62 * - The number of new generated catalogs is unknown.
63 *
64 * A catalog is considered underflowed if its weight is lesser than the
65 * minimum threshold, defined by the min_weight_ variable. If a catalog is
66 * underflowed it will be merged with the father catalog. However, it is
67 * remarkable that such an operation can provoke an overflow in the father,
68 * which will be threaten separately.
69 *
70 * If a catalog is not overflowed or underflowed no changes will be applied
71 * to it.
72 *
73 * For a WritableCatalogManager to be balanced it requires the is_balanced_
74 * flag to be set to true.
75 *
76 * @param catalog the catalog to be balanced, or NULL to balance all catalogs
77 * in the WritableCatalogManager
78 */
79 void Balance(catalog_t *catalog);
80
81 private:
82 /**
83 * A VirtualNode is the abstract representation of an entry in a catalog.
84 * It is used by the CatalogBalancer to "virtually" represent the
85 * file-system tree of a concrete catalog and spot the nodes where
86 * a new catalog should be created.
87 *
88 * One of its main functions is to keep track of the current weight of a
89 * file or directory, i.e., the number of entries it contains. Concretely:
90 * - Normal files and symlinks: it is always one.
91 * - Normal directories: one plus the weight of each node it contains.
92 * - Directories which are catalog mount points: it is always one
93 */
94 struct VirtualNode {
95 std::vector<VirtualNode> children;
96 unsigned weight;
97 DirectoryEntry dirent;
98 std::string path;
99 bool is_new_nested_catalog;
100
101 /**
102 * Extracts not only the direct children of this VirtualNode, but
103 * recursively all the VirtualNodes of this catalog. When a VirtualNode that
104 * is the root of a nested catalog is created, it won't be expanded. In
105 * order to actually expand that node it will be necessary to manually call
106 * this method on it.
107 *
108 * @param catalog_mgr catalog manager that contains the file system tree
109 */
110 void ExtractChildren(CatalogMgrT *catalog_mgr);
111 void FixWeight();
112 9 VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr)
113 9 : children()
114 9 , weight(1)
115
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 , dirent()
116
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 , path(path)
117 9 , is_new_nested_catalog(false) {
118
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 catalog_mgr->LookupPath(path, kLookupDefault, &dirent);
119 9 }
120 54 VirtualNode(const std::string &path, const DirectoryEntry &dirent,
121 CatalogMgrT *catalog_mgr)
122 54 : children()
123 54 , weight(1)
124
1/2
✓ Branch 1 taken 54 times.
✗ Branch 2 not taken.
54 , dirent(dirent)
125
1/2
✓ Branch 1 taken 54 times.
✗ Branch 2 not taken.
54 , path(path)
126 54 , is_new_nested_catalog(false) {
127
6/6
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 18 times.
✓ Branch 4 taken 18 times.
✓ Branch 5 taken 18 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 36 times.
54 if (!IsCatalog() && IsDirectory())
128
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 ExtractChildren(catalog_mgr);
129 54 }
130 126 bool IsDirectory() { return dirent.IsDirectory(); }
131 135 bool IsCatalog() {
132
3/4
✓ Branch 0 taken 135 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 45 times.
✓ Branch 4 taken 90 times.
135 return is_new_nested_catalog || dirent.IsNestedCatalogMountpoint();
133 }
134 };
135 typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t;
136
137 void PartitionOptimally(VirtualNode *virtual_node);
138 void AddCatalogMarker(std::string path);
139 DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name,
140 uid_t uid,
141 gid_t gid);
142 static VirtualNode *MaxChild(VirtualNode *virtual_node);
143 void AddCatalog(VirtualNode *child_node);
144
145 CatalogMgrT *catalog_mgr_;
146 };
147
148 } // namespace catalog
149
150 #include "catalog_balancer_impl.h"
151
152 #endif // CVMFS_CATALOG_BALANCER_H_
153