Directory: | cvmfs/ |
---|---|
File: | cvmfs/catalog_balancer.h |
Date: | 2025-02-09 02:34:19 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 16 | 16 | 100.0% |
Branches: | 15 | 22 | 68.2% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #ifndef CVMFS_CATALOG_BALANCER_H_ | ||
6 | #define CVMFS_CATALOG_BALANCER_H_ | ||
7 | |||
8 | #include <string> | ||
9 | #include <vector> | ||
10 | |||
11 | #include "catalog_mgr.h" | ||
12 | #include "directory_entry.h" | ||
13 | |||
14 | |||
15 | namespace catalog { | ||
16 | |||
17 | /** | ||
18 | * This class is in charge of "balancing" a catalog embedded in a | ||
19 | * WritableCatalogManager. The process of balancing consists of keeping the | ||
20 | * number of entries in each catalog between a maximum and minimum threshold so | ||
21 | * that all of them can be easily manipulable. This way there won't be catalogs | ||
22 | * that take a lot of time being downloaded, or their SQL operations will take | ||
23 | * a reasonable amount of time. | ||
24 | * | ||
25 | * The CatalogBalancer uses the following WritableCatalogManager attributes: | ||
26 | * - max_weight_: maximum number of entries in a catalog | ||
27 | * - min_weight_: minimum number of entries in a catalog | ||
28 | * - balance_weight_: maximum number of entries during the balancing process | ||
29 | * | ||
30 | * The balancing process is done traversing the entire catalog tree in | ||
31 | * postorder, so that the deepest catalogs are analyzed before their parents. | ||
32 | * Each catalog is then analyzed individually, existing three different | ||
33 | * scenarios: | ||
34 | * | ||
35 | * a) The number of entries of the catalog is between max_weight_ and | ||
36 | * min_weight_: nothing happens, the catalog remains untouched. | ||
37 | * | ||
38 | * b) The number of entries of the catalog is greater than max_weight_: the | ||
39 | * catalog gets split in smaller catalogs. One of the new generated catalogs | ||
40 | * will be mounted in the original mount point. | ||
41 | * | ||
42 | * c) The number of entries of the catalog is lesser than min_weight_: the | ||
43 | * catalog gets merged with its father (except the root catalog, obviously). | ||
44 | */ | ||
45 | template <class CatalogMgrT> | ||
46 | class CatalogBalancer { | ||
47 | public: | ||
48 | typedef typename CatalogMgrT::catalog_t catalog_t; | ||
49 | 1 | explicit CatalogBalancer(CatalogMgrT *catalog_mgr) | |
50 | 1 | : catalog_mgr_(catalog_mgr) { } | |
51 | |||
52 | /** | ||
53 | * This method balances a catalog. A catalog is considered overflowed if | ||
54 | * its weight (the number of entries in the catalog database) is greater than | ||
55 | * the stablish threshold defined by the max_weight_ variable. If a catalog is | ||
56 | * overflowed it will be split in a number of new catalogs, meeting the | ||
57 | * following properties: | ||
58 | * - There will be a catalog in the same place where the overflowed catalog | ||
59 | * was at the beginning of the process. | ||
60 | * - New generated catalogs will have a maximum weight defined by the | ||
61 | * attribute balance_weight_, which will always be lesser than max_weight_. | ||
62 | * - The number of new generated catalogs is unknown. | ||
63 | * | ||
64 | * A catalog is considered underflowed if its weight is lesser than the | ||
65 | * minimum threshold, defined by the min_weight_ variable. If a catalog is | ||
66 | * underflowed it will be merged with the father catalog. However, it is | ||
67 | * remarkable that such an operation can provoke an overflow in the father, | ||
68 | * which will be threaten separately. | ||
69 | * | ||
70 | * If a catalog is not overflowed or underflowed no changes will be applied | ||
71 | * to it. | ||
72 | * | ||
73 | * For a WritableCatalogManager to be balanced it requires the is_balanced_ | ||
74 | * flag to be set to true. | ||
75 | * | ||
76 | * @param catalog the catalog to be balanced, or NULL to balance all catalogs | ||
77 | * in the WritableCatalogManager | ||
78 | */ | ||
79 | void Balance(catalog_t *catalog); | ||
80 | |||
81 | private: | ||
82 | /** | ||
83 | * A VirtualNode is the abstract representation of an entry in a catalog. | ||
84 | * It is used by the CatalogBalancer to "virtually" represent the | ||
85 | * file-system tree of a concrete catalog and spot the nodes where | ||
86 | * a new catalog should be created. | ||
87 | * | ||
88 | * One of its main functions is to keep track of the current weight of a | ||
89 | * file or directory, i.e., the number of entries it contains. Concretely: | ||
90 | * - Normal files and symlinks: it is always one. | ||
91 | * - Normal directories: one plus the weight of each node it contains. | ||
92 | * - Directories which are catalog mount points: it is always one | ||
93 | */ | ||
94 | struct VirtualNode { | ||
95 | std::vector<VirtualNode> children; | ||
96 | unsigned weight; | ||
97 | DirectoryEntry dirent; | ||
98 | std::string path; | ||
99 | bool is_new_nested_catalog; | ||
100 | |||
101 | /** | ||
102 | * Extracts not only the direct children of this VirtualNode, but | ||
103 | * recursively all the VirtualNodes of this catalog. When a VirtualNode that | ||
104 | * is the root of a nested catalog is created, it won't be expanded. In order | ||
105 | * to actually expand that node it will be necessary to manually call this | ||
106 | * method on it. | ||
107 | * | ||
108 | * @param catalog_mgr catalog manager that contains the file system tree | ||
109 | */ | ||
110 | void ExtractChildren(CatalogMgrT *catalog_mgr); | ||
111 | void FixWeight(); | ||
112 | 1 | VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr) | |
113 |
2/4✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1 times.
✗ Branch 6 not taken.
|
1 | : children(), weight(1), dirent(), path(path), |
114 | 1 | is_new_nested_catalog(false) { | |
115 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | catalog_mgr->LookupPath(path, kLookupDefault, &dirent); |
116 | 1 | } | |
117 | 6 | VirtualNode(const std::string &path, const DirectoryEntry &dirent, | |
118 | CatalogMgrT *catalog_mgr) | ||
119 |
2/4✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 6 times.
✗ Branch 6 not taken.
|
6 | : children(), weight(1), dirent(dirent), path(path), |
120 | 6 | is_new_nested_catalog(false) { | |
121 |
6/6✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 4 times.
|
6 | if (!IsCatalog() && IsDirectory()) |
122 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | ExtractChildren(catalog_mgr); |
123 | 6 | } | |
124 | 14 | bool IsDirectory() { return dirent.IsDirectory(); } | |
125 |
3/4✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 10 times.
|
30 | bool IsCatalog() { return is_new_nested_catalog || |
126 | 30 | dirent.IsNestedCatalogMountpoint(); } | |
127 | }; | ||
128 | typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t; | ||
129 | |||
130 | void PartitionOptimally(VirtualNode *virtual_node); | ||
131 | void AddCatalogMarker(std::string path); | ||
132 | DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name, | ||
133 | uid_t uid, | ||
134 | gid_t gid); | ||
135 | static VirtualNode *MaxChild(VirtualNode *virtual_node); | ||
136 | void AddCatalog(VirtualNode *child_node); | ||
137 | |||
138 | CatalogMgrT *catalog_mgr_; | ||
139 | }; | ||
140 | |||
141 | } // namespace catalog | ||
142 | |||
143 | #include "catalog_balancer_impl.h" | ||
144 | |||
145 | #endif // CVMFS_CATALOG_BALANCER_H_ | ||
146 | |||
147 |