| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/catalog_balancer.h |
| Date: | 2025-11-09 02:35:23 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 22 | 22 | 100.0% |
| Branches: | 15 | 22 | 68.2% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef CVMFS_CATALOG_BALANCER_H_ | ||
| 6 | #define CVMFS_CATALOG_BALANCER_H_ | ||
| 7 | |||
| 8 | #include <string> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "catalog_mgr.h" | ||
| 12 | #include "directory_entry.h" | ||
| 13 | |||
| 14 | |||
| 15 | namespace catalog { | ||
| 16 | |||
| 17 | /** | ||
| 18 | * This class is in charge of "balancing" a catalog embedded in a | ||
| 19 | * WritableCatalogManager. The process of balancing consists of keeping the | ||
| 20 | * number of entries in each catalog between a maximum and minimum threshold so | ||
| 21 | * that all of them can be easily manipulable. This way there won't be catalogs | ||
| 22 | * that take a lot of time being downloaded, or their SQL operations will take | ||
| 23 | * a reasonable amount of time. | ||
| 24 | * | ||
| 25 | * The CatalogBalancer uses the following WritableCatalogManager attributes: | ||
| 26 | * - max_weight_: maximum number of entries in a catalog | ||
| 27 | * - min_weight_: minimum number of entries in a catalog | ||
| 28 | * - balance_weight_: maximum number of entries during the balancing process | ||
| 29 | * | ||
| 30 | * The balancing process is done traversing the entire catalog tree in | ||
| 31 | * postorder, so that the deepest catalogs are analyzed before their parents. | ||
| 32 | * Each catalog is then analyzed individually, existing three different | ||
| 33 | * scenarios: | ||
| 34 | * | ||
| 35 | * a) The number of entries of the catalog is between max_weight_ and | ||
| 36 | * min_weight_: nothing happens, the catalog remains untouched. | ||
| 37 | * | ||
| 38 | * b) The number of entries of the catalog is greater than max_weight_: the | ||
| 39 | * catalog gets split in smaller catalogs. One of the new generated catalogs | ||
| 40 | * will be mounted in the original mount point. | ||
| 41 | * | ||
| 42 | * c) The number of entries of the catalog is lesser than min_weight_: the | ||
| 43 | * catalog gets merged with its father (except the root catalog, obviously). | ||
| 44 | */ | ||
| 45 | template<class CatalogMgrT> | ||
| 46 | class CatalogBalancer { | ||
| 47 | public: | ||
| 48 | typedef typename CatalogMgrT::catalog_t catalog_t; | ||
| 49 | 48 | explicit CatalogBalancer(CatalogMgrT *catalog_mgr) | |
| 50 | 48 | : catalog_mgr_(catalog_mgr) { } | |
| 51 | |||
| 52 | /** | ||
| 53 | * This method balances a catalog. A catalog is considered overflowed if | ||
| 54 | * its weight (the number of entries in the catalog database) is greater than | ||
| 55 | * the stablish threshold defined by the max_weight_ variable. If a catalog is | ||
| 56 | * overflowed it will be split in a number of new catalogs, meeting the | ||
| 57 | * following properties: | ||
| 58 | * - There will be a catalog in the same place where the overflowed catalog | ||
| 59 | * was at the beginning of the process. | ||
| 60 | * - New generated catalogs will have a maximum weight defined by the | ||
| 61 | * attribute balance_weight_, which will always be lesser than max_weight_. | ||
| 62 | * - The number of new generated catalogs is unknown. | ||
| 63 | * | ||
| 64 | * A catalog is considered underflowed if its weight is lesser than the | ||
| 65 | * minimum threshold, defined by the min_weight_ variable. If a catalog is | ||
| 66 | * underflowed it will be merged with the father catalog. However, it is | ||
| 67 | * remarkable that such an operation can provoke an overflow in the father, | ||
| 68 | * which will be threaten separately. | ||
| 69 | * | ||
| 70 | * If a catalog is not overflowed or underflowed no changes will be applied | ||
| 71 | * to it. | ||
| 72 | * | ||
| 73 | * For a WritableCatalogManager to be balanced it requires the is_balanced_ | ||
| 74 | * flag to be set to true. | ||
| 75 | * | ||
| 76 | * @param catalog the catalog to be balanced, or NULL to balance all catalogs | ||
| 77 | * in the WritableCatalogManager | ||
| 78 | */ | ||
| 79 | void Balance(catalog_t *catalog); | ||
| 80 | |||
| 81 | private: | ||
| 82 | /** | ||
| 83 | * A VirtualNode is the abstract representation of an entry in a catalog. | ||
| 84 | * It is used by the CatalogBalancer to "virtually" represent the | ||
| 85 | * file-system tree of a concrete catalog and spot the nodes where | ||
| 86 | * a new catalog should be created. | ||
| 87 | * | ||
| 88 | * One of its main functions is to keep track of the current weight of a | ||
| 89 | * file or directory, i.e., the number of entries it contains. Concretely: | ||
| 90 | * - Normal files and symlinks: it is always one. | ||
| 91 | * - Normal directories: one plus the weight of each node it contains. | ||
| 92 | * - Directories which are catalog mount points: it is always one | ||
| 93 | */ | ||
| 94 | struct VirtualNode { | ||
| 95 | std::vector<VirtualNode> children; | ||
| 96 | unsigned weight; | ||
| 97 | DirectoryEntry dirent; | ||
| 98 | std::string path; | ||
| 99 | bool is_new_nested_catalog; | ||
| 100 | |||
| 101 | /** | ||
| 102 | * Extracts not only the direct children of this VirtualNode, but | ||
| 103 | * recursively all the VirtualNodes of this catalog. When a VirtualNode that | ||
| 104 | * is the root of a nested catalog is created, it won't be expanded. In | ||
| 105 | * order to actually expand that node it will be necessary to manually call | ||
| 106 | * this method on it. | ||
| 107 | * | ||
| 108 | * @param catalog_mgr catalog manager that contains the file system tree | ||
| 109 | */ | ||
| 110 | void ExtractChildren(CatalogMgrT *catalog_mgr); | ||
| 111 | void FixWeight(); | ||
| 112 | 48 | VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr) | |
| 113 | 48 | : children() | |
| 114 | 48 | , weight(1) | |
| 115 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | , dirent() |
| 116 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | , path(path) |
| 117 | 48 | , is_new_nested_catalog(false) { | |
| 118 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | catalog_mgr->LookupPath(path, kLookupDefault, &dirent); |
| 119 | 48 | } | |
| 120 | 288 | VirtualNode(const std::string &path, const DirectoryEntry &dirent, | |
| 121 | CatalogMgrT *catalog_mgr) | ||
| 122 | 288 | : children() | |
| 123 | 288 | , weight(1) | |
| 124 |
1/2✓ Branch 1 taken 288 times.
✗ Branch 2 not taken.
|
288 | , dirent(dirent) |
| 125 |
1/2✓ Branch 1 taken 288 times.
✗ Branch 2 not taken.
|
288 | , path(path) |
| 126 | 288 | , is_new_nested_catalog(false) { | |
| 127 |
6/6✓ Branch 1 taken 192 times.
✓ Branch 2 taken 96 times.
✓ Branch 4 taken 96 times.
✓ Branch 5 taken 96 times.
✓ Branch 6 taken 96 times.
✓ Branch 7 taken 192 times.
|
288 | if (!IsCatalog() && IsDirectory()) |
| 128 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | ExtractChildren(catalog_mgr); |
| 129 | 288 | } | |
| 130 | 672 | bool IsDirectory() { return dirent.IsDirectory(); } | |
| 131 | 720 | bool IsCatalog() { | |
| 132 |
3/4✓ Branch 0 taken 720 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 240 times.
✓ Branch 4 taken 480 times.
|
720 | return is_new_nested_catalog || dirent.IsNestedCatalogMountpoint(); |
| 133 | } | ||
| 134 | }; | ||
| 135 | typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t; | ||
| 136 | |||
| 137 | void PartitionOptimally(VirtualNode *virtual_node); | ||
| 138 | void AddCatalogMarker(std::string path); | ||
| 139 | DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name, | ||
| 140 | uid_t uid, | ||
| 141 | gid_t gid); | ||
| 142 | static VirtualNode *MaxChild(VirtualNode *virtual_node); | ||
| 143 | void AddCatalog(VirtualNode *child_node); | ||
| 144 | |||
| 145 | CatalogMgrT *catalog_mgr_; | ||
| 146 | }; | ||
| 147 | |||
| 148 | } // namespace catalog | ||
| 149 | |||
| 150 | #include "catalog_balancer_impl.h" | ||
| 151 | |||
| 152 | #endif // CVMFS_CATALOG_BALANCER_H_ | ||
| 153 |