| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/catalog_balancer_impl.h |
| Date: | 2025-11-09 02:35:23 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 89 | 91 | 97.8% |
| Branches: | 70 | 122 | 57.4% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef CVMFS_CATALOG_BALANCER_IMPL_H_ | ||
| 6 | #define CVMFS_CATALOG_BALANCER_IMPL_H_ | ||
| 7 | |||
| 8 | |||
| 9 | #include <inttypes.h> | ||
| 10 | |||
| 11 | #include <cassert> | ||
| 12 | #include <cstdlib> | ||
| 13 | #include <string> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include "catalog_mgr.h" | ||
| 17 | #include "compression/compression.h" | ||
| 18 | #include "crypto/hash.h" | ||
| 19 | #include "directory_entry.h" | ||
| 20 | #include "util/logging.h" | ||
| 21 | |||
| 22 | |||
| 23 | using namespace std; // NOLINT | ||
| 24 | |||
| 25 | namespace catalog { | ||
| 26 | |||
| 27 | template<class CatalogMgrT> | ||
| 28 | 96 | DirectoryEntryBase CatalogBalancer<CatalogMgrT>::MakeEmptyDirectoryEntryBase( | |
| 29 | string name, uid_t uid, gid_t gid) { | ||
| 30 | // Note that another entity needs to ensure that the object of an empty | ||
| 31 | // file is in the repository! It is currently done by the sync_mediator. | ||
| 32 | 96 | shash::Algorithms algorithm = catalog_mgr_->spooler_->GetHashAlgorithm(); | |
| 33 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | shash::Any file_hash(algorithm); |
| 34 | void *empty_compressed; | ||
| 35 | uint64_t sz_empty_compressed; | ||
| 36 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | bool retval = zlib::CompressMem2Mem(NULL, 0, &empty_compressed, |
| 37 | &sz_empty_compressed); | ||
| 38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 96 times.
|
96 | assert(retval); |
| 39 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | shash::HashMem(static_cast<unsigned char *>(empty_compressed), |
| 40 | sz_empty_compressed, &file_hash); | ||
| 41 | 96 | free(empty_compressed); | |
| 42 | |||
| 43 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | DirectoryEntryBase deb; |
| 44 |
2/4✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 96 times.
✗ Branch 5 not taken.
|
96 | deb.name_ = NameString(name); |
| 45 | 96 | deb.mode_ = S_IFREG | S_IRUSR | S_IWUSR; | |
| 46 | 96 | deb.checksum_ = file_hash; | |
| 47 | 96 | deb.mtime_ = time(NULL); | |
| 48 | 96 | deb.uid_ = uid; | |
| 49 | 96 | deb.gid_ = gid; | |
| 50 | 192 | return deb; | |
| 51 | } | ||
| 52 | |||
| 53 | template<class CatalogMgrT> | ||
| 54 | 48 | void CatalogBalancer<CatalogMgrT>::AddCatalogMarker(string path) { | |
| 55 | 48 | XattrList xattr; | |
| 56 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | DirectoryEntry parent; |
| 57 | bool retval; | ||
| 58 |
2/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
|
48 | retval = catalog_mgr_->LookupPath(PathString(path), kLookupDefault, &parent); |
| 59 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | assert(retval); |
| 60 |
2/4✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 48 times.
✗ Branch 8 not taken.
|
96 | DirectoryEntryBase cvmfscatalog = MakeEmptyDirectoryEntryBase( |
| 61 | ".cvmfscatalog", parent.uid(), parent.gid()); | ||
| 62 |
2/4✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 48 times.
✗ Branch 8 not taken.
|
96 | DirectoryEntryBase cvmfsautocatalog = MakeEmptyDirectoryEntryBase( |
| 63 | ".cvmfsautocatalog", parent.uid(), parent.gid()); | ||
| 64 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | string relative_path = path.substr(1); |
| 65 |
0/2✗ Branch 1 not taken.
✗ Branch 2 not taken.
|
48 | catalog_mgr_->AddFile(cvmfscatalog, xattr, relative_path); |
| 66 |
0/2✗ Branch 1 not taken.
✗ Branch 2 not taken.
|
48 | catalog_mgr_->AddFile(cvmfsautocatalog, xattr, relative_path); |
| 67 | 48 | } | |
| 68 | |||
| 69 | template<class CatalogMgrT> | ||
| 70 | 96 | void CatalogBalancer<CatalogMgrT>::Balance(catalog_t *catalog) { | |
| 71 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 48 times.
|
96 | if (catalog == NULL) { |
| 72 | // obtain a copy of the catalogs | ||
| 73 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | vector<catalog_t *> catalogs = catalog_mgr_->GetCatalogs(); |
| 74 | // we need to reverse the catalog list in order to analyze the | ||
| 75 | // last added ones first. This is necessary in the weird case the child | ||
| 76 | // catalog's underflow provokes an overflow in the father | ||
| 77 |
1/2✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
|
48 | reverse(catalogs.begin(), catalogs.end()); |
| 78 |
2/2✓ Branch 1 taken 48 times.
✓ Branch 2 taken 48 times.
|
96 | for (unsigned i = 0; i < catalogs.size(); ++i) |
| 79 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | Balance(catalogs[i]); |
| 80 | 48 | return; | |
| 81 | 48 | } | |
| 82 |
2/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
|
48 | string catalog_path = catalog->mountpoint().ToString(); |
| 83 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | virtual_node_t root_node(catalog_path, catalog_mgr_); |
| 84 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | root_node.ExtractChildren(catalog_mgr_); |
| 85 | // we have just recursively loaded the entire virtual tree! | ||
| 86 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | PartitionOptimally(&root_node); |
| 87 | 48 | } | |
| 88 | |||
| 89 | template<class CatalogMgrT> | ||
| 90 | 144 | void CatalogBalancer<CatalogMgrT>::PartitionOptimally( | |
| 91 | virtual_node_t *virtual_node) { | ||
| 92 | // postorder track of the fs-tree | ||
| 93 |
2/2✓ Branch 1 taken 288 times.
✓ Branch 2 taken 144 times.
|
432 | for (unsigned i = 0; i < virtual_node->children.size(); ++i) { |
| 94 | 288 | virtual_node_t *virtual_child = &virtual_node->children[i]; | |
| 95 |
6/6✓ Branch 1 taken 192 times.
✓ Branch 2 taken 96 times.
✓ Branch 4 taken 96 times.
✓ Branch 5 taken 96 times.
✓ Branch 6 taken 96 times.
✓ Branch 7 taken 192 times.
|
288 | if (virtual_child->IsDirectory() && !virtual_child->IsCatalog()) |
| 96 | 96 | PartitionOptimally(virtual_child); | |
| 97 | } | ||
| 98 | 144 | virtual_node->FixWeight(); | |
| 99 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 144 times.
|
192 | while (virtual_node->weight > catalog_mgr_->balance_weight_) { |
| 100 | 48 | virtual_node_t *heaviest_node = MaxChild(virtual_node); | |
| 101 | // we directly add a catalog in this node | ||
| 102 | // TODO(molina) apply heuristics here | ||
| 103 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | if (heaviest_node != NULL |
| 104 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | && heaviest_node->weight >= catalog_mgr_->min_weight_) { |
| 105 | // the catalog now generated _cannot_ be overflowed because the tree is | ||
| 106 | // being traversed in postorder, handling the lightest nodes first | ||
| 107 | 48 | unsigned max_weight = heaviest_node->weight; | |
| 108 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | AddCatalogMarker(heaviest_node->path); |
| 109 | 48 | AddCatalog(heaviest_node); | |
| 110 | 48 | virtual_node->weight -= (max_weight - 1); | |
| 111 | 48 | } else { | |
| 112 | // there is no possibility for any this directory's children | ||
| 113 | // to be a catalog | ||
| 114 | ✗ | LogCvmfs(kLogPublish, kLogStdout, | |
| 115 | "Couldn't create a new nested catalog" | ||
| 116 | " in any subdirectory of '%s' even though" | ||
| 117 | " currently it is overflowed", | ||
| 118 | virtual_node->path.c_str()); | ||
| 119 | ✗ | break; | |
| 120 | } | ||
| 121 | } | ||
| 122 | 144 | } | |
| 123 | |||
| 124 | template<class CatalogMgrT> | ||
| 125 | typename CatalogBalancer<CatalogMgrT>::VirtualNode * | ||
| 126 | 48 | CatalogBalancer<CatalogMgrT>::MaxChild(virtual_node_t *virtual_node) { | |
| 127 | 48 | virtual_node_t *max_child = NULL; | |
| 128 | 48 | unsigned max_weight = 0; | |
| 129 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
96 | if (virtual_node->IsDirectory() && !virtual_node->IsCatalog() |
| 130 |
3/6✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
|
96 | && !virtual_node->is_new_nested_catalog) { |
| 131 |
2/2✓ Branch 1 taken 48 times.
✓ Branch 2 taken 48 times.
|
96 | for (unsigned i = 0; i < virtual_node->children.size(); ++i) { |
| 132 | 48 | virtual_node_t *child = &virtual_node->children[i]; | |
| 133 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
96 | if (child->IsDirectory() && !child->IsCatalog() |
| 134 |
3/6✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 48 times.
✗ Branch 5 not taken.
|
96 | && max_weight < child->weight) { |
| 135 | 48 | max_weight = child->weight; | |
| 136 | 48 | max_child = child; | |
| 137 | } | ||
| 138 | } | ||
| 139 | } | ||
| 140 | 48 | return max_child; | |
| 141 | } | ||
| 142 | |||
| 143 | template<class CatalogMgrT> | ||
| 144 | 48 | void CatalogBalancer<CatalogMgrT>::AddCatalog(virtual_node_t *child_node) { | |
| 145 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | assert(child_node != NULL); |
| 146 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | string new_catalog_path = child_node->path.substr(1); |
| 147 |
0/2✗ Branch 1 not taken.
✗ Branch 2 not taken.
|
48 | catalog_mgr_->CreateNestedCatalog(new_catalog_path); |
| 148 | 48 | child_node->weight = 1; | |
| 149 | 48 | child_node->is_new_nested_catalog = true; | |
| 150 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | LogCvmfs(kLogPublish, kLogStdout, |
| 151 | "Automatic creation of nested" | ||
| 152 | " catalog in '%s'", | ||
| 153 | child_node->path.c_str()); | ||
| 154 | 48 | } | |
| 155 | |||
| 156 | template<class CatalogMgrT> | ||
| 157 | 144 | void CatalogBalancer<CatalogMgrT>::VirtualNode::ExtractChildren( | |
| 158 | CatalogMgrT *catalog_mgr) { | ||
| 159 | 144 | DirectoryEntryList direntlist; | |
| 160 |
1/2✓ Branch 1 taken 144 times.
✗ Branch 2 not taken.
|
144 | catalog_mgr->Listing(path, &direntlist); |
| 161 |
2/2✓ Branch 1 taken 288 times.
✓ Branch 2 taken 144 times.
|
432 | for (unsigned i = 0; i < direntlist.size(); ++i) { |
| 162 |
4/8✓ Branch 2 taken 288 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 288 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 288 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 288 times.
✗ Branch 12 not taken.
|
576 | string child_path = path + "/" + direntlist[i].name().ToString(); |
| 163 |
2/4✓ Branch 2 taken 288 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 288 times.
✗ Branch 6 not taken.
|
288 | children.push_back(virtual_node_t(child_path, direntlist[i], catalog_mgr)); |
| 164 | 288 | weight += children[i].weight; | |
| 165 | } | ||
| 166 | 144 | } | |
| 167 | |||
| 168 | /** | ||
| 169 | * This function is called in the father node when one of its children has | ||
| 170 | * changed its weight. This phenomenon only occurs when one of its children has | ||
| 171 | * become a new autogenerated nested catalog, and its weight is now 1 (which | ||
| 172 | * represents the sole DirectoryEntry of that directory). | ||
| 173 | * However this is not propagated to the top or the bottom of the tree, but each | ||
| 174 | * VirtualNode that represents a directory is responsible for calling it when | ||
| 175 | * previous operations might have changed the weight of its children (and | ||
| 176 | * consequently its own weight). | ||
| 177 | * This function is also called the first time this VirtualNodeit is used to | ||
| 178 | * set its weight to the actual value. Initially the weight of any VirtualNode | ||
| 179 | * is always 1. | ||
| 180 | */ | ||
| 181 | template<class CatalogMgrT> | ||
| 182 | 144 | void CatalogBalancer<CatalogMgrT>::VirtualNode::FixWeight() { | |
| 183 | 144 | weight = 1; | |
| 184 |
5/6✓ Branch 1 taken 96 times.
✓ Branch 2 taken 48 times.
✓ Branch 4 taken 96 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 96 times.
✓ Branch 7 taken 48 times.
|
144 | if (!IsCatalog() && IsDirectory()) { |
| 185 |
2/2✓ Branch 1 taken 144 times.
✓ Branch 2 taken 96 times.
|
240 | for (unsigned i = 0; i < children.size(); ++i) { |
| 186 | 144 | weight += children[i].weight; | |
| 187 | } | ||
| 188 | } | ||
| 189 | 144 | } | |
| 190 | |||
| 191 | } // namespace catalog | ||
| 192 | |||
| 193 | #endif // CVMFS_CATALOG_BALANCER_IMPL_H_ | ||
| 194 |