1 |
|
|
/** |
2 |
|
|
* This file is part of the CernVM File System. |
3 |
|
|
*/ |
4 |
|
|
|
5 |
|
|
#ifndef CVMFS_CATALOG_BALANCER_H_ |
6 |
|
|
#define CVMFS_CATALOG_BALANCER_H_ |
7 |
|
|
|
8 |
|
|
#include <string> |
9 |
|
|
#include <vector> |
10 |
|
|
|
11 |
|
|
#include "catalog_mgr.h" |
12 |
|
|
#include "directory_entry.h" |
13 |
|
|
|
14 |
|
|
|
15 |
|
|
namespace catalog { |
16 |
|
|
|
17 |
|
|
/** |
18 |
|
|
* This class is in charge of "balancing" a catalog embedded in a |
19 |
|
|
* WritableCatalogManager. The process of balancing consists of keeping the |
20 |
|
|
* number of entries in each catalog between a maximum and minimum threshold so |
21 |
|
|
* that all of them can be easily manipulable. This way there won't be catalogs |
22 |
|
|
* that take a lot of time being downloaded, or their SQL operations will take |
23 |
|
|
* a reasonable amount of time. |
24 |
|
|
* |
25 |
|
|
* The CatalogBalancer uses the following WritableCatalogManager attributes: |
26 |
|
|
* - max_weight_: maximum number of entries in a catalog |
27 |
|
|
* - min_weight_: minimum number of entries in a catalog |
28 |
|
|
* - balance_weight_: maximum number of entries during the balancing process |
29 |
|
|
* |
30 |
|
|
* The balancing process is done traversing the entire catalog tree in |
31 |
|
|
* postorder, so that the deepest catalogs are analyzed before their parents. |
32 |
|
|
* Each catalog is then analyzed individually, existing three different |
33 |
|
|
* scenarios: |
34 |
|
|
* |
35 |
|
|
* a) The number of entries of the catalog is between max_weight_ and |
36 |
|
|
* min_weight_: nothing happens, the catalog remains untouched. |
37 |
|
|
* |
38 |
|
|
* b) The number of entries of the catalog is greater than max_weight_: the |
39 |
|
|
* catalog gets split in smaller catalogs. One of the new generated catalogs |
40 |
|
|
* will be mounted in the original mount point. |
41 |
|
|
* |
42 |
|
|
* c) The number of entries of the catalog is lesser than min_weight_: the |
43 |
|
|
* catalog gets merged with its father (except the root catalog, obviously). |
44 |
|
|
*/ |
45 |
|
|
template <class CatalogMgrT> |
46 |
|
|
class CatalogBalancer { |
47 |
|
|
public: |
48 |
|
|
typedef typename CatalogMgrT::catalog_t catalog_t; |
49 |
|
1 |
explicit CatalogBalancer(CatalogMgrT *catalog_mgr) |
50 |
|
1 |
: catalog_mgr_(catalog_mgr) { } |
51 |
|
|
|
52 |
|
|
/** |
53 |
|
|
* This method balances a catalog. A catalog is considered overflowed if |
54 |
|
|
* its weight (the number of entries in the catalog database) is greater than |
55 |
|
|
* the stablish threshold defined by the max_weight_ variable. If a catalog is |
56 |
|
|
* overflowed it will be split in a number of new catalogs, meeting the |
57 |
|
|
* following properties: |
58 |
|
|
* - There will be a catalog in the same place where the overflowed catalog |
59 |
|
|
* was at the beginning of the process. |
60 |
|
|
* - New generated catalogs will have a maximum weight defined by the |
61 |
|
|
* attribute balance_weight_, which will always be lesser than max_weight_. |
62 |
|
|
* - The number of new generated catalogs is unknown. |
63 |
|
|
* |
64 |
|
|
* A catalog is considered underflowed if its weight is lesser than the |
65 |
|
|
* minimum threshold, defined by the min_weight_ variable. If a catalog is |
66 |
|
|
* underflowed it will be merged with the father catalog. However, it is |
67 |
|
|
* remarkable that such an operation can provoke an overflow in the father, |
68 |
|
|
* which will be threaten separately. |
69 |
|
|
* |
70 |
|
|
* If a catalog is not overflowed or underflowed no changes will be applied |
71 |
|
|
* to it. |
72 |
|
|
* |
73 |
|
|
* For a WritableCatalogManager to be balanced it requires the is_balanced_ |
74 |
|
|
* flag to be set to true. |
75 |
|
|
* |
76 |
|
|
* @param catalog the catalog to be balanced, or NULL to balance all catalogs |
77 |
|
|
* in the WritableCatalogManager |
78 |
|
|
*/ |
79 |
|
|
void Balance(catalog_t *catalog); |
80 |
|
|
|
81 |
|
|
private: |
82 |
|
|
/** |
83 |
|
|
* A VirtualNode is the abstract representation of an entry in a catalog. |
84 |
|
|
* It is used by the CatalogBalancer to "virtually" represent the |
85 |
|
|
* file-system tree of a concrete catalog and spot the nodes where |
86 |
|
|
* a new catalog should be created. |
87 |
|
|
* |
88 |
|
|
* One of its main functions is to keep track of the current weight of a |
89 |
|
|
* file or directory, i.e., the number of entries it contains. Concretely: |
90 |
|
|
* - Normal files and symlinks: it is always one. |
91 |
|
|
* - Normal directories: one plus the weight of each node it contains. |
92 |
|
|
* - Directories which are catalog mount points: it is always one |
93 |
|
|
*/ |
94 |
|
36 |
struct VirtualNode { |
95 |
|
|
std::vector<VirtualNode> children; |
96 |
|
|
unsigned weight; |
97 |
|
|
DirectoryEntry dirent; |
98 |
|
|
std::string path; |
99 |
|
|
bool is_new_nested_catalog; |
100 |
|
|
|
101 |
|
|
/** |
102 |
|
|
* Extracts not only the direct children of this VirtualNode, but |
103 |
|
|
* recursively all the VirtualNodes of this catalog. When a VirtualNode that |
104 |
|
|
* is the root of a nested catalog is created, it won't be expanded. In order |
105 |
|
|
* to actually expand that node it will be necessary to manually call this |
106 |
|
|
* method on it. |
107 |
|
|
* |
108 |
|
|
* @param catalog_mgr catalog manager that contains the file system tree |
109 |
|
|
*/ |
110 |
|
|
void ExtractChildren(CatalogMgrT *catalog_mgr); |
111 |
|
|
void FixWeight(); |
112 |
|
1 |
VirtualNode(const std::string &path, CatalogMgrT *catalog_mgr) |
113 |
|
|
: children(), weight(1), dirent(), path(path), |
114 |
|
1 |
is_new_nested_catalog(false) { |
115 |
|
1 |
catalog_mgr->LookupPath(path, kLookupSole, &dirent); |
116 |
|
1 |
} |
117 |
|
5 |
VirtualNode(const std::string &path, const DirectoryEntry &dirent, |
118 |
|
|
CatalogMgrT *catalog_mgr) |
119 |
|
|
: children(), weight(1), dirent(dirent), path(path), |
120 |
|
5 |
is_new_nested_catalog(false) { |
121 |
✓✓✓✓ ✓✓ |
5 |
if (!IsCatalog() && IsDirectory()) |
122 |
|
2 |
ExtractChildren(catalog_mgr); |
123 |
|
5 |
} |
124 |
|
13 |
bool IsDirectory() { return dirent.IsDirectory(); } |
125 |
|
13 |
bool IsCatalog() { return is_new_nested_catalog || |
126 |
✓✗✓✓
|
13 |
dirent.IsNestedCatalogMountpoint(); } |
127 |
|
|
}; |
128 |
|
|
typedef typename CatalogBalancer<CatalogMgrT>::VirtualNode virtual_node_t; |
129 |
|
|
|
130 |
|
|
void PartitionOptimally(VirtualNode *virtual_node); |
131 |
|
|
void AddCatalogMarker(std::string path); |
132 |
|
|
DirectoryEntryBase MakeEmptyDirectoryEntryBase(std::string name, |
133 |
|
|
uid_t uid, |
134 |
|
|
gid_t gid); |
135 |
|
|
static VirtualNode *MaxChild(VirtualNode *virtual_node); |
136 |
|
|
void AddCatalog(VirtualNode *child_node); |
137 |
|
|
|
138 |
|
|
CatalogMgrT *catalog_mgr_; |
139 |
|
|
}; |
140 |
|
|
|
141 |
|
|
} // namespace catalog |
142 |
|
|
|
143 |
|
|
#include "catalog_balancer_impl.h" |
144 |
|
|
|
145 |
|
|
#endif // CVMFS_CATALOG_BALANCER_H_ |
146 |
|
|
|