Directory: | cvmfs/ |
---|---|
File: | cvmfs/pathspec/pathspec.h |
Date: | 2025-02-09 02:34:19 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 5 | 5 | 100.0% |
Branches: | 4 | 4 | 100.0% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #ifndef CVMFS_PATHSPEC_PATHSPEC_H_ | ||
6 | #define CVMFS_PATHSPEC_PATHSPEC_H_ | ||
7 | |||
8 | #include <regex.h> | ||
9 | |||
10 | #include <string> | ||
11 | #include <vector> | ||
12 | |||
13 | #include "pathspec/pathspec_pattern.h" | ||
14 | |||
15 | /** | ||
16 | * A Pathspec is an abstract description of a file path pattern. | ||
17 | * Examples (adding a space in front of * - silence compiler warning): | ||
18 | * /foo/bar/ *.txt - matches .txt files in /foo/bar | ||
19 | * /kernel/2.6.? - matches directories like: /kernel/2.6.[0-9a-z] | ||
20 | * /test/ *_debug/ * - matches all files in /test/cvmfs_debug/ for example | ||
21 | * | ||
22 | * We are supporting both the wildcard (i.e. *) and the placeholder (i.e. ?) | ||
23 | * symbol. Furthermore Pathspecs can be absolute (starting with /) or relative. | ||
24 | * | ||
25 | * Pathspecs are similar to unix glob strings for file system paths and can be | ||
26 | * transformed into such strings or sequences of these (cut at directory | ||
27 | * boundaries). This comes in handy when searching CVMFS catalog entries with a | ||
28 | * Pathspec. | ||
29 | * Note: sophisticated Pathspec based catalog lookup was not needed, yet. But it | ||
30 | * is implemented and not merged (see: reneme/feature-lookup_pathspec). | ||
31 | * | ||
32 | * Also inverse matches are possible by transforming a Pathspec into a regular | ||
33 | * expression and matching path strings with it. There are two matching modes: | ||
34 | * IsMatching() - Matches the exact path | ||
35 | * (wildcards don't span directory boundaries) | ||
36 | * IsMatchingRelaxed() - Matches the path more relaxed | ||
37 | * (comparable to shell pattern matching, wildcards | ||
38 | * match any character including /) | ||
39 | * | ||
40 | * Internally a Pathspec is broken up into PathspecElementPatterns at the | ||
41 | * directory boundaries. Have a look there for further details. | ||
42 | * | ||
43 | * For matching, Pathspecs need to be transformed either into a regular | ||
44 | * expression or a GlobString(Sequence). These transformations are done | ||
45 | * lazily on first request. | ||
46 | */ | ||
47 | class Pathspec { | ||
48 | public: | ||
49 | static const char kSeparator = '/'; | ||
50 | static const char kEscaper = '\\'; | ||
51 | static const char kWildcard = '*'; | ||
52 | static const char kPlaceholder = '?'; | ||
53 | |||
54 | protected: | ||
55 | typedef std::vector<PathspecElementPattern> ElementPatterns; | ||
56 | |||
57 | public: | ||
58 | typedef std::vector<std::string> GlobStringSequence; | ||
59 | |||
60 | public: | ||
61 | /** | ||
62 | * Create a new Pathspec that represents the pattern handed in as a parameter. | ||
63 | * Note: The parser will determine if the given pattern is valid and set a | ||
64 | * flag. After creating a Pathspec, check .IsValid() before using it. | ||
65 | * | ||
66 | * @param spec the pathspec pattern to be parsed | ||
67 | */ | ||
68 | explicit Pathspec(const std::string &spec); | ||
69 | Pathspec(const Pathspec &other); | ||
70 | // TODO(rmeusel): C++11 move constructor | ||
71 | ~Pathspec(); | ||
72 | |||
73 | /** | ||
74 | * Matches an exact path string. Directory boundaries are taken into account, | ||
75 | * i.e. wildcards do not match beyond a single directory tree level. | ||
76 | * | ||
77 | * @param query_path the path to be matched against this Pathspec | ||
78 | * @return true if the path matches | ||
79 | */ | ||
80 | bool IsMatching(const std::string &query_path) const; | ||
81 | |||
82 | |||
83 | /** | ||
84 | * Matches if the Pathspec is a prefix path of query_path. Directory | ||
85 | * boundaries are taken into account. Only works for absolute Pathspecs. | ||
86 | * | ||
87 | * @param query_path the path to be matched against this Pathspec | ||
88 | * @return true if the path matches | ||
89 | */ | ||
90 | bool IsPrefixMatching(const std::string &query_path) const; | ||
91 | |||
92 | /** | ||
93 | * Matches path strings similar to shell pattern matching (case...esac). | ||
94 | * That is, wildcards match any character including / and can therefore span | ||
95 | * directory boundaries. | ||
96 | * | ||
97 | * @param query_path the path to be matched against this Pathspec | ||
98 | * @return true if the path matches | ||
99 | */ | ||
100 | bool IsMatchingRelaxed(const std::string &query_path) const; | ||
101 | |||
102 | /** | ||
103 | * Checks if the parsed Pathspec is valid and can be used. | ||
104 | * @return true if this Pathspec is valid | ||
105 | */ | ||
106 | 998 | bool IsValid() const { return valid_; } | |
107 | |||
108 | /** | ||
109 | * Checks if this Pathspec is defining an absolute path (i.e. starts with /) | ||
110 | * @return true if this Pathspec is absolute | ||
111 | */ | ||
112 | 605 | bool IsAbsolute() const { return absolute_; } | |
113 | |||
114 | /** | ||
115 | * Generates an ordered list of unix-like glob strings split on directory | ||
116 | * boundaries. Can be used to traverse down into a directory tree along a | ||
117 | * given Pathspec. | ||
118 | * | ||
119 | * @return an ordered list of unixoid glob strings (usable in glob()) | ||
120 | */ | ||
121 | const GlobStringSequence& GetGlobStringSequence() const; | ||
122 | |||
123 | /** | ||
124 | * Generates a single glob string out of this Pathspec. This string can be | ||
125 | * used in glob() | ||
126 | * | ||
127 | * @return a unix-compatible glob string | ||
128 | */ | ||
129 | const std::string& GetGlobString() const; | ||
130 | |||
131 | Pathspec& operator=(const Pathspec &other); | ||
132 | bool operator== (const Pathspec &other) const; | ||
133 | 20 | bool operator!= (const Pathspec &other) const { return !(*this == other); } | |
134 | |||
135 | 3601 | static bool IsSpecialChar(const char chr) { | |
136 |
4/4✓ Branch 0 taken 3463 times.
✓ Branch 1 taken 138 times.
✓ Branch 2 taken 113 times.
✓ Branch 3 taken 3350 times.
|
3601 | return (chr == kWildcard || chr == kPlaceholder); |
137 | } | ||
138 | |||
139 | protected: | ||
140 | void Parse(const std::string &spec); | ||
141 | void ParsePathElement(const std::string::const_iterator &end, | ||
142 | std::string::const_iterator *itr); | ||
143 | |||
144 | bool IsPathspecMatching(const std::string &query_path) const; | ||
145 | bool IsPathspecPrefixMatching(const std::string &query_path) const; | ||
146 | bool IsPathspecMatchingRelaxed(const std::string &query_path) const; | ||
147 | |||
148 | bool ApplyRegularExpression(const std::string &query_path, | ||
149 | regex_t *regex) const; | ||
150 | |||
151 | regex_t* GetRegularExpression() const; | ||
152 | regex_t* GetPrefixRegularExpression() const; | ||
153 | regex_t* GetRelaxedRegularExpression() const; | ||
154 | |||
155 | std::string GenerateRegularExpression(const bool is_relaxed = false, | ||
156 | const bool is_prefix = false) const; | ||
157 | regex_t* CompileRegularExpression(const std::string &regex) const; | ||
158 | |||
159 | void PrintRegularExpressionError(const int error_code) const; | ||
160 | |||
161 | void GenerateGlobStringSequence() const; | ||
162 | void GenerateGlobString() const; | ||
163 | |||
164 | void DestroyRegularExpressions(); | ||
165 | |||
166 | private: | ||
167 | ElementPatterns patterns_; | ||
168 | |||
169 | mutable regex_t *regex_; | ||
170 | mutable regex_t *relaxed_regex_; | ||
171 | mutable regex_t *prefix_regex_; | ||
172 | mutable std::string glob_string_; | ||
173 | mutable GlobStringSequence glob_string_sequence_; | ||
174 | |||
175 | mutable bool regex_compiled_; | ||
176 | mutable bool relaxed_regex_compiled_; | ||
177 | mutable bool prefix_regex_compiled_; | ||
178 | mutable bool glob_string_compiled_; | ||
179 | mutable bool glob_string_sequence_compiled_; | ||
180 | |||
181 | bool valid_; | ||
182 | bool absolute_; | ||
183 | }; | ||
184 | |||
185 | #endif // CVMFS_PATHSPEC_PATHSPEC_H_ | ||
186 |