Directory: | cvmfs/ |
---|---|
File: | cvmfs/pathspec/pathspec.cc |
Date: | 2025-06-29 02:35:41 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 198 | 207 | 95.7% |
Branches: | 120 | 172 | 69.8% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #include "pathspec.h" | ||
6 | |||
7 | #include <cassert> | ||
8 | |||
9 | #include "util/logging.h" | ||
10 | #include "util/smalloc.h" | ||
11 | |||
12 | 4826 | Pathspec::Pathspec(const std::string &spec) | |
13 | 4826 | : regex_(NULL) | |
14 | 4826 | , relaxed_regex_(NULL) | |
15 | 4826 | , prefix_regex_(NULL) | |
16 | 4826 | , regex_compiled_(false) | |
17 | 4826 | , relaxed_regex_compiled_(false) | |
18 | 4826 | , prefix_regex_compiled_(false) | |
19 | 4826 | , glob_string_compiled_(false) | |
20 | 4826 | , glob_string_sequence_compiled_(false) | |
21 | 4826 | , valid_(true) | |
22 | 4826 | , absolute_(false) { | |
23 |
1/2✓ Branch 1 taken 4826 times.
✗ Branch 2 not taken.
|
4826 | Parse(spec); |
24 |
2/2✓ Branch 1 taken 38 times.
✓ Branch 2 taken 4788 times.
|
4826 | if (patterns_.size() == 0) { |
25 | 38 | valid_ = false; | |
26 | } | ||
27 | |||
28 | 4826 | ElementPatterns::const_iterator i = patterns_.begin(); | |
29 | 4826 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
30 |
2/2✓ Branch 2 taken 10333 times.
✓ Branch 3 taken 4826 times.
|
15159 | for (; i != iend; ++i) { |
31 |
2/2✓ Branch 2 taken 55 times.
✓ Branch 3 taken 10278 times.
|
10333 | if (!i->IsValid()) { |
32 | 55 | valid_ = false; | |
33 | } | ||
34 | } | ||
35 | 4826 | } | |
36 | |||
37 | // Compiled regex structure cannot be duplicated and needs to be re-compiled | ||
38 | // Note: the copy-constructed object will perform a lazy evaluation again | ||
39 | 2962 | Pathspec::Pathspec(const Pathspec &other) | |
40 | 2962 | : patterns_(other.patterns_) | |
41 | 2962 | , regex_(NULL) | |
42 | 2962 | , relaxed_regex_(NULL) | |
43 | 2962 | , prefix_regex_(NULL) | |
44 |
1/2✓ Branch 1 taken 2962 times.
✗ Branch 2 not taken.
|
2962 | , glob_string_(other.glob_string_) |
45 |
1/2✓ Branch 1 taken 2962 times.
✗ Branch 2 not taken.
|
2962 | , glob_string_sequence_(other.glob_string_sequence_) |
46 | 2962 | , regex_compiled_(false) | |
47 | 2962 | , relaxed_regex_compiled_(false) | |
48 | 2962 | , prefix_regex_compiled_(false) | |
49 | 2962 | , glob_string_compiled_(other.glob_string_compiled_) | |
50 | 2962 | , glob_string_sequence_compiled_(other.glob_string_sequence_compiled_) | |
51 | 2962 | , valid_(other.valid_) | |
52 | 2962 | , absolute_(other.absolute_) { } | |
53 | |||
54 | 7785 | Pathspec::~Pathspec() { DestroyRegularExpressions(); } | |
55 | |||
56 | 114 | Pathspec &Pathspec::operator=(const Pathspec &other) { | |
57 |
1/2✓ Branch 0 taken 114 times.
✗ Branch 1 not taken.
|
114 | if (this != &other) { |
58 | 114 | DestroyRegularExpressions(); // see: copy c'tor for details | |
59 | 114 | patterns_ = other.patterns_; | |
60 | |||
61 | 114 | glob_string_compiled_ = other.glob_string_compiled_; | |
62 | 114 | glob_string_ = other.glob_string_; | |
63 | |||
64 | 114 | glob_string_sequence_compiled_ = other.glob_string_sequence_compiled_; | |
65 | 114 | glob_string_sequence_ = other.glob_string_sequence_; | |
66 | |||
67 | 114 | valid_ = other.valid_; | |
68 | 114 | absolute_ = other.absolute_; | |
69 | } | ||
70 | |||
71 | 114 | return *this; | |
72 | } | ||
73 | |||
74 | |||
75 | 4826 | void Pathspec::Parse(const std::string &spec) { | |
76 | // parsing is done using std::string iterators to walk through the entire | ||
77 | // pathspec parameter. Thus, all parsing methods receive references to these | ||
78 | // iterators and increment itr as they pass along. | ||
79 | 4826 | std::string::const_iterator itr = spec.begin(); | |
80 | 4826 | const std::string::const_iterator end = spec.end(); | |
81 | |||
82 | 4826 | absolute_ = (*itr == kSeparator); | |
83 |
2/2✓ Branch 1 taken 19245 times.
✓ Branch 2 taken 4826 times.
|
24071 | while (itr != end) { |
84 |
2/2✓ Branch 1 taken 8912 times.
✓ Branch 2 taken 10333 times.
|
19245 | if (*itr == kSeparator) { |
85 | 8912 | ++itr; | |
86 | 8912 | continue; | |
87 | } | ||
88 |
1/2✓ Branch 1 taken 10333 times.
✗ Branch 2 not taken.
|
10333 | ParsePathElement(end, &itr); |
89 | } | ||
90 | 4826 | } | |
91 | |||
92 | 10333 | void Pathspec::ParsePathElement(const std::string::const_iterator &end, | |
93 | std::string::const_iterator *itr) { | ||
94 | // find the end of the current pattern element (next directory boundary) | ||
95 | 10333 | const std::string::const_iterator begin_element = *itr; | |
96 |
6/6✓ Branch 1 taken 51268 times.
✓ Branch 2 taken 4654 times.
✓ Branch 4 taken 45589 times.
✓ Branch 5 taken 5679 times.
✓ Branch 6 taken 45589 times.
✓ Branch 7 taken 10333 times.
|
55922 | while (*itr != end && **itr != kSeparator) { |
97 | 45589 | ++(*itr); | |
98 | } | ||
99 | 10333 | const std::string::const_iterator end_element = *itr; | |
100 | |||
101 | // create a PathspecElementPattern out of this directory description | ||
102 |
2/4✓ Branch 1 taken 10333 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10333 times.
✗ Branch 5 not taken.
|
10333 | patterns_.push_back(PathspecElementPattern(begin_element, end_element)); |
103 | 10333 | } | |
104 | |||
105 | 11743 | bool Pathspec::IsMatching(const std::string &query_path) const { | |
106 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 11743 times.
|
11743 | assert(IsValid()); |
107 | |||
108 |
2/2✓ Branch 1 taken 364 times.
✓ Branch 2 taken 11379 times.
|
11743 | if (query_path.empty()) { |
109 | 364 | return false; | |
110 | } | ||
111 | |||
112 | 11379 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
113 |
2/2✓ Branch 1 taken 7541 times.
✓ Branch 2 taken 498 times.
|
8039 | return (!query_is_absolute || this->IsAbsolute()) |
114 |
4/4✓ Branch 0 taken 8039 times.
✓ Branch 1 taken 3340 times.
✓ Branch 3 taken 4754 times.
✓ Branch 4 taken 6127 times.
|
19418 | && IsPathspecMatching(query_path); |
115 | } | ||
116 | |||
117 | 468 | bool Pathspec::IsPrefixMatching(const std::string &query_path) const { | |
118 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 468 times.
|
468 | assert(IsValid()); |
119 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 468 times.
|
468 | assert(IsAbsolute()); |
120 | |||
121 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 468 times.
|
468 | if (query_path.empty()) { |
122 | ✗ | return false; | |
123 | } | ||
124 | |||
125 | 468 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
126 |
4/4✓ Branch 0 taken 429 times.
✓ Branch 1 taken 39 times.
✓ Branch 3 taken 273 times.
✓ Branch 4 taken 156 times.
|
468 | return (query_is_absolute && IsPathspecPrefixMatching(query_path)); |
127 | } | ||
128 | |||
129 | 6162 | bool Pathspec::IsMatchingRelaxed(const std::string &query_path) const { | |
130 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 6162 times.
|
6162 | assert(IsValid()); |
131 | |||
132 |
2/2✓ Branch 1 taken 576 times.
✓ Branch 2 taken 5586 times.
|
6162 | if (query_path.empty()) { |
133 | 576 | return false; | |
134 | } | ||
135 | |||
136 | 5586 | return IsPathspecMatchingRelaxed(query_path); | |
137 | } | ||
138 | |||
139 | 10881 | bool Pathspec::IsPathspecMatching(const std::string &query_path) const { | |
140 | 10881 | return ApplyRegularExpression(query_path, GetRegularExpression()); | |
141 | } | ||
142 | |||
143 | 429 | bool Pathspec::IsPathspecPrefixMatching(const std::string &query_path) const { | |
144 | 429 | return ApplyRegularExpression(query_path, GetPrefixRegularExpression()); | |
145 | } | ||
146 | |||
147 | 5586 | bool Pathspec::IsPathspecMatchingRelaxed(const std::string &query_path) const { | |
148 | 5586 | return ApplyRegularExpression(query_path, GetRelaxedRegularExpression()); | |
149 | } | ||
150 | |||
151 | 16896 | bool Pathspec::ApplyRegularExpression(const std::string &query_path, | |
152 | regex_t *regex) const { | ||
153 | 16896 | const char *path = query_path.c_str(); | |
154 | 16896 | const int retval = regexec(regex, path, 0, NULL, 0); | |
155 | |||
156 |
3/4✓ Branch 0 taken 10509 times.
✓ Branch 1 taken 6387 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10509 times.
|
16896 | if (retval != 0 && retval != REG_NOMATCH) { |
157 | ✗ | PrintRegularExpressionError(retval); | |
158 | } | ||
159 | |||
160 | 16896 | return (retval == 0); | |
161 | } | ||
162 | |||
163 | 10881 | regex_t *Pathspec::GetRegularExpression() const { | |
164 |
2/2✓ Branch 0 taken 2253 times.
✓ Branch 1 taken 8628 times.
|
10881 | if (!regex_compiled_) { |
165 | 2253 | const bool is_relaxed = false; | |
166 |
1/2✓ Branch 1 taken 2253 times.
✗ Branch 2 not taken.
|
2253 | const std::string regex = GenerateRegularExpression(is_relaxed); |
167 |
1/2✓ Branch 2 taken 2253 times.
✗ Branch 3 not taken.
|
2253 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
168 | |||
169 |
1/2✓ Branch 1 taken 2253 times.
✗ Branch 2 not taken.
|
2253 | regex_ = CompileRegularExpression(regex); |
170 | 2253 | regex_compiled_ = true; | |
171 | 2253 | } | |
172 | |||
173 | 10881 | return regex_; | |
174 | } | ||
175 | |||
176 | 429 | regex_t *Pathspec::GetPrefixRegularExpression() const { | |
177 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 351 times.
|
429 | if (!prefix_regex_compiled_) { |
178 | 78 | const bool is_relaxed = false; | |
179 | 78 | const bool is_prefix = true; | |
180 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | const std::string regex = GenerateRegularExpression(is_relaxed, is_prefix); |
181 |
1/2✓ Branch 2 taken 78 times.
✗ Branch 3 not taken.
|
78 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
182 | |||
183 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | prefix_regex_ = CompileRegularExpression(regex); |
184 | 78 | prefix_regex_compiled_ = true; | |
185 | 78 | } | |
186 | |||
187 | 429 | return prefix_regex_; | |
188 | } | ||
189 | |||
190 | 5586 | regex_t *Pathspec::GetRelaxedRegularExpression() const { | |
191 |
2/2✓ Branch 0 taken 668 times.
✓ Branch 1 taken 4918 times.
|
5586 | if (!relaxed_regex_compiled_) { |
192 | 668 | const bool is_relaxed = true; | |
193 |
1/2✓ Branch 1 taken 668 times.
✗ Branch 2 not taken.
|
668 | const std::string regex = GenerateRegularExpression(is_relaxed); |
194 |
1/2✓ Branch 2 taken 668 times.
✗ Branch 3 not taken.
|
668 | LogCvmfs(kLogPathspec, kLogDebug, "compiled relaxed regex: %s", |
195 | regex.c_str()); | ||
196 | |||
197 |
1/2✓ Branch 1 taken 668 times.
✗ Branch 2 not taken.
|
668 | relaxed_regex_ = CompileRegularExpression(regex); |
198 | 668 | relaxed_regex_compiled_ = true; | |
199 | 668 | } | |
200 | |||
201 | 5586 | return relaxed_regex_; | |
202 | } | ||
203 | |||
204 | 2999 | std::string Pathspec::GenerateRegularExpression(const bool is_relaxed, | |
205 | const bool is_prefix) const { | ||
206 | // start matching at the first character | ||
207 |
1/2✓ Branch 2 taken 2999 times.
✗ Branch 3 not taken.
|
2999 | std::string regex = "^"; |
208 | |||
209 | // absolute paths require a / in the beginning | ||
210 |
2/2✓ Branch 1 taken 2357 times.
✓ Branch 2 taken 642 times.
|
2999 | if (IsAbsolute()) { |
211 |
1/2✓ Branch 1 taken 2357 times.
✗ Branch 2 not taken.
|
2357 | regex += kSeparator; |
212 | } | ||
213 | |||
214 | // concatenate the regular expressions of the compiled path elements | ||
215 | 2999 | ElementPatterns::const_iterator i = patterns_.begin(); | |
216 | 2999 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
217 |
2/2✓ Branch 2 taken 6358 times.
✓ Branch 3 taken 2999 times.
|
9357 | for (; i != iend; ++i) { |
218 |
2/4✓ Branch 2 taken 6358 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 6358 times.
✗ Branch 6 not taken.
|
6358 | regex += i->GenerateRegularExpression(is_relaxed); |
219 |
2/2✓ Branch 2 taken 3359 times.
✓ Branch 3 taken 2999 times.
|
6358 | if (i + 1 != iend) { |
220 |
1/2✓ Branch 1 taken 3359 times.
✗ Branch 2 not taken.
|
3359 | regex += kSeparator; |
221 | } | ||
222 | } | ||
223 | |||
224 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 2921 times.
|
2999 | if (is_prefix) { |
225 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | regex += "($|"; |
226 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | regex += kSeparator; |
227 |
1/2✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
|
78 | regex += ".*$)"; |
228 | } else { | ||
229 | // a path might end with a trailing slash | ||
230 | // (pathspec does not distinguish files and directories) | ||
231 |
1/2✓ Branch 1 taken 2921 times.
✗ Branch 2 not taken.
|
2921 | regex += kSeparator; |
232 |
1/2✓ Branch 1 taken 2921 times.
✗ Branch 2 not taken.
|
2921 | regex += "?$"; |
233 | } | ||
234 | |||
235 | 5998 | return regex; | |
236 | } | ||
237 | |||
238 | 2999 | regex_t *Pathspec::CompileRegularExpression(const std::string &regex) const { | |
239 | 2999 | regex_t *result = reinterpret_cast<regex_t *>(smalloc(sizeof(regex_t))); | |
240 | 2999 | const int flags = REG_NOSUB | REG_NEWLINE | REG_EXTENDED; | |
241 | 2999 | const int retval = regcomp(result, regex.c_str(), flags); | |
242 | |||
243 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2999 times.
|
2999 | if (retval != 0) { |
244 | ✗ | PrintRegularExpressionError(retval); | |
245 | ✗ | assert(false && "failed to compile regex"); | |
246 | } | ||
247 | |||
248 | 2999 | return result; | |
249 | } | ||
250 | |||
251 | 7899 | void Pathspec::DestroyRegularExpressions() { | |
252 |
2/2✓ Branch 0 taken 2253 times.
✓ Branch 1 taken 5646 times.
|
7899 | if (regex_compiled_) { |
253 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2253 times.
|
2253 | assert(regex_ != NULL); |
254 | 2253 | regfree(regex_); | |
255 | 2253 | regex_ = NULL; | |
256 | 2253 | regex_compiled_ = false; | |
257 | } | ||
258 | |||
259 |
2/2✓ Branch 0 taken 668 times.
✓ Branch 1 taken 7231 times.
|
7899 | if (relaxed_regex_compiled_) { |
260 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 668 times.
|
668 | assert(relaxed_regex_ != NULL); |
261 | 668 | regfree(relaxed_regex_); | |
262 | 668 | relaxed_regex_ = NULL; | |
263 | 668 | relaxed_regex_compiled_ = false; | |
264 | } | ||
265 | 7899 | } | |
266 | |||
267 | 2210 | bool Pathspec::operator==(const Pathspec &other) const { | |
268 |
1/2✓ Branch 4 taken 1590 times.
✗ Branch 5 not taken.
|
3800 | if (patterns_.size() != other.patterns_.size() || IsValid() != other.IsValid() |
269 |
6/6✓ Branch 0 taken 1590 times.
✓ Branch 1 taken 620 times.
✓ Branch 4 taken 102 times.
✓ Branch 5 taken 1488 times.
✓ Branch 6 taken 722 times.
✓ Branch 7 taken 1488 times.
|
3800 | || IsAbsolute() != other.IsAbsolute()) { |
270 | 722 | return false; | |
271 | } | ||
272 | |||
273 | 1488 | ElementPatterns::const_iterator i = patterns_.begin(); | |
274 | 1488 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
275 | 1488 | ElementPatterns::const_iterator j = other.patterns_.begin(); | |
276 | 1488 | const ElementPatterns::const_iterator jend = other.patterns_.end(); | |
277 | |||
278 |
5/6✓ Branch 3 taken 3128 times.
✓ Branch 4 taken 586 times.
✓ Branch 6 taken 3128 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 3128 times.
✓ Branch 9 taken 586 times.
|
3714 | for (; i != iend && j != jend; ++i, ++j) { |
279 |
3/4✓ Branch 3 taken 3128 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 902 times.
✓ Branch 6 taken 2226 times.
|
3128 | if (*i != *j) { |
280 | 902 | return false; | |
281 | } | ||
282 | } | ||
283 | |||
284 | 586 | return true; | |
285 | } | ||
286 | |||
287 | ✗ | void Pathspec::PrintRegularExpressionError(const int error_code) const { | |
288 | ✗ | assert(regex_compiled_); | |
289 | ✗ | const size_t errbuf_size = 1024; | |
290 | char error[errbuf_size]; | ||
291 | ✗ | regerror(error_code, regex_, error, errbuf_size); | |
292 | ✗ | LogCvmfs(kLogPathspec, kLogStderr, "RegEx Error: %d - %s", error_code, error); | |
293 | } | ||
294 | |||
295 | 646 | const Pathspec::GlobStringSequence &Pathspec::GetGlobStringSequence() const { | |
296 |
1/2✓ Branch 0 taken 646 times.
✗ Branch 1 not taken.
|
646 | if (!glob_string_sequence_compiled_) { |
297 | 646 | GenerateGlobStringSequence(); | |
298 | 646 | glob_string_sequence_compiled_ = true; | |
299 | } | ||
300 | 646 | return glob_string_sequence_; | |
301 | } | ||
302 | |||
303 | |||
304 | 646 | void Pathspec::GenerateGlobStringSequence() const { | |
305 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 646 times.
|
646 | assert(glob_string_sequence_.empty()); |
306 | 646 | ElementPatterns::const_iterator i = patterns_.begin(); | |
307 | 646 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
308 |
2/2✓ Branch 2 taken 1292 times.
✓ Branch 3 taken 646 times.
|
1938 | for (; i != iend; ++i) { |
309 |
1/2✓ Branch 2 taken 1292 times.
✗ Branch 3 not taken.
|
1292 | const std::string glob_string = i->GenerateGlobString(); |
310 |
1/2✓ Branch 1 taken 1292 times.
✗ Branch 2 not taken.
|
1292 | glob_string_sequence_.push_back(glob_string); |
311 | 1292 | } | |
312 | 646 | } | |
313 | |||
314 | |||
315 | 874 | const std::string &Pathspec::GetGlobString() const { | |
316 |
2/2✓ Branch 0 taken 646 times.
✓ Branch 1 taken 228 times.
|
874 | if (!glob_string_compiled_) { |
317 | 646 | GenerateGlobString(); | |
318 | 646 | glob_string_compiled_ = true; | |
319 | } | ||
320 | 874 | return glob_string_; | |
321 | } | ||
322 | |||
323 | |||
324 | 646 | void Pathspec::GenerateGlobString() const { | |
325 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 646 times.
|
646 | assert(glob_string_.empty()); |
326 | |||
327 | 646 | bool is_first = true; | |
328 |
1/2✓ Branch 1 taken 646 times.
✗ Branch 2 not taken.
|
646 | const GlobStringSequence &seq = GetGlobStringSequence(); |
329 | 646 | GlobStringSequence::const_iterator i = seq.begin(); | |
330 | 646 | const GlobStringSequence::const_iterator iend = seq.end(); | |
331 |
2/2✓ Branch 1 taken 1292 times.
✓ Branch 2 taken 646 times.
|
1938 | for (; i != iend; ++i) { |
332 |
6/6✓ Branch 0 taken 646 times.
✓ Branch 1 taken 646 times.
✓ Branch 3 taken 418 times.
✓ Branch 4 taken 228 times.
✓ Branch 5 taken 1064 times.
✓ Branch 6 taken 228 times.
|
1292 | if (!is_first || IsAbsolute()) { |
333 |
1/2✓ Branch 1 taken 1064 times.
✗ Branch 2 not taken.
|
1064 | glob_string_ += kSeparator; |
334 | } | ||
335 |
1/2✓ Branch 2 taken 1292 times.
✗ Branch 3 not taken.
|
1292 | glob_string_ += *i; |
336 | 1292 | is_first = false; | |
337 | } | ||
338 | 646 | } | |
339 |