Directory: | cvmfs/ |
---|---|
File: | cvmfs/pathspec/pathspec.cc |
Date: | 2025-07-21 10:50:29 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 198 | 207 | 95.7% |
Branches: | 120 | 172 | 69.8% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #include "pathspec.h" | ||
6 | |||
7 | #include <cassert> | ||
8 | |||
9 | #include "util/logging.h" | ||
10 | #include "util/smalloc.h" | ||
11 | |||
12 | 2188 | Pathspec::Pathspec(const std::string &spec) | |
13 | 2188 | : regex_(NULL) | |
14 | 2188 | , relaxed_regex_(NULL) | |
15 | 2188 | , prefix_regex_(NULL) | |
16 | 2188 | , regex_compiled_(false) | |
17 | 2188 | , relaxed_regex_compiled_(false) | |
18 | 2188 | , prefix_regex_compiled_(false) | |
19 | 2188 | , glob_string_compiled_(false) | |
20 | 2188 | , glob_string_sequence_compiled_(false) | |
21 | 2188 | , valid_(true) | |
22 | 2188 | , absolute_(false) { | |
23 |
1/2✓ Branch 1 taken 2188 times.
✗ Branch 2 not taken.
|
2188 | Parse(spec); |
24 |
2/2✓ Branch 1 taken 14 times.
✓ Branch 2 taken 2174 times.
|
2188 | if (patterns_.size() == 0) { |
25 | 14 | valid_ = false; | |
26 | } | ||
27 | |||
28 | 2188 | ElementPatterns::const_iterator i = patterns_.begin(); | |
29 | 2188 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
30 |
2/2✓ Branch 2 taken 4795 times.
✓ Branch 3 taken 2188 times.
|
6983 | for (; i != iend; ++i) { |
31 |
2/2✓ Branch 2 taken 26 times.
✓ Branch 3 taken 4769 times.
|
4795 | if (!i->IsValid()) { |
32 | 26 | valid_ = false; | |
33 | } | ||
34 | } | ||
35 | 2188 | } | |
36 | |||
37 | // Compiled regex structure cannot be duplicated and needs to be re-compiled | ||
38 | // Note: the copy-constructed object will perform a lazy evaluation again | ||
39 | 2268 | Pathspec::Pathspec(const Pathspec &other) | |
40 | 2268 | : patterns_(other.patterns_) | |
41 | 2268 | , regex_(NULL) | |
42 | 2268 | , relaxed_regex_(NULL) | |
43 | 2268 | , prefix_regex_(NULL) | |
44 |
1/2✓ Branch 1 taken 2268 times.
✗ Branch 2 not taken.
|
2268 | , glob_string_(other.glob_string_) |
45 |
1/2✓ Branch 1 taken 2268 times.
✗ Branch 2 not taken.
|
2268 | , glob_string_sequence_(other.glob_string_sequence_) |
46 | 2268 | , regex_compiled_(false) | |
47 | 2268 | , relaxed_regex_compiled_(false) | |
48 | 2268 | , prefix_regex_compiled_(false) | |
49 | 2268 | , glob_string_compiled_(other.glob_string_compiled_) | |
50 | 2268 | , glob_string_sequence_compiled_(other.glob_string_sequence_compiled_) | |
51 | 2268 | , valid_(other.valid_) | |
52 | 2268 | , absolute_(other.absolute_) { } | |
53 | |||
54 | 4453 | Pathspec::~Pathspec() { DestroyRegularExpressions(); } | |
55 | |||
56 | 42 | Pathspec &Pathspec::operator=(const Pathspec &other) { | |
57 |
1/2✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
|
42 | if (this != &other) { |
58 | 42 | DestroyRegularExpressions(); // see: copy c'tor for details | |
59 | 42 | patterns_ = other.patterns_; | |
60 | |||
61 | 42 | glob_string_compiled_ = other.glob_string_compiled_; | |
62 | 42 | glob_string_ = other.glob_string_; | |
63 | |||
64 | 42 | glob_string_sequence_compiled_ = other.glob_string_sequence_compiled_; | |
65 | 42 | glob_string_sequence_ = other.glob_string_sequence_; | |
66 | |||
67 | 42 | valid_ = other.valid_; | |
68 | 42 | absolute_ = other.absolute_; | |
69 | } | ||
70 | |||
71 | 42 | return *this; | |
72 | } | ||
73 | |||
74 | |||
75 | 2188 | void Pathspec::Parse(const std::string &spec) { | |
76 | // parsing is done using std::string iterators to walk through the entire | ||
77 | // pathspec parameter. Thus, all parsing methods receive references to these | ||
78 | // iterators and increment itr as they pass along. | ||
79 | 2188 | std::string::const_iterator itr = spec.begin(); | |
80 | 2188 | const std::string::const_iterator end = spec.end(); | |
81 | |||
82 | 2188 | absolute_ = (*itr == kSeparator); | |
83 |
2/2✓ Branch 1 taken 9083 times.
✓ Branch 2 taken 2188 times.
|
11271 | while (itr != end) { |
84 |
2/2✓ Branch 1 taken 4288 times.
✓ Branch 2 taken 4795 times.
|
9083 | if (*itr == kSeparator) { |
85 | 4288 | ++itr; | |
86 | 4288 | continue; | |
87 | } | ||
88 |
1/2✓ Branch 1 taken 4795 times.
✗ Branch 2 not taken.
|
4795 | ParsePathElement(end, &itr); |
89 | } | ||
90 | 2188 | } | |
91 | |||
92 | 4795 | void Pathspec::ParsePathElement(const std::string::const_iterator &end, | |
93 | std::string::const_iterator *itr) { | ||
94 | // find the end of the current pattern element (next directory boundary) | ||
95 | 4795 | const std::string::const_iterator begin_element = *itr; | |
96 |
6/6✓ Branch 1 taken 23141 times.
✓ Branch 2 taken 2082 times.
✓ Branch 4 taken 20428 times.
✓ Branch 5 taken 2713 times.
✓ Branch 6 taken 20428 times.
✓ Branch 7 taken 4795 times.
|
25223 | while (*itr != end && **itr != kSeparator) { |
97 | 20428 | ++(*itr); | |
98 | } | ||
99 | 4795 | const std::string::const_iterator end_element = *itr; | |
100 | |||
101 | // create a PathspecElementPattern out of this directory description | ||
102 |
2/4✓ Branch 1 taken 4795 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4795 times.
✗ Branch 5 not taken.
|
4795 | patterns_.push_back(PathspecElementPattern(begin_element, end_element)); |
103 | 4795 | } | |
104 | |||
105 | 5416 | bool Pathspec::IsMatching(const std::string &query_path) const { | |
106 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 5416 times.
|
5416 | assert(IsValid()); |
107 | |||
108 |
2/2✓ Branch 1 taken 225 times.
✓ Branch 2 taken 5191 times.
|
5416 | if (query_path.empty()) { |
109 | 225 | return false; | |
110 | } | ||
111 | |||
112 | 5191 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
113 |
2/2✓ Branch 1 taken 3796 times.
✓ Branch 2 taken 179 times.
|
3975 | return (!query_is_absolute || this->IsAbsolute()) |
114 |
4/4✓ Branch 0 taken 3975 times.
✓ Branch 1 taken 1216 times.
✓ Branch 3 taken 2000 times.
✓ Branch 4 taken 3012 times.
|
9166 | && IsPathspecMatching(query_path); |
115 | } | ||
116 | |||
117 | 168 | bool Pathspec::IsPrefixMatching(const std::string &query_path) const { | |
118 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
|
168 | assert(IsValid()); |
119 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
|
168 | assert(IsAbsolute()); |
120 | |||
121 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 168 times.
|
168 | if (query_path.empty()) { |
122 | ✗ | return false; | |
123 | } | ||
124 | |||
125 | 168 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
126 |
4/4✓ Branch 0 taken 154 times.
✓ Branch 1 taken 14 times.
✓ Branch 3 taken 98 times.
✓ Branch 4 taken 56 times.
|
168 | return (query_is_absolute && IsPathspecPrefixMatching(query_path)); |
127 | } | ||
128 | |||
129 | 3989 | bool Pathspec::IsMatchingRelaxed(const std::string &query_path) const { | |
130 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3989 times.
|
3989 | assert(IsValid()); |
131 | |||
132 |
2/2✓ Branch 1 taken 468 times.
✓ Branch 2 taken 3521 times.
|
3989 | if (query_path.empty()) { |
133 | 468 | return false; | |
134 | } | ||
135 | |||
136 | 3521 | return IsPathspecMatchingRelaxed(query_path); | |
137 | } | ||
138 | |||
139 | 5012 | bool Pathspec::IsPathspecMatching(const std::string &query_path) const { | |
140 | 5012 | return ApplyRegularExpression(query_path, GetRegularExpression()); | |
141 | } | ||
142 | |||
143 | 154 | bool Pathspec::IsPathspecPrefixMatching(const std::string &query_path) const { | |
144 | 154 | return ApplyRegularExpression(query_path, GetPrefixRegularExpression()); | |
145 | } | ||
146 | |||
147 | 3521 | bool Pathspec::IsPathspecMatchingRelaxed(const std::string &query_path) const { | |
148 | 3521 | return ApplyRegularExpression(query_path, GetRelaxedRegularExpression()); | |
149 | } | ||
150 | |||
151 | 8687 | bool Pathspec::ApplyRegularExpression(const std::string &query_path, | |
152 | regex_t *regex) const { | ||
153 | 8687 | const char *path = query_path.c_str(); | |
154 | 8687 | const int retval = regexec(regex, path, 0, NULL, 0); | |
155 | |||
156 |
3/4✓ Branch 0 taken 5929 times.
✓ Branch 1 taken 2758 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5929 times.
|
8687 | if (retval != 0 && retval != REG_NOMATCH) { |
157 | ✗ | PrintRegularExpressionError(retval); | |
158 | } | ||
159 | |||
160 | 8687 | return (retval == 0); | |
161 | } | ||
162 | |||
163 | 5012 | regex_t *Pathspec::GetRegularExpression() const { | |
164 |
2/2✓ Branch 0 taken 1007 times.
✓ Branch 1 taken 4005 times.
|
5012 | if (!regex_compiled_) { |
165 | 1007 | const bool is_relaxed = false; | |
166 |
1/2✓ Branch 1 taken 1007 times.
✗ Branch 2 not taken.
|
1007 | const std::string regex = GenerateRegularExpression(is_relaxed); |
167 |
1/2✓ Branch 2 taken 1007 times.
✗ Branch 3 not taken.
|
1007 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
168 | |||
169 |
1/2✓ Branch 1 taken 1007 times.
✗ Branch 2 not taken.
|
1007 | regex_ = CompileRegularExpression(regex); |
170 | 1007 | regex_compiled_ = true; | |
171 | 1007 | } | |
172 | |||
173 | 5012 | return regex_; | |
174 | } | ||
175 | |||
176 | 154 | regex_t *Pathspec::GetPrefixRegularExpression() const { | |
177 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 126 times.
|
154 | if (!prefix_regex_compiled_) { |
178 | 28 | const bool is_relaxed = false; | |
179 | 28 | const bool is_prefix = true; | |
180 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | const std::string regex = GenerateRegularExpression(is_relaxed, is_prefix); |
181 |
1/2✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
|
28 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
182 | |||
183 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | prefix_regex_ = CompileRegularExpression(regex); |
184 | 28 | prefix_regex_compiled_ = true; | |
185 | 28 | } | |
186 | |||
187 | 154 | return prefix_regex_; | |
188 | } | ||
189 | |||
190 | 3521 | regex_t *Pathspec::GetRelaxedRegularExpression() const { | |
191 |
2/2✓ Branch 0 taken 360 times.
✓ Branch 1 taken 3161 times.
|
3521 | if (!relaxed_regex_compiled_) { |
192 | 360 | const bool is_relaxed = true; | |
193 |
1/2✓ Branch 1 taken 360 times.
✗ Branch 2 not taken.
|
360 | const std::string regex = GenerateRegularExpression(is_relaxed); |
194 |
1/2✓ Branch 2 taken 360 times.
✗ Branch 3 not taken.
|
360 | LogCvmfs(kLogPathspec, kLogDebug, "compiled relaxed regex: %s", |
195 | regex.c_str()); | ||
196 | |||
197 |
1/2✓ Branch 1 taken 360 times.
✗ Branch 2 not taken.
|
360 | relaxed_regex_ = CompileRegularExpression(regex); |
198 | 360 | relaxed_regex_compiled_ = true; | |
199 | 360 | } | |
200 | |||
201 | 3521 | return relaxed_regex_; | |
202 | } | ||
203 | |||
204 | 1395 | std::string Pathspec::GenerateRegularExpression(const bool is_relaxed, | |
205 | const bool is_prefix) const { | ||
206 | // start matching at the first character | ||
207 |
1/2✓ Branch 2 taken 1395 times.
✗ Branch 3 not taken.
|
1395 | std::string regex = "^"; |
208 | |||
209 | // absolute paths require a / in the beginning | ||
210 |
2/2✓ Branch 1 taken 1137 times.
✓ Branch 2 taken 258 times.
|
1395 | if (IsAbsolute()) { |
211 |
1/2✓ Branch 1 taken 1137 times.
✗ Branch 2 not taken.
|
1137 | regex += kSeparator; |
212 | } | ||
213 | |||
214 | // concatenate the regular expressions of the compiled path elements | ||
215 | 1395 | ElementPatterns::const_iterator i = patterns_.begin(); | |
216 | 1395 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
217 |
2/2✓ Branch 2 taken 3010 times.
✓ Branch 3 taken 1395 times.
|
4405 | for (; i != iend; ++i) { |
218 |
2/4✓ Branch 2 taken 3010 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 3010 times.
✗ Branch 6 not taken.
|
3010 | regex += i->GenerateRegularExpression(is_relaxed); |
219 |
2/2✓ Branch 2 taken 1615 times.
✓ Branch 3 taken 1395 times.
|
3010 | if (i + 1 != iend) { |
220 |
1/2✓ Branch 1 taken 1615 times.
✗ Branch 2 not taken.
|
1615 | regex += kSeparator; |
221 | } | ||
222 | } | ||
223 | |||
224 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 1367 times.
|
1395 | if (is_prefix) { |
225 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | regex += "($|"; |
226 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | regex += kSeparator; |
227 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | regex += ".*$)"; |
228 | } else { | ||
229 | // a path might end with a trailing slash | ||
230 | // (pathspec does not distinguish files and directories) | ||
231 |
1/2✓ Branch 1 taken 1367 times.
✗ Branch 2 not taken.
|
1367 | regex += kSeparator; |
232 |
1/2✓ Branch 1 taken 1367 times.
✗ Branch 2 not taken.
|
1367 | regex += "?$"; |
233 | } | ||
234 | |||
235 | 2790 | return regex; | |
236 | } | ||
237 | |||
238 | 1395 | regex_t *Pathspec::CompileRegularExpression(const std::string &regex) const { | |
239 | 1395 | regex_t *result = reinterpret_cast<regex_t *>(smalloc(sizeof(regex_t))); | |
240 | 1395 | const int flags = REG_NOSUB | REG_NEWLINE | REG_EXTENDED; | |
241 | 1395 | const int retval = regcomp(result, regex.c_str(), flags); | |
242 | |||
243 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1395 times.
|
1395 | if (retval != 0) { |
244 | ✗ | PrintRegularExpressionError(retval); | |
245 | ✗ | assert(false && "failed to compile regex"); | |
246 | } | ||
247 | |||
248 | 1395 | return result; | |
249 | } | ||
250 | |||
251 | 4495 | void Pathspec::DestroyRegularExpressions() { | |
252 |
2/2✓ Branch 0 taken 1007 times.
✓ Branch 1 taken 3488 times.
|
4495 | if (regex_compiled_) { |
253 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1007 times.
|
1007 | assert(regex_ != NULL); |
254 | 1007 | regfree(regex_); | |
255 | 1007 | regex_ = NULL; | |
256 | 1007 | regex_compiled_ = false; | |
257 | } | ||
258 | |||
259 |
2/2✓ Branch 0 taken 360 times.
✓ Branch 1 taken 4135 times.
|
4495 | if (relaxed_regex_compiled_) { |
260 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 360 times.
|
360 | assert(relaxed_regex_ != NULL); |
261 | 360 | regfree(relaxed_regex_); | |
262 | 360 | relaxed_regex_ = NULL; | |
263 | 360 | relaxed_regex_compiled_ = false; | |
264 | } | ||
265 | 4495 | } | |
266 | |||
267 | 1179 | bool Pathspec::operator==(const Pathspec &other) const { | |
268 |
1/2✓ Branch 4 taken 720 times.
✗ Branch 5 not taken.
|
1899 | if (patterns_.size() != other.patterns_.size() || IsValid() != other.IsValid() |
269 |
6/6✓ Branch 0 taken 720 times.
✓ Branch 1 taken 459 times.
✓ Branch 4 taken 63 times.
✓ Branch 5 taken 657 times.
✓ Branch 6 taken 522 times.
✓ Branch 7 taken 657 times.
|
1899 | || IsAbsolute() != other.IsAbsolute()) { |
270 | 522 | return false; | |
271 | } | ||
272 | |||
273 | 657 | ElementPatterns::const_iterator i = patterns_.begin(); | |
274 | 657 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
275 | 657 | ElementPatterns::const_iterator j = other.patterns_.begin(); | |
276 | 657 | const ElementPatterns::const_iterator jend = other.patterns_.end(); | |
277 | |||
278 |
5/6✓ Branch 3 taken 1368 times.
✓ Branch 4 taken 222 times.
✓ Branch 6 taken 1368 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1368 times.
✓ Branch 9 taken 222 times.
|
1590 | for (; i != iend && j != jend; ++i, ++j) { |
279 |
3/4✓ Branch 3 taken 1368 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 435 times.
✓ Branch 6 taken 933 times.
|
1368 | if (*i != *j) { |
280 | 435 | return false; | |
281 | } | ||
282 | } | ||
283 | |||
284 | 222 | return true; | |
285 | } | ||
286 | |||
287 | ✗ | void Pathspec::PrintRegularExpressionError(const int error_code) const { | |
288 | ✗ | assert(regex_compiled_); | |
289 | ✗ | const size_t errbuf_size = 1024; | |
290 | char error[errbuf_size]; | ||
291 | ✗ | regerror(error_code, regex_, error, errbuf_size); | |
292 | ✗ | LogCvmfs(kLogPathspec, kLogStderr, "RegEx Error: %d - %s", error_code, error); | |
293 | } | ||
294 | |||
295 | 227 | const Pathspec::GlobStringSequence &Pathspec::GetGlobStringSequence() const { | |
296 |
1/2✓ Branch 0 taken 227 times.
✗ Branch 1 not taken.
|
227 | if (!glob_string_sequence_compiled_) { |
297 | 227 | GenerateGlobStringSequence(); | |
298 | 227 | glob_string_sequence_compiled_ = true; | |
299 | } | ||
300 | 227 | return glob_string_sequence_; | |
301 | } | ||
302 | |||
303 | |||
304 | 227 | void Pathspec::GenerateGlobStringSequence() const { | |
305 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 227 times.
|
227 | assert(glob_string_sequence_.empty()); |
306 | 227 | ElementPatterns::const_iterator i = patterns_.begin(); | |
307 | 227 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
308 |
2/2✓ Branch 2 taken 454 times.
✓ Branch 3 taken 227 times.
|
681 | for (; i != iend; ++i) { |
309 |
1/2✓ Branch 2 taken 454 times.
✗ Branch 3 not taken.
|
454 | const std::string glob_string = i->GenerateGlobString(); |
310 |
1/2✓ Branch 1 taken 454 times.
✗ Branch 2 not taken.
|
454 | glob_string_sequence_.push_back(glob_string); |
311 | 454 | } | |
312 | 227 | } | |
313 | |||
314 | |||
315 | 311 | const std::string &Pathspec::GetGlobString() const { | |
316 |
2/2✓ Branch 0 taken 227 times.
✓ Branch 1 taken 84 times.
|
311 | if (!glob_string_compiled_) { |
317 | 227 | GenerateGlobString(); | |
318 | 227 | glob_string_compiled_ = true; | |
319 | } | ||
320 | 311 | return glob_string_; | |
321 | } | ||
322 | |||
323 | |||
324 | 227 | void Pathspec::GenerateGlobString() const { | |
325 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 227 times.
|
227 | assert(glob_string_.empty()); |
326 | |||
327 | 227 | bool is_first = true; | |
328 |
1/2✓ Branch 1 taken 227 times.
✗ Branch 2 not taken.
|
227 | const GlobStringSequence &seq = GetGlobStringSequence(); |
329 | 227 | GlobStringSequence::const_iterator i = seq.begin(); | |
330 | 227 | const GlobStringSequence::const_iterator iend = seq.end(); | |
331 |
2/2✓ Branch 1 taken 454 times.
✓ Branch 2 taken 227 times.
|
681 | for (; i != iend; ++i) { |
332 |
6/6✓ Branch 0 taken 227 times.
✓ Branch 1 taken 227 times.
✓ Branch 3 taken 149 times.
✓ Branch 4 taken 78 times.
✓ Branch 5 taken 376 times.
✓ Branch 6 taken 78 times.
|
454 | if (!is_first || IsAbsolute()) { |
333 |
1/2✓ Branch 1 taken 376 times.
✗ Branch 2 not taken.
|
376 | glob_string_ += kSeparator; |
334 | } | ||
335 |
1/2✓ Branch 2 taken 454 times.
✗ Branch 3 not taken.
|
454 | glob_string_ += *i; |
336 | 454 | is_first = false; | |
337 | } | ||
338 | 227 | } | |
339 |