Directory: | cvmfs/ |
---|---|
File: | cvmfs/pathspec/pathspec.cc |
Date: | 2025-06-22 02:36:02 |
Exec | Total | Coverage | |
---|---|---|---|
Lines: | 198 | 207 | 95.7% |
Branches: | 120 | 172 | 69.8% |
Line | Branch | Exec | Source |
---|---|---|---|
1 | /** | ||
2 | * This file is part of the CernVM File System. | ||
3 | */ | ||
4 | |||
5 | #include "pathspec.h" | ||
6 | |||
7 | #include <cassert> | ||
8 | |||
9 | #include "util/logging.h" | ||
10 | #include "util/smalloc.h" | ||
11 | |||
12 | 5045 | Pathspec::Pathspec(const std::string &spec) | |
13 | 5045 | : regex_(NULL) | |
14 | 5045 | , relaxed_regex_(NULL) | |
15 | 5045 | , prefix_regex_(NULL) | |
16 | 5045 | , regex_compiled_(false) | |
17 | 5045 | , relaxed_regex_compiled_(false) | |
18 | 5045 | , prefix_regex_compiled_(false) | |
19 | 5045 | , glob_string_compiled_(false) | |
20 | 5045 | , glob_string_sequence_compiled_(false) | |
21 | 5045 | , valid_(true) | |
22 | 5045 | , absolute_(false) { | |
23 |
1/2✓ Branch 1 taken 5045 times.
✗ Branch 2 not taken.
|
5045 | Parse(spec); |
24 |
2/2✓ Branch 1 taken 40 times.
✓ Branch 2 taken 5005 times.
|
5045 | if (patterns_.size() == 0) { |
25 | 40 | valid_ = false; | |
26 | } | ||
27 | |||
28 | 5045 | ElementPatterns::const_iterator i = patterns_.begin(); | |
29 | 5045 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
30 |
2/2✓ Branch 2 taken 10783 times.
✓ Branch 3 taken 5045 times.
|
15828 | for (; i != iend; ++i) { |
31 |
2/2✓ Branch 2 taken 54 times.
✓ Branch 3 taken 10729 times.
|
10783 | if (!i->IsValid()) { |
32 | 54 | valid_ = false; | |
33 | } | ||
34 | } | ||
35 | 5045 | } | |
36 | |||
37 | // Compiled regex structure cannot be duplicated and needs to be re-compiled | ||
38 | // Note: the copy-constructed object will perform a lazy evaluation again | ||
39 | 3152 | Pathspec::Pathspec(const Pathspec &other) | |
40 | 3152 | : patterns_(other.patterns_) | |
41 | 3152 | , regex_(NULL) | |
42 | 3152 | , relaxed_regex_(NULL) | |
43 | 3152 | , prefix_regex_(NULL) | |
44 |
1/2✓ Branch 1 taken 3152 times.
✗ Branch 2 not taken.
|
3152 | , glob_string_(other.glob_string_) |
45 |
1/2✓ Branch 1 taken 3152 times.
✗ Branch 2 not taken.
|
3152 | , glob_string_sequence_(other.glob_string_sequence_) |
46 | 3152 | , regex_compiled_(false) | |
47 | 3152 | , relaxed_regex_compiled_(false) | |
48 | 3152 | , prefix_regex_compiled_(false) | |
49 | 3152 | , glob_string_compiled_(other.glob_string_compiled_) | |
50 | 3152 | , glob_string_sequence_compiled_(other.glob_string_sequence_compiled_) | |
51 | 3152 | , valid_(other.valid_) | |
52 | 3152 | , absolute_(other.absolute_) { } | |
53 | |||
54 | 8194 | Pathspec::~Pathspec() { DestroyRegularExpressions(); } | |
55 | |||
56 | 120 | Pathspec &Pathspec::operator=(const Pathspec &other) { | |
57 |
1/2✓ Branch 0 taken 120 times.
✗ Branch 1 not taken.
|
120 | if (this != &other) { |
58 | 120 | DestroyRegularExpressions(); // see: copy c'tor for details | |
59 | 120 | patterns_ = other.patterns_; | |
60 | |||
61 | 120 | glob_string_compiled_ = other.glob_string_compiled_; | |
62 | 120 | glob_string_ = other.glob_string_; | |
63 | |||
64 | 120 | glob_string_sequence_compiled_ = other.glob_string_sequence_compiled_; | |
65 | 120 | glob_string_sequence_ = other.glob_string_sequence_; | |
66 | |||
67 | 120 | valid_ = other.valid_; | |
68 | 120 | absolute_ = other.absolute_; | |
69 | } | ||
70 | |||
71 | 120 | return *this; | |
72 | } | ||
73 | |||
74 | |||
75 | 5045 | void Pathspec::Parse(const std::string &spec) { | |
76 | // parsing is done using std::string iterators to walk through the entire | ||
77 | // pathspec parameter. Thus, all parsing methods receive references to these | ||
78 | // iterators and increment itr as they pass along. | ||
79 | 5045 | std::string::const_iterator itr = spec.begin(); | |
80 | 5045 | const std::string::const_iterator end = spec.end(); | |
81 | |||
82 | 5045 | absolute_ = (*itr == kSeparator); | |
83 |
2/2✓ Branch 1 taken 20111 times.
✓ Branch 2 taken 5045 times.
|
25156 | while (itr != end) { |
84 |
2/2✓ Branch 1 taken 9328 times.
✓ Branch 2 taken 10783 times.
|
20111 | if (*itr == kSeparator) { |
85 | 9328 | ++itr; | |
86 | 9328 | continue; | |
87 | } | ||
88 |
1/2✓ Branch 1 taken 10783 times.
✗ Branch 2 not taken.
|
10783 | ParsePathElement(end, &itr); |
89 | } | ||
90 | 5045 | } | |
91 | |||
92 | 10783 | void Pathspec::ParsePathElement(const std::string::const_iterator &end, | |
93 | std::string::const_iterator *itr) { | ||
94 | // find the end of the current pattern element (next directory boundary) | ||
95 | 10783 | const std::string::const_iterator begin_element = *itr; | |
96 |
6/6✓ Branch 1 taken 53791 times.
✓ Branch 2 taken 4845 times.
✓ Branch 4 taken 47853 times.
✓ Branch 5 taken 5938 times.
✓ Branch 6 taken 47853 times.
✓ Branch 7 taken 10783 times.
|
58636 | while (*itr != end && **itr != kSeparator) { |
97 | 47853 | ++(*itr); | |
98 | } | ||
99 | 10783 | const std::string::const_iterator end_element = *itr; | |
100 | |||
101 | // create a PathspecElementPattern out of this directory description | ||
102 |
2/4✓ Branch 1 taken 10783 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10783 times.
✗ Branch 5 not taken.
|
10783 | patterns_.push_back(PathspecElementPattern(begin_element, end_element)); |
103 | 10783 | } | |
104 | |||
105 | 12474 | bool Pathspec::IsMatching(const std::string &query_path) const { | |
106 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 12474 times.
|
12474 | assert(IsValid()); |
107 | |||
108 |
2/2✓ Branch 1 taken 420 times.
✓ Branch 2 taken 12054 times.
|
12474 | if (query_path.empty()) { |
109 | 420 | return false; | |
110 | } | ||
111 | |||
112 | 12054 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
113 |
2/2✓ Branch 1 taken 8095 times.
✓ Branch 2 taken 517 times.
|
8612 | return (!query_is_absolute || this->IsAbsolute()) |
114 |
4/4✓ Branch 0 taken 8612 times.
✓ Branch 1 taken 3442 times.
✓ Branch 3 taken 4950 times.
✓ Branch 4 taken 6587 times.
|
20666 | && IsPathspecMatching(query_path); |
115 | } | ||
116 | |||
117 | 480 | bool Pathspec::IsPrefixMatching(const std::string &query_path) const { | |
118 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 480 times.
|
480 | assert(IsValid()); |
119 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 480 times.
|
480 | assert(IsAbsolute()); |
120 | |||
121 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 480 times.
|
480 | if (query_path.empty()) { |
122 | ✗ | return false; | |
123 | } | ||
124 | |||
125 | 480 | const bool query_is_absolute = (query_path[0] == kSeparator); | |
126 |
4/4✓ Branch 0 taken 440 times.
✓ Branch 1 taken 40 times.
✓ Branch 3 taken 280 times.
✓ Branch 4 taken 160 times.
|
480 | return (query_is_absolute && IsPathspecPrefixMatching(query_path)); |
127 | } | ||
128 | |||
129 | 6578 | bool Pathspec::IsMatchingRelaxed(const std::string &query_path) const { | |
130 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 6578 times.
|
6578 | assert(IsValid()); |
131 | |||
132 |
2/2✓ Branch 1 taken 720 times.
✓ Branch 2 taken 5858 times.
|
6578 | if (query_path.empty()) { |
133 | 720 | return false; | |
134 | } | ||
135 | |||
136 | 5858 | return IsPathspecMatchingRelaxed(query_path); | |
137 | } | ||
138 | |||
139 | 11537 | bool Pathspec::IsPathspecMatching(const std::string &query_path) const { | |
140 | 11537 | return ApplyRegularExpression(query_path, GetRegularExpression()); | |
141 | } | ||
142 | |||
143 | 440 | bool Pathspec::IsPathspecPrefixMatching(const std::string &query_path) const { | |
144 | 440 | return ApplyRegularExpression(query_path, GetPrefixRegularExpression()); | |
145 | } | ||
146 | |||
147 | 5858 | bool Pathspec::IsPathspecMatchingRelaxed(const std::string &query_path) const { | |
148 | 5858 | return ApplyRegularExpression(query_path, GetRelaxedRegularExpression()); | |
149 | } | ||
150 | |||
151 | 17835 | bool Pathspec::ApplyRegularExpression(const std::string &query_path, | |
152 | regex_t *regex) const { | ||
153 | 17835 | const char *path = query_path.c_str(); | |
154 | 17835 | const int retval = regexec(regex, path, 0, NULL, 0); | |
155 | |||
156 |
3/4✓ Branch 0 taken 11235 times.
✓ Branch 1 taken 6600 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11235 times.
|
17835 | if (retval != 0 && retval != REG_NOMATCH) { |
157 | ✗ | PrintRegularExpressionError(retval); | |
158 | } | ||
159 | |||
160 | 17835 | return (retval == 0); | |
161 | } | ||
162 | |||
163 | 11537 | regex_t *Pathspec::GetRegularExpression() const { | |
164 |
2/2✓ Branch 0 taken 2399 times.
✓ Branch 1 taken 9138 times.
|
11537 | if (!regex_compiled_) { |
165 | 2399 | const bool is_relaxed = false; | |
166 |
1/2✓ Branch 1 taken 2399 times.
✗ Branch 2 not taken.
|
2399 | const std::string regex = GenerateRegularExpression(is_relaxed); |
167 |
1/2✓ Branch 2 taken 2399 times.
✗ Branch 3 not taken.
|
2399 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
168 | |||
169 |
1/2✓ Branch 1 taken 2399 times.
✗ Branch 2 not taken.
|
2399 | regex_ = CompileRegularExpression(regex); |
170 | 2399 | regex_compiled_ = true; | |
171 | 2399 | } | |
172 | |||
173 | 11537 | return regex_; | |
174 | } | ||
175 | |||
176 | 440 | regex_t *Pathspec::GetPrefixRegularExpression() const { | |
177 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 360 times.
|
440 | if (!prefix_regex_compiled_) { |
178 | 80 | const bool is_relaxed = false; | |
179 | 80 | const bool is_prefix = true; | |
180 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | const std::string regex = GenerateRegularExpression(is_relaxed, is_prefix); |
181 |
1/2✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
|
80 | LogCvmfs(kLogPathspec, kLogDebug, "compiled regex: %s", regex.c_str()); |
182 | |||
183 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | prefix_regex_ = CompileRegularExpression(regex); |
184 | 80 | prefix_regex_compiled_ = true; | |
185 | 80 | } | |
186 | |||
187 | 440 | return prefix_regex_; | |
188 | } | ||
189 | |||
190 | 5858 | regex_t *Pathspec::GetRelaxedRegularExpression() const { | |
191 |
2/2✓ Branch 0 taken 701 times.
✓ Branch 1 taken 5157 times.
|
5858 | if (!relaxed_regex_compiled_) { |
192 | 701 | const bool is_relaxed = true; | |
193 |
1/2✓ Branch 1 taken 701 times.
✗ Branch 2 not taken.
|
701 | const std::string regex = GenerateRegularExpression(is_relaxed); |
194 |
1/2✓ Branch 2 taken 701 times.
✗ Branch 3 not taken.
|
701 | LogCvmfs(kLogPathspec, kLogDebug, "compiled relaxed regex: %s", |
195 | regex.c_str()); | ||
196 | |||
197 |
1/2✓ Branch 1 taken 701 times.
✗ Branch 2 not taken.
|
701 | relaxed_regex_ = CompileRegularExpression(regex); |
198 | 701 | relaxed_regex_compiled_ = true; | |
199 | 701 | } | |
200 | |||
201 | 5858 | return relaxed_regex_; | |
202 | } | ||
203 | |||
204 | 3180 | std::string Pathspec::GenerateRegularExpression(const bool is_relaxed, | |
205 | const bool is_prefix) const { | ||
206 | // start matching at the first character | ||
207 |
1/2✓ Branch 2 taken 3180 times.
✗ Branch 3 not taken.
|
3180 | std::string regex = "^"; |
208 | |||
209 | // absolute paths require a / in the beginning | ||
210 |
2/2✓ Branch 1 taken 2514 times.
✓ Branch 2 taken 666 times.
|
3180 | if (IsAbsolute()) { |
211 |
1/2✓ Branch 1 taken 2514 times.
✗ Branch 2 not taken.
|
2514 | regex += kSeparator; |
212 | } | ||
213 | |||
214 | // concatenate the regular expressions of the compiled path elements | ||
215 | 3180 | ElementPatterns::const_iterator i = patterns_.begin(); | |
216 | 3180 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
217 |
2/2✓ Branch 2 taken 6740 times.
✓ Branch 3 taken 3180 times.
|
9920 | for (; i != iend; ++i) { |
218 |
2/4✓ Branch 2 taken 6740 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 6740 times.
✗ Branch 6 not taken.
|
6740 | regex += i->GenerateRegularExpression(is_relaxed); |
219 |
2/2✓ Branch 2 taken 3560 times.
✓ Branch 3 taken 3180 times.
|
6740 | if (i + 1 != iend) { |
220 |
1/2✓ Branch 1 taken 3560 times.
✗ Branch 2 not taken.
|
3560 | regex += kSeparator; |
221 | } | ||
222 | } | ||
223 | |||
224 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 3100 times.
|
3180 | if (is_prefix) { |
225 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | regex += "($|"; |
226 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | regex += kSeparator; |
227 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | regex += ".*$)"; |
228 | } else { | ||
229 | // a path might end with a trailing slash | ||
230 | // (pathspec does not distinguish files and directories) | ||
231 |
1/2✓ Branch 1 taken 3100 times.
✗ Branch 2 not taken.
|
3100 | regex += kSeparator; |
232 |
1/2✓ Branch 1 taken 3100 times.
✗ Branch 2 not taken.
|
3100 | regex += "?$"; |
233 | } | ||
234 | |||
235 | 6360 | return regex; | |
236 | } | ||
237 | |||
238 | 3180 | regex_t *Pathspec::CompileRegularExpression(const std::string &regex) const { | |
239 | 3180 | regex_t *result = reinterpret_cast<regex_t *>(smalloc(sizeof(regex_t))); | |
240 | 3180 | const int flags = REG_NOSUB | REG_NEWLINE | REG_EXTENDED; | |
241 | 3180 | const int retval = regcomp(result, regex.c_str(), flags); | |
242 | |||
243 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3180 times.
|
3180 | if (retval != 0) { |
244 | ✗ | PrintRegularExpressionError(retval); | |
245 | ✗ | assert(false && "failed to compile regex"); | |
246 | } | ||
247 | |||
248 | 3180 | return result; | |
249 | } | ||
250 | |||
251 | 8314 | void Pathspec::DestroyRegularExpressions() { | |
252 |
2/2✓ Branch 0 taken 2399 times.
✓ Branch 1 taken 5915 times.
|
8314 | if (regex_compiled_) { |
253 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2399 times.
|
2399 | assert(regex_ != NULL); |
254 | 2399 | regfree(regex_); | |
255 | 2399 | regex_ = NULL; | |
256 | 2399 | regex_compiled_ = false; | |
257 | } | ||
258 | |||
259 |
2/2✓ Branch 0 taken 701 times.
✓ Branch 1 taken 7613 times.
|
8314 | if (relaxed_regex_compiled_) { |
260 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 701 times.
|
701 | assert(relaxed_regex_ != NULL); |
261 | 701 | regfree(relaxed_regex_); | |
262 | 701 | relaxed_regex_ = NULL; | |
263 | 701 | relaxed_regex_compiled_ = false; | |
264 | } | ||
265 | 8314 | } | |
266 | |||
267 | 2344 | bool Pathspec::operator==(const Pathspec &other) const { | |
268 |
1/2✓ Branch 4 taken 1650 times.
✗ Branch 5 not taken.
|
3994 | if (patterns_.size() != other.patterns_.size() || IsValid() != other.IsValid() |
269 |
6/6✓ Branch 0 taken 1650 times.
✓ Branch 1 taken 694 times.
✓ Branch 4 taken 102 times.
✓ Branch 5 taken 1548 times.
✓ Branch 6 taken 796 times.
✓ Branch 7 taken 1548 times.
|
3994 | || IsAbsolute() != other.IsAbsolute()) { |
270 | 796 | return false; | |
271 | } | ||
272 | |||
273 | 1548 | ElementPatterns::const_iterator i = patterns_.begin(); | |
274 | 1548 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
275 | 1548 | ElementPatterns::const_iterator j = other.patterns_.begin(); | |
276 | 1548 | const ElementPatterns::const_iterator jend = other.patterns_.end(); | |
277 | |||
278 |
5/6✓ Branch 3 taken 3244 times.
✓ Branch 4 taken 614 times.
✓ Branch 6 taken 3244 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 3244 times.
✓ Branch 9 taken 614 times.
|
3858 | for (; i != iend && j != jend; ++i, ++j) { |
279 |
3/4✓ Branch 3 taken 3244 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 934 times.
✓ Branch 6 taken 2310 times.
|
3244 | if (*i != *j) { |
280 | 934 | return false; | |
281 | } | ||
282 | } | ||
283 | |||
284 | 614 | return true; | |
285 | } | ||
286 | |||
287 | ✗ | void Pathspec::PrintRegularExpressionError(const int error_code) const { | |
288 | ✗ | assert(regex_compiled_); | |
289 | ✗ | const size_t errbuf_size = 1024; | |
290 | char error[errbuf_size]; | ||
291 | ✗ | regerror(error_code, regex_, error, errbuf_size); | |
292 | ✗ | LogCvmfs(kLogPathspec, kLogStderr, "RegEx Error: %d - %s", error_code, error); | |
293 | } | ||
294 | |||
295 | 680 | const Pathspec::GlobStringSequence &Pathspec::GetGlobStringSequence() const { | |
296 |
1/2✓ Branch 0 taken 680 times.
✗ Branch 1 not taken.
|
680 | if (!glob_string_sequence_compiled_) { |
297 | 680 | GenerateGlobStringSequence(); | |
298 | 680 | glob_string_sequence_compiled_ = true; | |
299 | } | ||
300 | 680 | return glob_string_sequence_; | |
301 | } | ||
302 | |||
303 | |||
304 | 680 | void Pathspec::GenerateGlobStringSequence() const { | |
305 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 680 times.
|
680 | assert(glob_string_sequence_.empty()); |
306 | 680 | ElementPatterns::const_iterator i = patterns_.begin(); | |
307 | 680 | const ElementPatterns::const_iterator iend = patterns_.end(); | |
308 |
2/2✓ Branch 2 taken 1360 times.
✓ Branch 3 taken 680 times.
|
2040 | for (; i != iend; ++i) { |
309 |
1/2✓ Branch 2 taken 1360 times.
✗ Branch 3 not taken.
|
1360 | const std::string glob_string = i->GenerateGlobString(); |
310 |
1/2✓ Branch 1 taken 1360 times.
✗ Branch 2 not taken.
|
1360 | glob_string_sequence_.push_back(glob_string); |
311 | 1360 | } | |
312 | 680 | } | |
313 | |||
314 | |||
315 | 920 | const std::string &Pathspec::GetGlobString() const { | |
316 |
2/2✓ Branch 0 taken 680 times.
✓ Branch 1 taken 240 times.
|
920 | if (!glob_string_compiled_) { |
317 | 680 | GenerateGlobString(); | |
318 | 680 | glob_string_compiled_ = true; | |
319 | } | ||
320 | 920 | return glob_string_; | |
321 | } | ||
322 | |||
323 | |||
324 | 680 | void Pathspec::GenerateGlobString() const { | |
325 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 680 times.
|
680 | assert(glob_string_.empty()); |
326 | |||
327 | 680 | bool is_first = true; | |
328 |
1/2✓ Branch 1 taken 680 times.
✗ Branch 2 not taken.
|
680 | const GlobStringSequence &seq = GetGlobStringSequence(); |
329 | 680 | GlobStringSequence::const_iterator i = seq.begin(); | |
330 | 680 | const GlobStringSequence::const_iterator iend = seq.end(); | |
331 |
2/2✓ Branch 1 taken 1360 times.
✓ Branch 2 taken 680 times.
|
2040 | for (; i != iend; ++i) { |
332 |
6/6✓ Branch 0 taken 680 times.
✓ Branch 1 taken 680 times.
✓ Branch 3 taken 440 times.
✓ Branch 4 taken 240 times.
✓ Branch 5 taken 1120 times.
✓ Branch 6 taken 240 times.
|
1360 | if (!is_first || IsAbsolute()) { |
333 |
1/2✓ Branch 1 taken 1120 times.
✗ Branch 2 not taken.
|
1120 | glob_string_ += kSeparator; |
334 | } | ||
335 |
1/2✓ Branch 2 taken 1360 times.
✗ Branch 3 not taken.
|
1360 | glob_string_ += *i; |
336 | 1360 | is_first = false; | |
337 | } | ||
338 | 680 | } | |
339 |