/**
 * This file is part of the CernVM File System.
 */
4 |
|
|
|
5 |
|
|
#ifndef CVMFS_PATHSPEC_PATHSPEC_H_ |
6 |
|
|
#define CVMFS_PATHSPEC_PATHSPEC_H_ |
7 |
|
|
|
8 |
|
|
#include <regex.h> |
9 |
|
|
|
10 |
|
|
#include <string> |
11 |
|
|
#include <vector> |
12 |
|
|
|
13 |
|
|
#include "pathspec/pathspec_pattern.h" |
14 |
|
|
|
/**
 * A Pathspec is an abstract description of a file path pattern.
 * Examples (adding a space in front of * - silence compiler warning):
 *    /foo/bar/ *.txt   - matches .txt files in /foo/bar
 *    /kernel/2.6.?     - matches directories like: /kernel/2.6.[0-9a-z]
 *    /test/ *_debug/ * - matches all files in /test/cvmfs_debug/ for example
 *
 * We are supporting both the wildcard (i.e. *) and the placeholder (i.e. ?)
 * symbol. Furthermore Pathspecs can be absolute (starting with /) or relative.
 *
 * Pathspecs are similar to unix glob strings for file system paths and can be
 * transformed into such strings or sequences of these (cut at directory
 * boundaries). This comes in handy when searching CVMFS catalog entries with a
 * Pathspec.
 * Note: sophisticated Pathspec based catalog lookup was not needed, yet. But it
 *       is implemented and not merged (see: reneme/feature-lookup_pathspec).
 *
 * Also inverse matches are possible by transforming a Pathspec into a regular
 * expression and matching path strings with it. There are two matching modes:
 *   IsMatching()        - Matches the exact path
 *                         (wildcards don't span directory boundaries)
 *   IsMatchingRelaxed() - Matches the path more relaxed
 *                         (comparable to shell pattern matching, wildcards
 *                          match any character including /)
 *
 * Internally a Pathspec is broken up into PathspecElementPatterns at the
 * directory boundaries. Have a look there for further details.
 *
 * For matching, Pathspecs need to be transformed either into a regular
 * expression or GlobString(Sequence). These transformations are done lazily on
 * first request.
 */
47 |
|
|
class Pathspec { |
48 |
|
|
public: |
49 |
|
|
static const char kSeparator = '/'; |
50 |
|
|
static const char kEscaper = '\\'; |
51 |
|
|
static const char kWildcard = '*'; |
52 |
|
|
static const char kPlaceholder = '?'; |
53 |
|
|
|
54 |
|
|
protected: |
55 |
|
|
typedef std::vector<PathspecElementPattern> ElementPatterns; |
56 |
|
|
|
57 |
|
|
public: |
58 |
|
|
typedef std::vector<std::string> GlobStringSequence; |
59 |
|
|
|
60 |
|
|
public: |
61 |
|
|
/** |
62 |
|
|
* Create a new Pathspec that represents the pattern handed in as a parameter. |
63 |
|
|
* Note: The parser will determine if the given pattern is valid and set a |
64 |
|
|
* flag. After creating a Pathspec it should be checked if .IsValid() |
65 |
|
|
* |
66 |
|
|
* @param spec the pathspec pattern to be parsed |
67 |
|
|
*/ |
68 |
|
|
explicit Pathspec(const std::string &spec); |
69 |
|
|
Pathspec(const Pathspec &other); |
70 |
|
|
// TODO(rmeusel): C++11 move constructor |
71 |
|
|
~Pathspec(); |
72 |
|
|
|
73 |
|
|
/** |
74 |
|
|
* Matches an exact path string. Directory boundaries are taken into account |
75 |
|
|
* Say: wildcards do not match beyond a singly directory tree level. |
76 |
|
|
* |
77 |
|
|
* @param query_path the path to be matched against this Pathstring |
78 |
|
|
* @return true if the path matches |
79 |
|
|
*/ |
80 |
|
|
bool IsMatching(const std::string &query_path) const; |
81 |
|
|
|
82 |
|
|
|
83 |
|
|
/** |
84 |
|
|
* Matches if the Pathspec is a prefix path of query_path. Directory |
85 |
|
|
* boundaries are taken into account. Only works for absolute PathSpec. |
86 |
|
|
* |
87 |
|
|
* @param query_path the path to be matched against this Pathstring |
88 |
|
|
* @return true if the path matches |
89 |
|
|
*/ |
90 |
|
|
bool IsPrefixMatching(const std::string &query_path) const; |
91 |
|
|
|
92 |
|
|
/** |
93 |
|
|
* Matches path strings similar to shell pattern matching (case...esac). |
94 |
|
|
* Say: wildcards match any character including / and therefore can span over |
95 |
|
|
* directory boundaries. |
96 |
|
|
* |
97 |
|
|
* @param query_path the path to be matched against this Pathstring |
98 |
|
|
* @return true if the path matches |
99 |
|
|
*/ |
100 |
|
|
bool IsMatchingRelaxed(const std::string &query_path) const; |
101 |
|
|
|
102 |
|
|
/** |
103 |
|
|
* Checks if the parsed Pathspec is valid and can be used. |
104 |
|
|
* @return true if this Pathspec is valid |
105 |
|
|
*/ |
106 |
|
1688 |
bool IsValid() const { return valid_; } |
107 |
|
|
|
108 |
|
|
/** |
109 |
|
|
* Checks if this Pathspec is defining an absolute path (i.e. starts with /) |
110 |
|
|
* @return true if this Pathspec is absolute |
111 |
|
|
*/ |
112 |
|
923 |
bool IsAbsolute() const { return absolute_; } |
113 |
|
|
|
114 |
|
|
/** |
115 |
|
|
* Generates an ordered list of unix-like glob strings split on directory |
116 |
|
|
* boundaries. Can be used to traverse down into a directory tree along a |
117 |
|
|
* given Pathspec. |
118 |
|
|
* |
119 |
|
|
* @return an ordered list of unixoid glob strings (usable in glob()) |
120 |
|
|
*/ |
121 |
|
|
const GlobStringSequence& GetGlobStringSequence() const; |
122 |
|
|
|
123 |
|
|
/** |
124 |
|
|
* Generates a single glob string out of this Pathspec. This string can be |
125 |
|
|
* used in glob() |
126 |
|
|
* |
127 |
|
|
* @return a unix-compatible glob string |
128 |
|
|
*/ |
129 |
|
|
const std::string& GetGlobString() const; |
130 |
|
|
|
131 |
|
|
Pathspec& operator=(const Pathspec &other); |
132 |
|
|
bool operator== (const Pathspec &other) const; |
133 |
|
20 |
bool operator!= (const Pathspec &other) const { return !(*this == other); } |
134 |
|
|
|
135 |
|
5464 |
static bool IsSpecialChar(const char chr) { |
136 |
✓✓✓✓
|
5464 |
return (chr == kWildcard || chr == kPlaceholder); |
137 |
|
|
} |
138 |
|
|
|
139 |
|
|
protected: |
140 |
|
|
void Parse(const std::string &spec); |
141 |
|
|
void ParsePathElement(const std::string::const_iterator &end, |
142 |
|
|
std::string::const_iterator *itr); |
143 |
|
|
|
144 |
|
|
bool IsPathspecMatching(const std::string &query_path) const; |
145 |
|
|
bool IsPathspecPrefixMatching(const std::string &query_path) const; |
146 |
|
|
bool IsPathspecMatchingRelaxed(const std::string &query_path) const; |
147 |
|
|
|
148 |
|
|
bool ApplyRegularExpression(const std::string &query_path, |
149 |
|
|
regex_t *regex) const; |
150 |
|
|
|
151 |
|
|
regex_t* GetRegularExpression() const; |
152 |
|
|
regex_t* GetPrefixRegularExpression() const; |
153 |
|
|
regex_t* GetRelaxedRegularExpression() const; |
154 |
|
|
|
155 |
|
|
std::string GenerateRegularExpression(const bool is_relaxed = false, |
156 |
|
|
const bool is_prefix = false) const; |
157 |
|
|
regex_t* CompileRegularExpression(const std::string ®ex) const; |
158 |
|
|
|
159 |
|
|
void PrintRegularExpressionError(const int error_code) const; |
160 |
|
|
|
161 |
|
|
void GenerateGlobStringSequence() const; |
162 |
|
|
void GenerateGlobString() const; |
163 |
|
|
|
164 |
|
|
void DestroyRegularExpressions(); |
165 |
|
|
|
166 |
|
|
private: |
167 |
|
|
ElementPatterns patterns_; |
168 |
|
|
|
169 |
|
|
mutable bool regex_compiled_; |
170 |
|
|
mutable regex_t *regex_; |
171 |
|
|
|
172 |
|
|
mutable bool relaxed_regex_compiled_; |
173 |
|
|
mutable regex_t *relaxed_regex_; |
174 |
|
|
|
175 |
|
|
mutable bool prefix_regex_compiled_; |
176 |
|
|
mutable regex_t *prefix_regex_; |
177 |
|
|
|
178 |
|
|
mutable bool glob_string_compiled_; |
179 |
|
|
mutable std::string glob_string_; |
180 |
|
|
|
181 |
|
|
mutable bool glob_string_sequence_compiled_; |
182 |
|
|
mutable GlobStringSequence glob_string_sequence_; |
183 |
|
|
|
184 |
|
|
bool valid_; |
185 |
|
|
bool absolute_; |
186 |
|
|
}; |
187 |
|
|
|
188 |
|
|
#endif // CVMFS_PATHSPEC_PATHSPEC_H_ |