| Directory: | cvmfs/ |
|---|---|
| File: | cvmfs/pathspec/pathspec.h |
| Date: | 2025-11-09 02:35:23 |
| Exec | Total | Coverage | |
|---|---|---|---|
| Lines: | 5 | 5 | 100.0% |
| Branches: | 4 | 4 | 100.0% |
| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /** | ||
| 2 | * This file is part of the CernVM File System. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #ifndef CVMFS_PATHSPEC_PATHSPEC_H_ | ||
| 6 | #define CVMFS_PATHSPEC_PATHSPEC_H_ | ||
| 7 | |||
| 8 | #include <regex.h> | ||
| 9 | |||
| 10 | #include <string> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "pathspec/pathspec_pattern.h" | ||
| 14 | |||
| 15 | /** | ||
| 16 | * A Pathspec is an abstract description of a file path pattern. | ||
| 17 | * Examples (adding a space in front of * - silence compiler warning): | ||
| 18 | * /foo/bar/ *.txt - matches .txt files in /foo/bar | ||
| 19 | * /kernel/2.6.? - matches directories like: /kernel/2.6.[0-9a-z] | ||
| 20 | * /test/ *_debug/ * - matches all files in /test/cvmfs_debug/ for example | ||
| 21 | * | ||
| 22 | * We are supporting both the wildcard (i.e. *) and the placeholder (i.e. ?) | ||
| 23 | * symbol. Furthermore Pathspecs can be absolute (starting with /) or relative. | ||
| 24 | * | ||
| 25 | * Pathspecs are similar to unix glob strings for file system paths and can be | ||
| 26 | * transformed into such strings or sequences of these (cut at directory | ||
| 27 | * boundaries). This comes in handy when searching CVMFS catalog entries with a | ||
| 28 | * Pathspec. | ||
| 29 | * Note: sophisticated Pathspec based catalog lookup was not needed, yet. But it | ||
| 30 | * is implemented and not merged (see: reneme/feature-lookup_pathspec). | ||
| 31 | * | ||
| 32 | * Also inverse matches are possible by transforming a Pathspec into a regular | ||
| 33 | * expression and matching path strings with it. There are two matching modes: | ||
| 34 | * IsMatching() - Matches the exact path | ||
| 35 | * (wildcards don't span directory boundaries) | ||
| 36 | * IsMatchingRelaxed() - Matches the path more relaxed | ||
| 37 | * (comparable to shell pattern matching, wildcards | ||
| 38 | * match any character including /) | ||
| 39 | * | ||
| 40 | * Internally a Pathspec is broken up into PathspecElementPatterns at the | ||
| 41 | * directory boundaries. Have a look there for further details. | ||
| 42 | * | ||
| 43 | * For matching, Pathspecs need to be transformed either into a regular expres- | ||
| 44 | * sion or GlobString(Sequence). These transformations are done lazily on first | ||
| 45 | * request. | ||
| 46 | */ | ||
| 47 | class Pathspec { | ||
| 48 | public: | ||
| 49 | static const char kSeparator = '/'; | ||
| 50 | static const char kEscaper = '\\'; | ||
| 51 | static const char kWildcard = '*'; | ||
| 52 | static const char kPlaceholder = '?'; | ||
| 53 | |||
| 54 | protected: | ||
| 55 | typedef std::vector<PathspecElementPattern> ElementPatterns; | ||
| 56 | |||
| 57 | public: | ||
| 58 | typedef std::vector<std::string> GlobStringSequence; | ||
| 59 | |||
| 60 | public: | ||
| 61 | /** | ||
| 62 | * Create a new Pathspec that represents the pattern handed in as a parameter. | ||
| 63 | * Note: The parser will determine if the given pattern is valid and set a | ||
| 64 | * flag. After creating a Pathspec, check IsValid() before using it. | ||
| 65 | * | ||
| 66 | * @param spec the pathspec pattern to be parsed | ||
| 67 | */ | ||
| 68 | explicit Pathspec(const std::string &spec); | ||
| 69 | Pathspec(const Pathspec &other); | ||
| 70 | // TODO(rmeusel): C++11 move constructor | ||
| 71 | ~Pathspec(); | ||
| 72 | |||
| 73 | /** | ||
| 74 | * Matches an exact path string. Directory boundaries are taken into account | ||
| 75 | * Say: wildcards do not match beyond a single directory tree level. | ||
| 76 | * | ||
| 77 | * @param query_path the path to be matched against this Pathspec | ||
| 78 | * @return true if the path matches | ||
| 79 | */ | ||
| 80 | bool IsMatching(const std::string &query_path) const; | ||
| 81 | |||
| 82 | |||
| 83 | /** | ||
| 84 | * Matches if the Pathspec is a prefix path of query_path. Directory | ||
| 85 | * boundaries are taken into account. Only works for absolute Pathspecs. | ||
| 86 | * | ||
| 87 | * @param query_path the path to be matched against this Pathspec | ||
| 88 | * @return true if the path matches | ||
| 89 | */ | ||
| 90 | bool IsPrefixMatching(const std::string &query_path) const; | ||
| 91 | |||
| 92 | /** | ||
| 93 | * Matches path strings similar to shell pattern matching (case...esac). | ||
| 94 | * Say: wildcards match any character including / and therefore can span over | ||
| 95 | * directory boundaries. | ||
| 96 | * | ||
| 97 | * @param query_path the path to be matched against this Pathspec | ||
| 98 | * @return true if the path matches | ||
| 99 | */ | ||
| 100 | bool IsMatchingRelaxed(const std::string &query_path) const; | ||
| 101 | |||
| 102 | /** | ||
| 103 | * Checks if the parsed Pathspec is valid and can be used. | ||
| 104 | * @return true if this Pathspec is valid | ||
| 105 | */ | ||
| 106 | 7104 | bool IsValid() const { return valid_; } | |
| 107 | |||
| 108 | /** | ||
| 109 | * Checks if this Pathspec is defining an absolute path (i.e. starts with /) | ||
| 110 | * @return true if this Pathspec is absolute | ||
| 111 | */ | ||
| 112 | 3745 | bool IsAbsolute() const { return absolute_; } | |
| 113 | |||
| 114 | /** | ||
| 115 | * Generates an ordered list of unix-like glob strings split on directory | ||
| 116 | * boundaries. Can be used to traverse down into a directory tree along a | ||
| 117 | * given Pathspec. | ||
| 118 | * | ||
| 119 | * @return an ordered list of unixoid glob strings (usable in glob()) | ||
| 120 | */ | ||
| 121 | const GlobStringSequence &GetGlobStringSequence() const; | ||
| 122 | |||
| 123 | /** | ||
| 124 | * Generates a single glob string out of this Pathspec. This string can be | ||
| 125 | * used in glob() | ||
| 126 | * | ||
| 127 | * @return a unix-compatible glob string | ||
| 128 | */ | ||
| 129 | const std::string &GetGlobString() const; | ||
| 130 | |||
| 131 | Pathspec &operator=(const Pathspec &other); | ||
| 132 | bool operator==(const Pathspec &other) const; | ||
| 133 | 20 | bool operator!=(const Pathspec &other) const { return !(*this == other); } | |
| 134 | |||
| 135 | 19665 | static bool IsSpecialChar(const char chr) { | |
| 136 | 4/4: ✓ Branch 0 taken 18724 times; ✓ Branch 1 taken 941 times; ✓ Branch 2 taken 377 times; ✓ Branch 3 taken 18347 times. | 19665 | return (chr == kWildcard || chr == kPlaceholder); |
| 137 | } | ||
| 138 | |||
| 139 | protected: | ||
| 140 | void Parse(const std::string &spec); | ||
| 141 | void ParsePathElement(const std::string::const_iterator &end, | ||
| 142 | std::string::const_iterator *itr); | ||
| 143 | |||
| 144 | bool IsPathspecMatching(const std::string &query_path) const; | ||
| 145 | bool IsPathspecPrefixMatching(const std::string &query_path) const; | ||
| 146 | bool IsPathspecMatchingRelaxed(const std::string &query_path) const; | ||
| 147 | |||
| 148 | bool ApplyRegularExpression(const std::string &query_path, | ||
| 149 | regex_t *regex) const; | ||
| 150 | |||
| 151 | regex_t *GetRegularExpression() const; | ||
| 152 | regex_t *GetPrefixRegularExpression() const; | ||
| 153 | regex_t *GetRelaxedRegularExpression() const; | ||
| 154 | |||
| 155 | std::string GenerateRegularExpression(const bool is_relaxed = false, | ||
| 156 | const bool is_prefix = false) const; | ||
| 157 | regex_t *CompileRegularExpression(const std::string ®ex) const; | ||
| 158 | |||
| 159 | void PrintRegularExpressionError(const int error_code) const; | ||
| 160 | |||
| 161 | void GenerateGlobStringSequence() const; | ||
| 162 | void GenerateGlobString() const; | ||
| 163 | |||
| 164 | void DestroyRegularExpressions(); | ||
| 165 | |||
| 166 | private: | ||
| 167 | ElementPatterns patterns_; | ||
| 168 | |||
| 169 | mutable regex_t *regex_; | ||
| 170 | mutable regex_t *relaxed_regex_; | ||
| 171 | mutable regex_t *prefix_regex_; | ||
| 172 | mutable std::string glob_string_; | ||
| 173 | mutable GlobStringSequence glob_string_sequence_; | ||
| 174 | |||
| 175 | mutable bool regex_compiled_; | ||
| 176 | mutable bool relaxed_regex_compiled_; | ||
| 177 | mutable bool prefix_regex_compiled_; | ||
| 178 | mutable bool glob_string_compiled_; | ||
| 179 | mutable bool glob_string_sequence_compiled_; | ||
| 180 | |||
| 181 | bool valid_; | ||
| 182 | bool absolute_; | ||
| 183 | }; | ||
| 184 | |||
| 185 | #endif // CVMFS_PATHSPEC_PATHSPEC_H_ | ||
| 186 |