GCC Code Coverage Report
Directory: cvmfs/ Exec Total Coverage
File: cvmfs/hash.h Lines: 156 157 99.4 %
Date: 2019-02-03 02:48:13 Branches: 97 130 74.6 %

Line Branch Exec Source
1
/**
2
 * This file is part of the CernVM File System.
3
 *
4
 * Provides a bit of syntactic sugar around the hash algorithms.
5
 * In particular, hashes can easily be created by constructors.
6
 * Also, we have a little to-string-from-string conversion.
7
 *
8
 * The complexity is due to the need to avoid dynamically allocated memory
9
 * for the hashes.  Almost everything happens on the stack.
10
 */
11
12
#ifndef CVMFS_HASH_H_
13
#define CVMFS_HASH_H_
14
15
#include <stdint.h>
16
17
#include <cassert>
18
#include <cctype>
19
#include <cstdlib>
20
#include <cstring>
21
#include <string>
22
23
#include "logging.h"
24
#include "prng.h"
25
#include "smalloc.h"
26
27
#ifdef CVMFS_NAMESPACE_GUARD
28
namespace CVMFS_NAMESPACE_GUARD {
29
#endif
30
31
namespace shash {
32
33
/**
34
 * Don't change order!  The integer value of the enum constants is used
35
 * as file catalog flags and as flags in communication with the cache manager.
36
 * If algorithms are added, the protocol definition for external cache managers
37
 * needs to be updated, too.
38
 */
39
// Identifies the hash algorithm of a digest.  The enumerator values are also
// used as indices into the per-algorithm tables of this header
// (kDigestSizes, kAlgorithmIdSizes).
enum Algorithms {
40
  kMd5 = 0,
41
  kSha1,
42
  kRmd160,
43
  kShake128,  // with 160 output bits
44
  // Template argument of struct Any; has table entries in kDigestSizes and
  // kAlgorithmIdSizes but none in kBlockSizes
  kAny,
45
};
46
47
/**
48
 * NOTE: when adding a suffix here, one must edit `cvmfs_swissknife scrub`
49
 *       accordingly, that checks for invalid hash suffixes
50
 */
51
// One-character markers that can be stored in Digest::suffix.  kSuffixNone
// (value 0) means "no suffix"; Digest::HasSuffix() compares against it.
const char kSuffixNone         = 0;
52
const char kSuffixCatalog      = 'C';
53
const char kSuffixHistory      = 'H';
54
const char kSuffixMicroCatalog = 'L';  // currently unused
55
const char kSuffixPartial      = 'P';
56
const char kSuffixTemporary    = 'T';
57
const char kSuffixCertificate  = 'X';
58
const char kSuffixMetainfo     = 'M';
59
60
61
/**
62
 * Corresponds to Algorithms.  "Any" is the maximum of all the other
63
 * digest sizes.
64
 * When the maximum digest size changes, the memory layout of DirectoryEntry and
65
 * PosixQuotaManager::LruCommand changes, too!
66
 */
67
// Digest length in bytes, indexed by the Algorithms enumerator value.
const unsigned kDigestSizes[] =
68
  {16,  20,   20,     20,       20};
69
// Md5  Sha1  Rmd160  Shake128  Any
70
// Largest entry of kDigestSizes; struct Any uses it as its storage size.
const unsigned kMaxDigestSize = 20;
71
72
/**
73
 * Hex representations of hashes with the same length need a suffix
74
 * to be distinguished from each other.  They should all have one but
75
 * for backwards compatibility MD5 and SHA-1 have none.  Initialized in hash.cc
76
 * like const char *kAlgorithmIds[] = {"", "", "-rmd160", ...
77
 */
78
// Textual identifier per algorithm, indexed by the Algorithms enumerator
// value.  Only declared here; the definition is not part of this header.
extern const char *kAlgorithmIds[];
79
// Length in characters of the corresponding kAlgorithmIds entry.
const unsigned kAlgorithmIdSizes[] =
80
  {0,   0,    7,       9,         0};
81
// Md5  Sha1  -rmd160  -shake128  Any
82
// Largest entry of kAlgorithmIdSizes.
const unsigned kMaxAlgorithmIdentifierSize = 9;
83
84
/**
85
 * Corresponds to Algorithms.  There is no block size for Any.
86
 * Is an HMAC for SHAKE well-defined?
87
 */
88
// Block sizes per algorithm.  NOTE: this table has only four entries, so it
// must not be indexed with kAny.
// NOTE(review): presumably in bytes and used by the HMAC code -- confirm
// against the implementation file.
const unsigned kBlockSizes[] =
89
  {64,  64,   64,     168};
90
// Md5  Sha1  Rmd160  Shake128
91
92
/**
93
 * Distinguishes between interpreting a string as hex hash and hashing over
94
 * the contents of a string.
95
 */
96
// Tag type wrapping a string that is to be interpreted as the hexadecimal
// representation of a hash.
struct HexPtr {
97
  // Address of the wrapped string; no copy is made, so the referenced string
  // has to stay alive as long as this object is used.
  const std::string *str;
98
15528
  explicit HexPtr(const std::string &s) { str = &s; }
99
  // Only declared here; the implementation is not part of this header.
  bool IsValid() const;
100
};
101
102
// Tag type wrapping a string whose contents are to be hashed (in contrast to
// HexPtr, which wraps an already hexified hash).
struct AsciiPtr {
103
  // Address of the wrapped string; no copy is made, so the referenced string
  // has to stay alive as long as this object is used.
  const std::string *str;
104
923
  explicit AsciiPtr(const std::string &s) { str = &s; }
105
};
106
107
// Type of the one-character hash suffix (see the kSuffix* constants).
typedef char Suffix;
108
109
/**
110
 * Holds a hash digest and provides from string / to string conversion and
111
 * comparison.  The kAny algorithm may not be used in functions!  The algorithm
112
 * has to be changed beforehand.
113
 * This class is not used directly, but used as base class of Md5, Sha1, ...
114
 */
115
template<unsigned digest_size_, Algorithms algorithm_>
116
struct Digest {
117
  // Raw digest bytes.  The buffer has digest_size_ bytes, but the accessors
  // only look at the first kDigestSizes[algorithm] of them.
  unsigned char digest[digest_size_];
118
  // Algorithm the bytes in `digest` belong to; initialized from algorithm_ by
  // the default constructor.
  Algorithms    algorithm;
119
  // One of the kSuffix* characters; kSuffixNone if the digest has no suffix.
  Suffix        suffix;
120
121
  // Read-only, character-wise view of a digest's textual form: the lower-case
  // hex digits of the digest followed by the algorithm identifier string.
  // The view keeps a reference to the digest and does not copy it.
  class Hex {
122
   public:
123
50805
    // Both lengths are computed once from the digest's algorithm at
    // construction time.
    explicit Hex(const Digest<digest_size_, algorithm_> *digest) :
124
      digest_(*digest),
125
      hash_length_(2 * kDigestSizes[digest_.algorithm]),
126
50805
      algo_id_length_(kAlgorithmIdSizes[digest_.algorithm]) {}
127
128
4026265
    // Total number of characters: hex digits plus algorithm identifier.
    unsigned int length() const { return hash_length_ + algo_id_length_; }
129
130
1960774
    // Character at `position` (must be < length()): positions below
    // hash_length_ are hex digits, the rest come from the algorithm identifier.
    char operator[](const unsigned int position) const {
131

1960774
      assert(position < length());
132
      return (position < hash_length_)
133
        ? GetHashChar(position)
134

1960774
        : GetAlgorithmIdentifierChar(position);
135
    }
136
137
   protected:
138
1960192
    // Hex digit for `position`: even positions yield the high nibble of byte
    // position/2, odd positions the low nibble.
    char GetHashChar(const unsigned int position) const {
139

1960192
      assert(position < hash_length_);
140
      const char digit = (position % 2 == 0)
141
        ? digest_.digest[position / 2] / 16
142

1960192
        : digest_.digest[position / 2] % 16;
143
1960192
      return ToHex(digit);
144
    }
145
146
582
    // Character of the algorithm identifier; `position` is relative to the
    // start of the whole textual form, hence the hash_length_ offset.
    char GetAlgorithmIdentifierChar(const unsigned int position) const {
147

582
      assert(position >= hash_length_);
148
582
      return kAlgorithmIds[digest_.algorithm][position - hash_length_];
149
    }
150
151

1960192
    // Maps a nibble value 0..15 to '0'..'9' or 'a'..'f'.
    char ToHex(const char c) const { return c + ((c <= 9) ? '0' : 'a' - 10); }
152
153
   private:
154
    const Digest<digest_size_, algorithm_>  &digest_;
155
    const unsigned int                       hash_length_;
156
    const unsigned int                       algo_id_length_;
157
  };
158
159
13023873
  // Digest length in bytes of the currently set algorithm.
  unsigned GetDigestSize() const { return kDigestSizes[algorithm]; }
160
40
  // Length of the textual form without hash suffix: two hex digits per digest
  // byte plus the length of the algorithm identifier.
  unsigned GetHexSize() const {
161
40
    return 2*kDigestSizes[algorithm] + kAlgorithmIdSizes[algorithm];
162
  }
163
164
103019703
  // Creates a null digest: algorithm taken from the template argument, no
  // suffix, all digest_size_ bytes zeroed.
  Digest() :
165
103019703
    algorithm(algorithm_), suffix(kSuffixNone)
166
  {
167
103019703
    SetNull();
168
103019695
  }
169
170
15460
  explicit Digest(const Algorithms a, const HexPtr hex, const char s = 0) :
171
15460
    algorithm(a), suffix(s)
172
  {
173
44
    assert((algorithm_ == kAny) || (a == algorithm_));
174
15460
    const unsigned char_size = 2*kDigestSizes[a];
175
176
15460
    const std::string *str = hex.str;
177
15460
    const unsigned length = str->length();
178
15460
    assert(length >= char_size);  // A suffix won't hurt
179
180
296476
    for (unsigned i = 0; i < char_size; i += 2) {
181

281016
      this->digest[i/2] =
182
        ((*str)[i] <= '9' ? (*str)[i] -'0' : (*str)[i] - 'a' + 10)*16 +
183
        ((*str)[i+1] <= '9' ? (*str)[i+1] - '0' : (*str)[i+1] - 'a' + 10);
184
    }
185
15460
  }
186
187
149
  Digest(const Algorithms a,
188
         const unsigned char *digest_buffer,
189
         const Suffix s = kSuffixNone) :
190
149
    algorithm(a), suffix(s)
191
  {
192
149
    memcpy(digest, digest_buffer, kDigestSizes[a]);
193
149
  }
194
195
  /**
196
   * Generates a purely random hash
197
   * Only used for testing purposes
198
   */
199
182
  // Fills the digest using a local Prng set up with InitLocaltime(); the
  // actual work is done by Randomize(Prng *).
  void Randomize() {
200
182
    Prng prng;
201
182
    prng.InitLocaltime();
202
182
    Randomize(&prng);
203
182
  }
204
205
  /**
206
   * Generates a purely random hash
207
   * Only used for testing purposes
208
   *
209
   * @param seed  random number generator seed (for reproducibility)
210
   */
211
8001027
  // Fills the digest using a local Prng set up with InitSeed(seed); the
  // actual work is done by Randomize(Prng *).
  void Randomize(const uint64_t seed) {
212
8001027
    Prng prng;
213
8001027
    prng.InitSeed(seed);
214
8001027
    Randomize(&prng);
215
8001027
  }
216
217
  /**
218
   * Generates a purely random hash
219
   * Only used for testing purposes
220
   *
221
   * @param prng  random number generator object (for external reproducibility)
222
   */
223
13014520
  // Overwrites the first GetDigestSize() bytes of the digest with values
  // drawn from prng->Next(256), one call per byte.  Algorithm and suffix are
  // left untouched.
  void Randomize(Prng *prng) {
224
13014520
    const unsigned bytes = GetDigestSize();
225

234623756
    for (unsigned i = 0; i < bytes; ++i) {
226
221609236
      digest[i] = prng->Next(256);
227
    }
228
13014520
  }
229
230
6975
  // True if a suffix other than kSuffixNone is set.
  bool HasSuffix() const { return suffix != kSuffixNone; }
231
5544
  // Replaces the stored suffix; the digest bytes are not changed.
  void set_suffix(const Suffix s) { suffix = s; }
232
233
  /**
234
   * Generates a hexified representation of the digest including the identifier
235
   * string for newly added hashes.
236
   *
237
   * @param with_suffix  append the hash suffix (C,H,X, ...) to the result
238
   * @return             a string representation of the digest
239
   */
240
47354
  // The result is pre-sized to its final length and then filled in place, so
  // the string is allocated only once.
  std::string ToString(const bool with_suffix = false) const {
241
47354
    Hex hex(this);
242


47354
    // The suffix is only emitted if it was requested and one is actually set
    const bool     use_suffix  = with_suffix && HasSuffix();
243
47354
    // use_suffix contributes 0 or 1 to the length
    const unsigned string_length = hex.length() + use_suffix;
244
47354
    std::string result(string_length, 0);
245
246

1886018
    // Hex digits followed by the algorithm identifier
    for (unsigned int i = 0; i < hex.length(); ++i) {
247
1838664
      result[i] = hex[i];
248
    }
249
250

47354
    if (use_suffix) {
251
61
      result[string_length - 1] = suffix;
252
    }
253
254

47354
    assert(result.length() == string_length);
255
47354
    return result;
256
  }
257
258
  /**
259
   * Generates a hexified representation of the digest including the identifier
260
   * string for newly added hashes.  Output is in the form of
261
   * 'openssl x509 fingerprint', e.g. 00:AA:BB:...-SHAKE128
262
   *
263
   * @param with_suffix  append the hash suffix (C,H,X, ...) to the result
264
   * @return             a string representation of the digest
265
   */
266
8
  // Like ToString() but upper-cased and with a ':' between the hex digit
  // pairs of the digest.  Note that toupper() is applied to every character
  // of the Hex view, i.e. to the algorithm identifier as well.
  std::string ToFingerprint(const bool with_suffix = false) const {
267
8
    Hex hex(this);
268

8
    const bool     use_suffix  = with_suffix && HasSuffix();
269
    // kDigestSizes[algorithm] - 1 is the number of ':' separators: one between
    // each pair of adjacent digest bytes
    const unsigned string_length =
270
8
      hex.length() + kDigestSizes[algorithm] - 1 + use_suffix;
271
8
    std::string result(string_length, 0);
272
273
8
    unsigned l = hex.length();
274
344
    // hex_i walks the Hex view, result_i the output; result_i additionally
    // advances whenever a separator is written
    for (unsigned int hex_i = 0, result_i = 0; hex_i < l; ++hex_i, ++result_i) {
275
336
      result[result_i] = toupper(hex[hex_i]);
276

336
      // Separator after every second hex digit, except after the last digit
      // of the digest (and never within the algorithm identifier)
      if ((hex_i < 2 * kDigestSizes[algorithm] - 1) && (hex_i % 2 == 1)) {
277
144
        result[++result_i] = ':';
278
      }
279
    }
280
281
8
    if (use_suffix) {
282
4
      result[string_length - 1] = suffix;
283
    }
284
285
8
    assert(result.length() == string_length);
286
8
    return result;
287
  }
288
289
  /**
290
   * Convenience method to generate a string representation of the digest.
291
   * See Digest<>::ToString() for details
292
   *
293
   * @return  a string representation including the hash suffix of the digest
294
   */
295
99
  // Shorthand for ToString(true); the suffix is still only appended if one
  // is set.
  std::string ToStringWithSuffix() const {
296
99
    return ToString(true);
297
  }
298
299
  /**
300
   * Generate the standard relative path from the hexified digest to be used in
301
   * CAS areas or cache directories. Throughout the entire system we use one
302
   * directory level (first two hex digest characters) for namespace splitting.
303
   * Note: This method appends the internal hash suffix to the path.
304
   *
305
   * @return  a relative path representation of the digest including the suffix
306
   */
307
2887
  // One directory level of two hex digits, followed by the stored suffix
  // (nothing is appended if the suffix is kSuffixNone).
  std::string MakePath() const {
308
2887
    return MakePathExplicit(1, 2, suffix);
309
  }
310
311
  /**
312
   * The alternative path is used to symlink the root catalog from the webserver
313
   * root to the data directory.  This way, the data directory can be protected
314
   * while the root catalog remains accessible.
315
   */
316
10
  // Returns ".cvmfsalt-" followed by ToStringWithSuffix(); no directory
  // levels are inserted.
  std::string MakeAlternativePath() const {
317
10
    return ".cvmfsalt-" + ToStringWithSuffix();
318
  }
319
320
  /**
321
   * Produces a relative path representation of the digest without appending the
322
   * hash suffix. See Digest<>::MakePath() for more details.
323
   *
324
   * @return  a relative path representation of the digest without the suffix
325
   */
326
540
  // Same layout as MakePath() (one directory level of two hex digits) but the
  // stored suffix is ignored.
  std::string MakePathWithoutSuffix() const {
327
540
    return MakePathExplicit(1, 2, kSuffixNone);
328
  }
329
330
  /**
331
   * Generates an arbitrary path representation of the digest. Both number of
332
   * directory levels and the hash-digits per level can be customized. Further-
333
   * more an arbitrary hash suffix can be provided.
334
   * Note: This method is mainly meant for internal usage but stays public for
335
   *       historical reasons.
336
   *
337
   * @param dir_levels        the number of namespace splitting directory levels
338
   * @param digits_per_level  each directory level's number of hex-digits
339
   * @param hash_suffix       the hash suffix character to be appended
340
   * @return                  a relative path representation of the digest
341
   */
342
3443
  std::string MakePathExplicit(const unsigned dir_levels,
343
                               const unsigned digits_per_level,
344
                               const Suffix   hash_suffix = kSuffixNone) const {
345
3443
    Hex hex(this);
346
347
    // figure out how big the output string needs to be
348
3443
    const bool use_suffix = (hash_suffix != kSuffixNone);
349
3443
    const unsigned string_length = hex.length() + dir_levels + use_suffix;
350
3443
    std::string result;
351
3443
    result.resize(string_length);
352
353
    // build hexified hash and path delimiters
354
3443
    unsigned i   = 0;
355
3443
    unsigned pos = 0;
356
125217
    for (; i < hex.length(); ++i) {
357

121774
      if (i > 0 && (i % digits_per_level == 0)
358
                && (i / digits_per_level <= dir_levels)) {
359
3451
        result[pos++] = '/';
360
      }
361
121774
      result[pos++] = hex[i];
362
    }
363
364
    // (optionally) add hash hint suffix
365
3443
    if (use_suffix) {
366
574
      result[pos++] = hash_suffix;
367
    }
368
369
3443
    assert(i   == hex.length());
370
3443
    assert(pos == string_length);
371
3443
    return result;
372
  }
373
374
20023
  // True if the first kDigestSizes[algorithm] bytes of the digest are all
  // zero.  Bytes beyond that length are not inspected.
  bool IsNull() const {
375

134562
    for (unsigned i = 0; i < kDigestSizes[algorithm]; ++i)
376

128837
      if (digest[i] != 0)
377
14298
        return false;
378
5725
    return true;
379
  }
380
381
382
103019725
  // Zeroes the complete buffer (digest_size_ bytes, independent of the
  // currently set algorithm).  Algorithm and suffix are left untouched.
  void SetNull() {
383
103019725
    memset(digest, 0, digest_size_);
384
103019725
  }
385
386
387
99143663
  // Two digests are equal if they have the same algorithm and the same first
  // kDigestSizes[algorithm] bytes.  The suffix does not take part in the
  // comparison.
  bool operator ==(const Digest<digest_size_, algorithm_> &other) const {
388

99143663
    if (this->algorithm != other.algorithm)
389
16233471
      return false;
390

704558745
    for (unsigned i = 0; i < kDigestSizes[algorithm]; ++i)
391

666425175
      if (this->digest[i] != other.digest[i])
392
44776622
        return false;
393
38133570
    return true;
394
  }
395
396
22171445
  // Negation of operator ==; the suffix is ignored here as well.
  bool operator !=(const Digest<digest_size_, algorithm_> &other) const {
397
22171445
    return !(*this == other);
398
  }
399
400
42544165
  // Ordering: first by algorithm (enumerator value), then byte-wise
  // lexicographically over the first kDigestSizes[algorithm] bytes.  The
  // suffix does not take part in the comparison.
  bool operator <(const Digest<digest_size_, algorithm_> &other) const {
401

42544165
    if (this->algorithm != other.algorithm)
402
2392148
      return (this->algorithm < other.algorithm);
403

104964103
    for (unsigned i = 0; i < kDigestSizes[algorithm]; ++i) {
404

102921273
      // The first differing byte decides
      if (this->digest[i] > other.digest[i])
405
18011727
        return false;
406

84909546
      if (this->digest[i] < other.digest[i])
407
20097460
        return true;
408
    }
409
2042830
    // All compared bytes are equal
    return false;
410
  }
411
412
24467
  // Mirror image of operator <: first by algorithm (enumerator value), then
  // byte-wise lexicographically over the first kDigestSizes[algorithm] bytes.
  // The suffix does not take part in the comparison.
  bool operator >(const Digest<digest_size_, algorithm_> &other) const {
413
24467
    if (this->algorithm != other.algorithm)
414
      return (this->algorithm > other.algorithm);
415
33417
    for (unsigned i = 0; i < kDigestSizes[algorithm]; ++i) {
416
33149
      // The first differing byte decides
      if (this->digest[i] < other.digest[i])
417
9021
        return false;
418
24128
      if (this->digest[i] > other.digest[i])
419
15178
        return true;
420
    }
421
268
    // All compared bytes are equal
    return false;
422
  }
423
};
424
425
426
34985120
// 16 byte digest fixed to the kMd5 algorithm.  Constructors without a body
// are only declared here; their implementation is not part of this header.
struct Md5 : public Digest<16, kMd5> {
427
43107932
  // Null digest
  Md5() : Digest<16, kMd5>() { }
428
  explicit Md5(const AsciiPtr ascii);
429
44
  // Parses the hex string wrapped by `hex`; see the Digest HexPtr constructor
  explicit Md5(const HexPtr hex) : Digest<16, kMd5>(kMd5, hex) { }
430
  Md5(const char *chars, const unsigned length);
431
432
  /**
433
   * An MD5 hash can be seen as two 64bit integers.
434
   */
435
  Md5(const uint64_t lo, const uint64_t hi);
436
  void ToIntPair(uint64_t *lo, uint64_t *hi) const;
437
};
438
439
// Fixed-algorithm digests with 20 bytes of storage each.  They add no members
// of their own, so only the default (null digest) construction of Digest is
// available.
struct Sha1 : public Digest<20, kSha1> { };
440
struct Rmd160 : public Digest<20, kRmd160> { };
441
2
struct Shake128 : public Digest<20, kShake128> { };
442
443
/**
444
 * Any as such must not be used except for digest storage.
445
 * To do real work, the class has to be "blessed" to be a real hash by
446
 * setting the algorithm field accordingly.
447
 */
448
51988679
// Digest with kMaxDigestSize bytes of storage whose algorithm is chosen at
// run time.
struct Any : public Digest<kMaxDigestSize, kAny> {
449
54640322
  // Null digest with algorithm kAny and no suffix
  Any() : Digest<kMaxDigestSize, kAny>() { }
450
451
5242854
  // Null digest (all bytes zero) with the given algorithm and suffix
  explicit Any(const Algorithms a,
452
               const char       s = kSuffixNone) :
453
5242854
    Digest<kMaxDigestSize, kAny>() { algorithm = a; suffix = s; }
454
455
149
  // Copies kDigestSizes[a] bytes from digest_buffer (see the corresponding
  // Digest constructor)
  Any(const Algorithms     a,
456
      const unsigned char *digest_buffer,
457
      const Suffix         suffix = kSuffixNone) :
458
149
    Digest<kMaxDigestSize, kAny>(a, digest_buffer, suffix) { }
459
460
15416
  // Parses the hex string wrapped by `hex` (see the corresponding Digest
  // constructor)
  explicit Any(const Algorithms  a,
461
               const HexPtr      hex,
462
               const char        suffix = kSuffixNone) :
463
15416
    Digest<kMaxDigestSize, kAny>(a, hex, suffix) { }
464
465
  // Only declared here; the implementation is not part of this header.
  Md5 CastToMd5();
466
};
467
468
469
/**
470
 * Actual operations on digests, like "hash a file", "hash a buffer", or
471
 * iterative operations.
472
 */
473
unsigned GetContextSize(const Algorithms algorithm);
474
475
/**
476
 * Holds an OpenSSL context, only required for hash operations.  Allows to
477
 * deferr the storage allocation for the context to alloca.
478
 */
479
// Plain descriptor of a hash context: algorithm, pointer to the context
// storage and the storage size.  This class neither allocates nor frees
// `buffer`; both constructors leave it NULL.
class ContextPtr {
480
 public:
481
  Algorithms  algorithm;
482
  // Context storage; has to be provided by the user of this class
  void       *buffer;
483
  // Size of the context storage as reported by GetContextSize()
  // NOTE(review): presumably in bytes -- confirm against GetContextSize()
  unsigned    size;
484
485
7062
  // Empty descriptor: algorithm kAny, no buffer, size 0
  ContextPtr() : algorithm(kAny), buffer(NULL), size(0) {}
486
487
233128
  // Descriptor for algorithm `a`; `size` is set to GetContextSize(a)
  explicit ContextPtr(const Algorithms a) :
488
233128
    algorithm(a), buffer(NULL), size(GetContextSize(a)) {}
489
};
490
491
void Init(ContextPtr context);
492
void Update(const unsigned char *buffer, const unsigned buffer_size,
493
            ContextPtr context);
494
void Final(ContextPtr context, Any *any_digest);
495
bool HashFile(const std::string &filename, Any *any_digest);
496
bool HashFd(int fd, Any *any_digest);
497
void HashMem(const unsigned char *buffer, const unsigned buffer_size,
498
             Any *any_digest);
499
void HashString(const std::string &content, Any *any_digest);
500
void Hmac(const std::string &key,
501
          const unsigned char *buffer, const unsigned buffer_size,
502
          Any *any_digest);
503
1
// Convenience wrapper around Hmac() for string content: passes the bytes of
// `content` (data() and size()) together with `key` and the output digest.
inline void HmacString(const std::string &key, const std::string &content,
504
                       Any *any_digest)
505
{
506
  Hmac(key,
507
       reinterpret_cast<const unsigned char *>(content.data()),
508
       content.size(),
509
1
       any_digest);
510
1
}
511
512
/**
513
 * Only used for AWS4 signature.
514
 *
515
 * Adding SHA-256 to the standard hash infrastructure would generally bloat the
516
 * digest size to 32 bytes and require client data structure transformation
517
 * during hotpatch.
518
 */
519
std::string Hmac256(const std::string &key, const std::string &content,
520
                    bool raw_output = false);
521
std::string Sha256File(const std::string &filename);
522
std::string Sha256Mem(const unsigned char *buffer, const unsigned buffer_size);
523
std::string Sha256String(const std::string &content);
524
525
Algorithms ParseHashAlgorithm(const std::string &algorithm_option);
526
Any MkFromHexPtr(const HexPtr hex, const Suffix suffix = kSuffixNone);
527
Any MkFromSuffixedHexPtr(const HexPtr hex);
528
529
}  // namespace shash
530
531
#ifdef CVMFS_NAMESPACE_GUARD
532
}  // namespace CVMFS_NAMESPACE_GUARD
533
#endif
534
535
#endif  // CVMFS_HASH_H_