GCC Code Coverage Report
Directory: cvmfs/
File: cvmfs/upload.h           Lines:    2 / 2    (100.0 %)
Date: 2019-02-03 02:48:13      Branches: 0 / 0    (-)
/**
 * This file is part of the CernVM File System.
 */

/**
 *    Backend Storage Spooler
 *    ~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This is the entry point to the general file processing facility of the CVMFS
 * backend. It works with a two-stage approach:
 *
 *   1. Process file content
 *      -> create smaller file chunks for big input files
 *      -> compress the file content (optionally chunked)
 *      -> generate a content hash of the compression result
 *
 *   2. Upload files
 *      -> pluggable to support different upload paths (local, S3, ...)
 *
 * There are a number of different entities involved in this process. Namely:
 *   -> Spooler            - general steering tasks ( + common interface )
 *   -> IngestionPipeline  - chunking, compression and hashing of files
 *   -> AbstractUploader   - abstract base class for uploading facilities
 *   -> concrete Uploaders - upload functionality for various backend storages
 *
 * Stage 1, the processing of files, is handled by the IngestionPipeline;
 * since it is independent of the actual uploading, this functionality is
 * factored out. The IngestionPipeline takes care of the above mentioned steps
 * in a concurrent fashion. This process is invoked by calling
 * Spooler::Process(). While processing, the IngestionPipeline immediately
 * schedules upload jobs in order to push data to the backend storage as early
 * as possible.
 *
 * Stage 2, the upload, is handled by one of the concrete Uploader classes.
 * Uploaders have a thin interface described in the AbstractUploader class.
 * The IngestionPipeline uses a concrete Uploader to push files into the backend
 * storage as part of its processing.
 * Furthermore the user can directly upload files using the Spooler::Upload()
 * method as described in the next paragraph.
 *
 * For some specific files we need to be able to circumvent the
 * IngestionPipeline and push them directly into the backend storage (e.g.
 * .cvmfspublished). For this, Spooler::Upload() is used. It directly schedules
 * an upload job in the concrete Uploader in use. These files are neither
 * compressed nor check-summed.
 *
 * In any case, calling Spooler::Process() or Spooler::Upload() will invoke a
 * callback once the whole job has finished. Callbacks are provided by the
 * Observable template. Please see the implementation of this template for
 * more details on its usage.
 * The data structure provided by this callback is called SpoolerResult and
 * contains information about the processed file (status, content hash,
 * chunks, ...).
 * Note: Even if a concrete Uploader internally spawns more than one upload job
 *       to send out chunked files, the user will only see a single invocation
 *       containing information about the uploaded file including its generated
 *       chunks.
 *
 * Workflow:
 *
 *   User
 *   \O/                Callback (SpoolerResult)
 *    |   <----------------------+
 *   / \                         |
 *    |                          |
 *    | Process()                |          File
 *    +-----------> ################### -----------------> #####################
 *    | Upload()    #     Spooler     #                    # IngestionPipeline #
 *    +-----------> ################### <----------------- #####################
 *                   |               ^      SpoolerResult          |    ^
 *                   |               |                             |    |
 *     direct Upload |      Callback |                             |    |
 *                  `|´              |            Schedule Upload  |    |
 *                 ##################### <-------------------------+    |
 *                 #  Upload facility  #                                |
 *                 ##################### -------------------------------+
 *                           |             Callback (UploaderResults)
 *                    Upload |
 *                          `|´
 *                 *********************
 *                 *  Backend Storage  *
 *                 *********************
 *
 *
 * TODO(rmeusel): special purpose ::Process...() methods should (optionally?)
 *                return Future<> instead of relying on the callbacks. Those are
 *                somewhat one-shot calls and require a rather fishy idiom when
 *                using callbacks, like:
 *
 *                   cb = spooler->RegisterListener(...certificate_callback...);
 *                   spooler->ProcessCertificate(...);
 *                   spooler->WaitForUpload();
 *                   spooler->UnregisterListener(cb);
 *                   cert_hash = global_cert_hash;
 *
 *                   void certificate_callback(shash::Any &hash) {
 *                     global_cert_hash = hash;
 *                   }
 *
 *                If ProcessCertificate(), ProcessHistory(), ProcessMetainfo(),
 *                UploadManifest() and UploadReflog() returned Future<>, the
 *                code would be much more comprehensible and free of user-
 *                managed global state:
 *
 *                   Future<shash::Any> fc = spooler->ProcessCertificate(...);
 *                   cert_hash = fc.get();
 *
 */

#ifndef CVMFS_UPLOAD_H_
#define CVMFS_UPLOAD_H_

#include <cstdio>
#include <string>
#include <vector>

#include "file_chunk.h"
#include "hash.h"
#include "ingestion/pipeline.h"
#include "repository_tag.h"
#include "upload_facility.h"
#include "upload_spooler_definition.h"
#include "upload_spooler_result.h"
#include "util/pointer.h"
#include "util/shared_ptr.h"
#include "util_concurrency.h"

namespace upload {

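/*
 * Typical callback-driven usage, as a minimal sketch. Not verbatim from the
 * CVMFS sources: `definition`, `owner`, and `MyOwner::OnProcessed` are
 * hypothetical names, the CallbackPtr listener handle is assumed to come from
 * the Observable base class, and error handling is omitted.
 *
 *   upload::Spooler *spooler = upload::Spooler::Construct(definition);
 *   upload::Spooler::CallbackPtr cb =
 *       spooler->RegisterListener(&MyOwner::OnProcessed, owner);
 *   spooler->Process(source);    // asynchronous; the callback fires with a
 *                                // SpoolerResult once the job has finished
 *   spooler->WaitForUpload();    // block until all scheduled jobs are done
 *   spooler->UnregisterListener(cb);
 *   delete spooler;
 */
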
/**
 * The Spooler takes care of the upload procedure of files into a backend
 * storage. It can be extended to support multiple backend storage types,
 * such as the local file system or a key-value storage.
 *
 * This Spooler class defines little more than the common spooler interface.
 * There are derived classes that actually implement different types of
 * spoolers.
 *
 * Note: A spooler is derived from the Observable template, meaning that it
 *       allows for Listeners to be registered onto it.
 */
class Spooler : public Observable<SpoolerResult> {
 public:
  static Spooler *Construct(const SpoolerDefinition &spooler_definition,
                            perf::StatisticsTemplate *statistics = NULL);
  virtual ~Spooler();

  /**
   * Returns the name of the targeted backend storage.
   * Intended for debugging purposes only!
   */
  std::string backend_name() const;

  /**
   * Schedules a copy job that transfers a file found at local_path to the
   * location pointed to by remote_path. Copy jobs do not hash or compress the
   * given file. They simply upload it.
   * When the copying has finished, a callback will be invoked asynchronously.
   *
   * @param local_path    path to the file which needs to be copied into the
   *                      backend storage
   * @param remote_path   the destination of the file to be copied in the
   *                      backend storage
   */
  void Upload(const std::string &local_path, const std::string &remote_path);

  /**
   * Convenience wrapper to upload the Manifest file into the backend storage.
   *
   * @param local_path  the location of the (signed) manifest to be uploaded
   */
  void UploadManifest(const std::string &local_path);

  /**
   * Convenience wrapper to upload a Reflog database into the backend storage.
   *
   * @param local_path  the SQLite file location of the Reflog to be uploaded
   */
  void UploadReflog(const std::string &local_path);

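  /*
   * Example of a direct upload, as a sketch (the manifest path is
   * illustrative only):
   *
   *   spooler->UploadManifest("/srv/repo/.cvmfspublished.signed");
   *   spooler->WaitForUpload();
   *   assert(spooler->GetNumberOfErrors() == 0);  // all copy jobs succeeded
   */
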
  /**
   * Schedules a process job that compresses and hashes the file provided by
   * the given ingestion source and uploads it into the CAS backend. The
   * remote path of the file is determined by the content hash of the
   * compressed file, appended by a file suffix.
   * When the processing has finished, a callback will be invoked
   * asynchronously.
   *
   * Note: This method might decide to chunk the file into a number of smaller
   *       parts and upload them separately. Still, you will receive a single
   *       callback for the whole job that contains information about the
   *       generated chunks.
   *
   * @param source          the ingestion source of the file to be processed
   *                        and uploaded into the backend storage
   * @param allow_chunking  (optional) controls if this file should be cut in
   *                        chunks or uploaded at once
   */
  void Process(IngestionSource *source, const bool allow_chunking = true);

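  /*
   * Example, as a sketch. It assumes that FileIngestionSource (from
   * ingestion/ingestion_source.h) is the file-backed IngestionSource
   * implementation; the input path is illustrative only.
   *
   *   FileIngestionSource source("/data/big_input_file");
   *   spooler->Process(&source);   // chunked, compressed, hashed and
   *                                // uploaded; the registered callback
   *                                // receives the resulting SpoolerResult
   *   spooler->WaitForUpload();
   */
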
  /**
   * Convenience wrapper to process a catalog file. Please always use this
   * for catalog processing. It will add special flags and hash suffixes.
   *
   * @param local_path  the location of the catalog file to be processed
   */
  void ProcessCatalog(const std::string &local_path);

  /**
   * Convenience wrapper to process a history database file. This sets the
   * processing parameters (like chunking and hash suffixes) accordingly.
   *
   * @param local_path  the location of the history database file
   */
  void ProcessHistory(const std::string &local_path);

  /**
   * Convenience wrapper to process a certificate file. This sets the
   * processing parameters (like chunking and hash suffixes) accordingly.
   *
   * @param local_path  the location of the source of the certificate file
   */
  void ProcessCertificate(const std::string &local_path);

  /**
   * Convenience wrapper to process a meta info file.
   *
   * @param local_path  the location of the meta info file
   */
  void ProcessMetainfo(const std::string &local_path);

  /**
   * Deletes the given file from the repository backend storage. This method
   * is asynchronous; use WaitForUpload() to make sure the delete operation
   * has reached the upload backend.
   *
   * @param file_to_delete   path to the file to be deleted
   */
  void RemoveAsync(const std::string &file_to_delete);

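  /*
   * Example, as a sketch (the object path is illustrative only):
   *
   *   spooler->RemoveAsync("data/ab/cdef0123");  // schedule the deletion
   *   spooler->WaitForUpload();                  // ensure it reached the
   *                                              // backend storage
   *   bool still_there = spooler->Peek("data/ab/cdef0123");
   */
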
  /**
   * Checks if a file is already present in the backend storage.
   *
   * @param path  the path of the file to be peeked
   * @return      true if the file was found in the backend storage
   */
  bool Peek(const std::string &path) const;

  /**
   * Creates a top-level shortcut to the given data object. This is
   * particularly useful for bootstrapping repositories whose data directory
   * is secured by a VOMS certificate.
   *
   * @param object  content hash of the object to be exposed on the top level
   * @return        true on success
   */
  bool PlaceBootstrappingShortcut(const shash::Any &object) const;

  /**
   * Blocks until all jobs currently under processing are finished. After it
   * returns, more jobs can be scheduled if needed.
   * Note: We assume that no one schedules new jobs while this method is
   *       waiting. Otherwise it might never return, since the job queue
   *       would never become empty.
   */
  void WaitForUpload() const;

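  /**
   * Finalizes the backend session by either committing or aborting the
   * staged changes. (Summary inferred from the signature below; the exact
   * semantics depend on the concrete Uploader in use.)
   *
   * @param commit         whether to commit (true) or abort the session
   * @param old_root_hash  root hash the staged changes are based on
   * @param new_root_hash  root hash after applying the staged changes
   * @param tag            repository tag to associate with the new revision
   * @return               true on success
   */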
  bool FinalizeSession(bool commit, const std::string &old_root_hash = "",
                       const std::string &new_root_hash = "",
                       const RepositoryTag &tag = RepositoryTag()) const;

  /**
   * Checks how many of the already processed jobs have failed.
   *
   * @return   the number of failed jobs at the time this method is invoked
   */
  unsigned int GetNumberOfErrors() const;

  shash::Algorithms GetHashAlgorithm() const {
    return spooler_definition_.hash_algorithm;
  }

 protected:
  /**
   * This method is called once before any other operations are performed on
   * a Spooler. Implements global initialization work.
   */
  bool Initialize(perf::StatisticsTemplate *statistics);

  /**
   * @param spooler_definition   the SpoolerDefinition structure that defines
   *                             some intrinsics of the concrete Spoolers
   */
  explicit Spooler(const SpoolerDefinition &spooler_definition);

  /**
   * Used internally: called when the ingestion pipeline finishes a job.
   * Automatically takes care of processed files and prepares them for upload.
   */
  void ProcessingCallback(const SpoolerResult &data);

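  /**
   * Used internally: called when the upload facility finishes an upload job.
   * (Summary added for symmetry with ProcessingCallback; see the
   * implementation for the exact bookkeeping performed.)
   */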
  void UploadingCallback(const UploaderResults &data);

  /**
   * @return   the spooler definition that was initially given to any Spooler
   *           constructor
   */
  inline const SpoolerDefinition &spooler_definition() const {
    return spooler_definition_;
  }

 private:
  // Status Information
  const SpoolerDefinition spooler_definition_;

  UniquePtr<IngestionPipeline> ingestion_pipeline_;
  UniquePtr<AbstractUploader> uploader_;
};

}  // namespace upload

#endif  // CVMFS_UPLOAD_H_