1 |
|
|
/** |
2 |
|
|
* This file is part of the CernVM File System. |
3 |
|
|
* |
4 |
|
|
* Linux specific system/library calls. |
5 |
|
|
*/ |
6 |
|
|
|
7 |
|
|
#ifndef CVMFS_PLATFORM_LINUX_H_ |
8 |
|
|
#define CVMFS_PLATFORM_LINUX_H_ |
9 |
|
|
|
10 |
|
|
#include <sys/types.h> // contains ssize_t needed inside <attr/xattr.h> |
11 |
|
|
#include <sys/xattr.h> |
12 |
|
|
#ifdef HAVE_ATTR_XATTR_H |
13 |
|
|
#include <attr/xattr.h> // NOLINT(build/include_alpha) |
14 |
|
|
#endif |
15 |
|
|
#include <dirent.h> |
16 |
|
|
#include <errno.h> |
17 |
|
|
#include <fcntl.h> |
18 |
|
|
#include <limits.h> |
19 |
|
|
#include <mntent.h> |
20 |
|
|
#include <pthread.h> |
21 |
|
|
#include <signal.h> |
22 |
|
|
#include <sys/file.h> |
23 |
|
|
#include <sys/mount.h> |
24 |
|
|
#include <sys/prctl.h> |
25 |
|
|
#include <sys/select.h> |
26 |
|
|
#include <sys/stat.h> |
27 |
|
|
#include <sys/utsname.h> |
28 |
|
|
#include <unistd.h> |
29 |
|
|
|
30 |
|
|
#include <cassert> |
31 |
|
|
#include <cstdio> |
32 |
|
|
#include <cstdlib> |
33 |
|
|
#include <cstring> |
34 |
|
|
#include <ctime> |
35 |
|
|
#include <string> |
36 |
|
|
#include <vector> |
37 |
|
|
|
38 |
|
|
#ifdef CVMFS_ENABLE_INOTIFY |
39 |
|
|
#include "file_watcher_inotify.h" |
40 |
|
|
#else // CVMFS_ENABLE_INOTIFY |
41 |
|
|
#include "file_watcher.h" |
42 |
|
|
#endif // CVMFS_ENABLE_INOTIFY |
43 |
|
|
#include "smalloc.h" |
44 |
|
|
|
45 |
|
|
#ifdef CVMFS_NAMESPACE_GUARD |
46 |
|
|
namespace CVMFS_NAMESPACE_GUARD { |
47 |
|
|
#endif |
48 |
|
|
|
49 |
|
|
#define platform_sighandler_t sighandler_t |
50 |
|
|
|
51 |
|
|
/**
 * Lists the mount points of the currently mounted file systems.
 *
 * The list is read from /proc/mounts through the getmntent() family of libc
 * functions.
 *
 * @return the mnt_dir field of every entry in /proc/mounts; an empty vector
 *         if /proc/mounts cannot be opened
 */
inline std::vector<std::string> platform_mountlist() {
  std::vector<std::string> result;
  FILE *fmnt = setmntent("/proc/mounts", "r");
  // setmntent() returns NULL on failure; handing a NULL stream to
  // getmntent() / endmntent() would crash
  if (fmnt == NULL)
    return result;

  struct mntent *mntbuf;  // Static buffer managed by libc!
  while ((mntbuf = getmntent(fmnt)) != NULL) {
    result.push_back(mntbuf->mnt_dir);
  }
  endmntent(fmnt);
  return result;
}
61 |
|
|
|
62 |
|
|
// glibc < 2.11 |
63 |
|
|
#ifndef MNT_DETACH |
64 |
|
|
#define MNT_DETACH 0x00000002 |
65 |
|
|
#endif |
66 |
|
|
/**
 * Unmounts the file system mounted at mountpoint.
 *
 * If /etc/mtab (_PATH_MOUNTED) is a regular file, i.e. not a symlink to
 * /proc/mounts, the entry of mountpoint is removed from it first.  To do so,
 * a copy of /etc/mtab without the entry is written to a temporary file that
 * is then renamed onto /etc/mtab.  The rewrite is serialized by an exclusive
 * flock() on a dedicated lock file.
 *
 * @param mountpoint  the directory to unmount
 * @param lazy        if true, umount2() is called with MNT_DETACH
 * @return true if umount2() succeeded; false if umount2() failed or if
 *         /etc/mtab could not be updated (umount2() is not attempted then)
 */
inline bool platform_umount(const char *mountpoint, const bool lazy) {
  struct stat64 mtab_info;
  int retval = lstat64(_PATH_MOUNTED, &mtab_info);
  // If /etc/mtab exists and is not a symlink to /proc/mounts
  if ((retval == 0) && S_ISREG(mtab_info.st_mode)) {
    // Lock the modification on /etc/mtab against concurrent
    // crash unmount handlers (removing the lock file would result in a race)
    std::string lockfile = std::string(_PATH_MOUNTED) + ".cvmfslock";
    const int fd_lockfile = open(lockfile.c_str(), O_RDONLY | O_CREAT, 0600);
    if (fd_lockfile < 0) return false;

    // Up to 11 non-blocking attempts, one second apart.  Success is tracked
    // explicitly so that a lock obtained on the very last attempt is not
    // mistaken for a timeout.
    bool locked = false;
    for (int timeout = 10; ; --timeout) {
      if (flock(fd_lockfile, LOCK_EX | LOCK_NB) == 0) {
        locked = true;
        break;
      }
      if ((errno != EWOULDBLOCK) || (timeout <= 0))
        break;
      struct timeval wait_for;
      wait_for.tv_sec = 1;
      wait_for.tv_usec = 0;
      select(0, NULL, NULL, NULL, &wait_for);
    }
    if (!locked) {
      close(fd_lockfile);
      return false;
    }

    // Remove entry from /etc/mtab (create new file without entry)
    std::string mntnew = std::string(_PATH_MOUNTED) + ".cvmfstmp";
    FILE *fmntold = setmntent(_PATH_MOUNTED, "r");
    if (!fmntold) {
      flock(fd_lockfile, LOCK_UN);
      close(fd_lockfile);
      return false;
    }
    FILE *fmntnew = setmntent(mntnew.c_str(), "w+");
    // Without the temporary file there is nothing to write to; bail out
    // unconditionally instead of passing a NULL stream to addmntent()
    if (!fmntnew) {
      endmntent(fmntold);
      flock(fd_lockfile, LOCK_UN);
      close(fd_lockfile);
      return false;
    }
    struct mntent *mntbuf;  // Static buffer managed by libc!
    while ((mntbuf = getmntent(fmntold)) != NULL) {
      if (strcmp(mntbuf->mnt_dir, mountpoint) != 0) {
        retval = addmntent(fmntnew, mntbuf);
        if (retval != 0) {
          endmntent(fmntold);
          endmntent(fmntnew);
          unlink(mntnew.c_str());
          flock(fd_lockfile, LOCK_UN);
          close(fd_lockfile);
          return false;
        }
      }
    }
    endmntent(fmntold);
    endmntent(fmntnew);
    retval = rename(mntnew.c_str(), _PATH_MOUNTED);
    flock(fd_lockfile, LOCK_UN);
    close(fd_lockfile);
    if (retval != 0) return false;
    // Best effort: restore mode and ownership of the original /etc/mtab
    retval = chmod(_PATH_MOUNTED, mtab_info.st_mode);
    retval = chown(_PATH_MOUNTED, mtab_info.st_uid, mtab_info.st_gid);
    // We pick up these return values only to silence compiler warnings
  }

  int flags = lazy ? MNT_DETACH : 0;
  retval = umount2(mountpoint, flags);
  return retval == 0;
}
139 |
|
|
|
140 |
|
|
/**
 * Spinlocks are not necessarily provided by pthread on all platforms.  Here
 * they are thin wrappers around the pthread_spin_*() functions.
 */
typedef pthread_spinlock_t platform_spinlock;

/** Forwards to pthread_spin_init() and hands back its return value. */
inline int platform_spinlock_init(platform_spinlock *lock, int pshared) {
  const int rc = pthread_spin_init(lock, pshared);
  return rc;
}

/** Forwards to pthread_spin_destroy() and hands back its return value. */
inline int platform_spinlock_destroy(platform_spinlock *lock) {
  const int rc = pthread_spin_destroy(lock);
  return rc;
}

/** Forwards to pthread_spin_trylock() and hands back its return value. */
inline int platform_spinlock_trylock(platform_spinlock *lock) {
  const int rc = pthread_spin_trylock(lock);
  return rc;
}

/** Forwards to pthread_spin_unlock(); the return value is discarded. */
inline void platform_spinlock_unlock(platform_spinlock *lock) {
  (void)pthread_spin_unlock(lock);
}
160 |
|
|
|
161 |
|
|
/**
 * Returns the calling thread's id as reported by pthread_self().  The type is
 * pthread_t because pthread_self() is not necessarily an unsigned long.
 */
inline pthread_t platform_gettid() {
  const pthread_t self = pthread_self();
  return self;
}
165 |
|
|
|
166 |
|
1 |
/**
 * Waits for a single signal by means of sigwaitinfo().
 *
 * @param signum  the only signal placed into the set that is waited for
 * @return the return value of sigwaitinfo()
 */
inline int platform_sigwait(const int signum) {
  sigset_t wait_set;

  int rc = sigemptyset(&wait_set);
  assert(rc == 0);

  rc = sigaddset(&wait_set, signum);
  assert(rc == 0);

  // The siginfo details are of no interest, hence the NULL info pointer
  return sigwaitinfo(&wait_set, NULL);
}
175 |
|
|
|
176 |
|
|
/**
 * Grants a PID the capability to ptrace() the calling process.
 *
 * @param pid  the PID of the process to be granted ptrace() access
 *             (may be ignored)
 * @return true when successful
 */
inline bool platform_allow_ptrace(const pid_t pid) {
#ifdef PR_SET_PTRACER
  // On Ubuntu, yama prevents all processes from ptracing other processes,
  // even when they are owned by the same user.  Without this extra
  // permission the watchdog would not be able to create a stacktrace.
  if (prctl(PR_SET_PTRACER, pid, 0, 0, 0) == 0)
    return true;

  // On some platforms (e.g. CentOS7), PR_SET_PTRACER is defined but not
  // supported by the kernel.  That is fine and counts as success.
  return errno == EINVAL;
#else
  // On other platforms this is currently a no-op
  return true;
#endif
}
198 |
|
|
|
199 |
|
|
/** |
200 |
|
|
* File system functions, ensure 64bit versions. |
201 |
|
|
*/ |
202 |
|
|
typedef struct dirent64 platform_dirent64;

/**
 * Fetches the next entry of an open directory stream through readdir64().
 *
 * @param dirp  directory stream to read from
 * @return whatever readdir64() returns for dirp
 */
inline platform_dirent64 *platform_readdir(DIR *dirp) {
  platform_dirent64 *entry = readdir64(dirp);
  return entry;
}
207 |
|
|
|
208 |
|
|
typedef struct stat64 platform_stat64;

/** Forwards to stat64() and hands back its return value. */
inline int platform_stat(const char *path, platform_stat64 *buf) {
  const int rc = stat64(path, buf);
  return rc;
}

/** Forwards to lstat64() and hands back its return value. */
inline int platform_lstat(const char *path, platform_stat64 *buf) {
  const int rc = lstat64(path, buf);
  return rc;
}

/** Forwards to fstat64() and hands back its return value. */
inline int platform_fstat(int filedes, platform_stat64 *buf) {
  const int rc = fstat64(filedes, buf);
  return rc;
}
221 |
|
|
|
222 |
|
|
// TODO(jblomer): the translation from C to C++ should be done elsewhere |
223 |
|
|
/**
 * Reads an extended attribute of a file into a std::string.
 *
 * The size of the attribute value is probed with a zero-sized getxattr()
 * call first; the value is then fetched into a buffer of exactly that size.
 * Values of any length are handled, including a length of one byte.
 *
 * @param path   path of the file, passed to getxattr()
 * @param name   name of the extended attribute
 * @param value  receives the attribute value on success; left untouched on
 *               failure
 * @return true if the attribute was read (possibly as an empty value), false
 *         if either getxattr() call failed
 */
inline bool platform_getxattr(const std::string &path, const std::string &name,
                              std::string *value) {
  // A NULL buffer of size 0 makes getxattr() report the size of the value
  const ssize_t probed_size = getxattr(path.c_str(), name.c_str(), NULL, 0);
  if (probed_size < 0)
    return false;
  if (probed_size == 0) {
    value->assign("");
    return true;
  }

  std::string buffer(static_cast<size_t>(probed_size), '\0');
  const ssize_t nbytes =
    getxattr(path.c_str(), name.c_str(), &buffer[0], buffer.size());
  // Fails, e.g., if the value grew between the two calls
  if ((nbytes < 0) || (static_cast<size_t>(nbytes) > buffer.size()))
    return false;

  // The value may have shrunk in the meantime; only keep what was written
  buffer.resize(static_cast<size_t>(nbytes));
  value->swap(buffer);
  return true;
}
246 |
|
|
|
247 |
|
|
// TODO(jblomer): the translation from C to C++ should be done elsewhere |
248 |
|
20 |
/**
 * Sets an extended attribute through setxattr() with flags 0.
 *
 * @return true if setxattr() returned 0
 */
inline bool platform_setxattr(const std::string &path, const std::string &name,
                              const std::string &value) {
  const char *data = value.c_str();
  const size_t nbytes = value.size();
  return setxattr(path.c_str(), name.c_str(), data, nbytes, 0) == 0;
}
254 |
|
|
|
255 |
|
|
/**
 * Sets an extended attribute through lsetxattr() with flags 0.
 *
 * @return true if lsetxattr() returned 0
 */
inline bool platform_lsetxattr(const std::string &path, const std::string &name,
                               const std::string &value) {
  const char *data = value.c_str();
  const size_t nbytes = value.size();
  return lsetxattr(path.c_str(), name.c_str(), data, nbytes, 0) == 0;
}
261 |
|
|
|
262 |
|
32 |
/** Forwards all arguments to lgetxattr() and hands back its return value. */
inline ssize_t platform_lgetxattr(const char *path, const char *name,
                                  void *value, size_t size) {
  const ssize_t nbytes = lgetxattr(path, name, value, size);
  return nbytes;
}
266 |
|
|
|
267 |
|
36 |
/** Forwards all arguments to llistxattr() and hands back its return value. */
inline ssize_t platform_llistxattr(const char *path, char *list, size_t size) {
  const ssize_t nbytes = llistxattr(path, list, size);
  return nbytes;
}
270 |
|
|
|
271 |
|
3 |
/**
 * Advises the kernel that the file is accessed randomly and that its data is
 * not going to be reused.  Best effort: errors of posix_fadvise() are ignored.
 *
 * The advice values of posix_fadvise() are distinct constants, not bit
 * flags, so they must not be OR-ed together: with the usual Linux values
 * (POSIX_FADV_RANDOM == 1, POSIX_FADV_NOREUSE == 5) the OR-ed value is just
 * POSIX_FADV_NOREUSE.  Hence each advice is given in a call of its own.
 *
 * @param filedes  open file descriptor the advice applies to (whole file)
 */
inline void platform_disable_kcache(int filedes) {
  (void)posix_fadvise(filedes, 0, 0, POSIX_FADV_RANDOM);
  (void)posix_fadvise(filedes, 0, 0, POSIX_FADV_NOREUSE);
}
274 |
|
|
|
275 |
|
118 |
/**
 * Calls readahead() on the file descriptor, starting at offset 0 and with
 * the largest possible size_t value as byte count.
 *
 * @return the return value of readahead(), converted to int
 */
inline int platform_readahead(int filedes) {
  const size_t max_count = static_cast<size_t>(-1);
  return readahead(filedes, 0, max_count);
}
278 |
|
|
|
279 |
|
|
/** |
280 |
|
|
* Advises the kernel to evict the given file region from the page cache. |
281 |
|
|
* |
282 |
|
|
* Note: Pages containing the data at `offset` and `offset + length` are NOT |
283 |
|
|
* evicted by the kernel. This means that a few pages are not purged when |
284 |
|
|
* offset and length are not exactly on page boundaries. See below: |
285 |
|
|
* |
286 |
|
|
 *               offset                                   length
 *                  |                                        |
 *   +---------+----|----+---------+---------+---------+-----|---+---------+
 *   |         |    |    | xxxxxxx | xxxxxxx | xxxxxxx |     |   |         |
 *   |         |    |    | xxxxxxx | xxxxxxx | xxxxxxx |     |   |         |
 *   +---------+----|----+---------+---------+---------+-----|---+---------+
 *   0       4096   |  8192      12288     16384     20480   | 24576     28672
293 |
|
|
* |
294 |
|
|
* git.kernel.org/cgit/linux/kernel/git/stable/linux-stable.git/tree/mm/fadvise.c#n115 |
295 |
|
|
* |
296 |
|
|
* TODO(rmeusel): figure out a clever way how to align `offset` and `length` |
297 |
|
|
* |
298 |
|
|
* @param fd file descriptor whose page cache should be (partially) evicted |
299 |
|
|
* @param offset start offset of the pages to be evicted |
300 |
|
|
* @param length number of bytes to be evicted |
301 |
|
|
*/ |
302 |
|
155 |
inline int platform_invalidate_kcache(const int fd, const off_t offset,
                                      const size_t length) {
  // The result of posix_fadvise(POSIX_FADV_DONTNEED) is passed on unchanged
  const int rc = posix_fadvise(fd, offset, length, POSIX_FADV_DONTNEED);
  return rc;
}
306 |
|
|
|
307 |
|
|
/**
 * Builds the file name of a shared library from its base name.
 *
 * @param base_name  library name without prefix and suffix
 * @return base_name enclosed by the "lib" prefix and the ".so" suffix
 */
inline std::string platform_libname(const std::string &base_name) {
  std::string libname("lib");
  libname += base_name;
  libname += ".so";
  return libname;
}
310 |
|
|
|
311 |
|
15 |
/**
 * Resolves the /proc/self/exe symlink.
 *
 * @return the link target, or an empty string if readlink() fails or
 *         yields nothing
 */
inline std::string platform_getexepath() {
  char buf[PATH_MAX + 1];
  const int nbytes = readlink("/proc/self/exe", buf, PATH_MAX);
  if (nbytes <= 0)
    return "";

  // readlink() does not terminate the string
  buf[nbytes] = '\0';
  return std::string(buf);
}
320 |
|
|
|
321 |
|
390592005 |
/**
 * Reads the monotonic clock in seconds, rounded to the nearest second.
 * CLOCK_MONOTONIC_COARSE is used where it is defined, CLOCK_MONOTONIC
 * otherwise.
 *
 * @return seconds of the clock reading, plus one if the nanosecond part is
 *         at least half a second
 */
inline uint64_t platform_monotonic_time() {
#ifdef CLOCK_MONOTONIC_COARSE
  const clockid_t clock_id = CLOCK_MONOTONIC_COARSE;
#else
  const clockid_t clock_id = CLOCK_MONOTONIC;
#endif
  struct timespec now;
  const int rc = clock_gettime(clock_id, &now);
  assert(rc == 0);

  const bool round_up = (now.tv_nsec >= 500000000);
  return now.tv_sec + round_up;
}
331 |
|
|
|
332 |
|
31 |
/**
 * Computes the size of the physical memory in bytes.
 *
 * @return product of sysconf(_SC_PHYS_PAGES) and sysconf(_SC_PAGE_SIZE)
 */
inline uint64_t platform_memsize() {
  const uint64_t num_pages = static_cast<uint64_t>(sysconf(_SC_PHYS_PAGES));
  const uint64_t page_size = static_cast<uint64_t>(sysconf(_SC_PAGE_SIZE));
  return num_pages * page_size;
}
336 |
|
|
|
337 |
|
4 |
/**
 * Creates the file watcher of this platform.
 *
 * @return a FileWatcherInotify allocated with new if CVMFS_ENABLE_INOTIFY is
 *         defined, NULL otherwise
 */
inline file_watcher::FileWatcher* platform_file_watcher() {
  file_watcher::FileWatcher *watcher = NULL;
#ifdef CVMFS_ENABLE_INOTIFY
  watcher = new file_watcher::FileWatcherInotify();
#endif  // CVMFS_ENABLE_INOTIFY
  return watcher;
}
344 |
|
|
|
345 |
|
|
#ifdef CVMFS_NAMESPACE_GUARD |
346 |
|
|
} // namespace CVMFS_NAMESPACE_GUARD |
347 |
|
|
#endif |
348 |
|
|
|
349 |
|
|
#endif // CVMFS_PLATFORM_LINUX_H_ |