GCC Code Coverage Report


Directory: cvmfs/
File: cvmfs/util/namespace.cc
Date: 2026-06-28 02:36:10
Exec Total Coverage
Lines: 7 109 6.4%
Branches: 5 129 3.9%

Line Branch Exec Source
1 /**
2 * This file is part of the CernVM File System.
3 */
4
5
#include "namespace.h"

#include <fcntl.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#ifndef __APPLE__
#include <sched.h>
#include <sys/mount.h>
#endif
#include <sys/wait.h>

#include <cassert>
#include <cerrno>
#include <cstdlib>
#include <cstring>

#include "util/posix.h"
#include "util/string.h"
24
// Fallback definitions: older Linux kernel or glibc headers may not provide
// these constants, in which case this file would otherwise not compile.
#if !defined(CLONE_NEWUSER)
#define CLONE_NEWUSER 0x10000000
#endif
#if !defined(CLONE_NEWPID)
#define CLONE_NEWPID 0x20000000
#endif
#if !defined(MS_REC)
#define MS_REC 0x4000
#endif

// CVMFS_HAS_UNSHARE is set on every non-Apple platform, except when the libc
// identifies itself through __GLIBC_MINOR__ with a minor version below 4.
#if !defined(__APPLE__) && \
    !(defined(__GLIBC_MINOR__) && (__GLIBC_MINOR__ < 4))
#define CVMFS_HAS_UNSHARE 1
#endif
44
45
#ifdef CVMFS_HAS_UNSHARE
/**
 * Some environments advertise unprivileged user namespaces as available (e.g.
 * /proc/sys/kernel/unprivileged_userns_clone contains "1") but still deny the
 * actual unshare() or uid_map setup.  Examples are the AppArmor restriction
 * kernel.apparmor_restrict_unprivileged_userns on recent Ubuntu releases, an
 * SELinux policy, or a nesting/quota limit inside a container.  Probe the real
 * capability in a child process -- unshare() mutates the caller -- so that the
 * kNsFeatureUserEnabled flag reflects what CreateUserNamespace() can actually
 * do rather than what the sysctl claims.
 *
 * \return true if a forked child managed to create two nested user namespaces
 *         and exited with status 0; false if fork() or waitpid() failed or
 *         the child reported a failure.
 */
static bool ProbeUserNamespace() {
  const pid_t pid = fork();
  if (pid < 0)
    return false;
  if (pid == 0) {
    // Child: exercise the same syscalls as CreateUserNamespace(), mapping our
    // own ids onto themselves (the least privileged setup).  Probe two levels
    // deep because some environments permit the first user namespace but deny
    // nesting -- e.g. a max_user_namespaces limit or an outer user namespace --
    // and T_Namespace.User nests two levels.
    const bool ok = (CreateUserNamespace(geteuid(), getegid()) == kFailNsOk) &&
                    (CreateUserNamespace(geteuid(), getegid()) == kFailNsOk);
    // _exit() instead of exit(): leave the process without running the
    // atexit handlers and stdio flushing inherited from the parent.
    _exit(ok ? 0 : 1);
  }

  // A signal delivered to the caller must not be mistaken for a failed probe,
  // nor may it leave the child behind unreaped: retry on EINTR.
  int status = 0;
  pid_t waited;
  do {
    waited = waitpid(pid, &status, 0);
  } while ((waited < 0) && (errno == EINTR));
  if (waited < 0)
    return false;
  return WIFEXITED(status) && (WEXITSTATUS(status) == 0);
}
#endif
77
78
79 140 int CheckNamespaceFeatures() {
80 #ifdef __APPLE__
81 return 0;
82 #else
83 140 int result = kNsFeatureMount; // available since kernel 2.4
84
3/6
✓ Branch 2 taken 140 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 140 times.
✗ Branch 6 not taken.
✓ Branch 9 taken 140 times.
✗ Branch 10 not taken.
140 if (SymlinkExists("/proc/self/ns/pid"))
85 140 result |= kNsFeaturePid;
86
1/2
✓ Branch 1 taken 140 times.
✗ Branch 2 not taken.
140 const int fd = open("/proc/sys/kernel/unprivileged_userns_clone", O_RDONLY);
87
1/2
✓ Branch 0 taken 140 times.
✗ Branch 1 not taken.
140 if (fd < 0)
88 140 return result;
89 result |= kNsFeatureUserAvailable;
90 char enabled = 0;
91 SafeRead(fd, &enabled, 1);
92 close(fd);
93 if (enabled != '1')
94 return result;
95 #ifdef CVMFS_HAS_UNSHARE
96 // The sysctl can report user namespaces as enabled even when the actual
97 // operations are denied; only advertise them as enabled if they really work.
98 if (ProbeUserNamespace())
99 result |= kNsFeatureUserEnabled;
100 #endif
101 return result;
102 #endif
103 }
104
105
106 NamespaceFailures CreateUserNamespace(uid_t map_uid_to, gid_t map_gid_to) {
107 #ifdef CVMFS_HAS_UNSHARE
108 const std::string uid_str = StringifyInt(geteuid());
109 const std::string gid_str = StringifyInt(getegid());
110
111 const int rvi = unshare(CLONE_NEWUSER);
112 if (rvi != 0)
113 return kFailNsUnshare;
114
115 std::string uid_map = StringifyInt(map_uid_to) + " " + uid_str + " 1";
116 std::string gid_map = StringifyInt(map_gid_to) + " " + gid_str + " 1";
117
118 int fd;
119 ssize_t nbytes;
120 fd = open("/proc/self/setgroups", O_WRONLY);
121 if (fd < 0)
122 return kFailNsSetgroupsOpen;
123 nbytes = write(fd, "deny", 4);
124 close(fd);
125 if (nbytes != 4)
126 return kFailNsSetgroupsWrite;
127
128 fd = open("/proc/self/uid_map", O_WRONLY);
129 if (fd < 0)
130 return kFailNsMapUidOpen;
131 nbytes = write(fd, uid_map.data(), uid_map.length());
132 close(fd);
133 if (nbytes != static_cast<ssize_t>(uid_map.length()))
134 return kFailNsMapUidWrite;
135
136 fd = open("/proc/self/gid_map", O_WRONLY);
137 if (fd < 0)
138 return kFailNsMapGidOpen;
139 nbytes = write(fd, gid_map.data(), gid_map.length());
140 close(fd);
141 if (nbytes != static_cast<ssize_t>(gid_map.length()))
142 return kFailNsMapGidWrite;
143
144 return kFailNsOk;
145 #else
146 return kFailNsUnsuppored;
147 #endif
148 }
149
150
/**
 * Recursively bind mounts the directory tree at `from` onto `to`
 * (MS_BIND | MS_REC).
 *
 * \return true if mount() succeeded; always false on macOS
 */
bool BindMount(const std::string &from, const std::string &to) {
#ifdef __APPLE__
  return false;
#else
  // File system type and data argument are not used for this mount call
  return mount(from.c_str(), to.c_str(), "", MS_BIND | MS_REC, NULL) == 0;
#endif
}
159
160
/**
 * Mounts a new instance of the proc file system on the directory `to`.
 *
 * \return true if mount() succeeded; always false on macOS
 */
bool ProcMount(const std::string &to) {
#ifdef __APPLE__
  return false;
#else
  return mount("proc", to.c_str(), "proc", 0, NULL) == 0;
#endif
}
169
170
/**
 * Detaches the calling process into its own mount namespace and afterwards
 * changes back into the directory that was the working directory before.
 *
 * \return true if both unshare() and chdir() succeeded; always false if built
 *         without unshare() support
 */
bool CreateMountNamespace() {
#ifdef CVMFS_HAS_UNSHARE
  // Has to be looked up while still in the original namespace
  const std::string working_dir = GetCurrentWorkingDirectory();

  if (unshare(CLONE_NEWNS) != 0)
    return false;

  // NOTE(review): presumably re-anchors the working directory within the new
  // mount namespace -- confirm
  return chdir(working_dir.c_str()) == 0;
#else
  return false;
#endif
}
185
186
187 #ifdef CVMFS_HAS_UNSHARE
namespace {

/**
 * SIGCHLD handler of the pid namespace's init process: collects every child
 * that has already terminated, without blocking.  All three arguments are
 * ignored.
 *
 * waitpid() modifies errno (e.g. ECHILD once no child is left).  Since the
 * handler can interrupt arbitrary code, errno is saved on entry and restored
 * on exit so that the interrupted code never sees the handler's value.
 */
static void Reaper(int /*sig*/, siginfo_t * /*siginfo*/, void * /*context*/) {
  const int saved_errno = errno;
  while (true) {
    // > 0: reaped a child, look for more; 0: children are still running;
    // < 0: no children left or error
    const pid_t retval = waitpid(-1, NULL, WNOHANG);
    if (retval <= 0)
      break;
  }
  errno = saved_errno;
}

}  // anonymous namespace
199 #endif
200
201
/**
 * Creates a new pid namespace and forks.  The forked child is the one that
 * returns from this function; the calling process never returns on success of
 * unshare(): it closes its file descriptors, waits for the child, and exits
 * with the child's exit code (127 if the child did not exit regularly).
 *
 * The fd_parent file descriptor, if passed, is the read end of a pipe whose
 * write end is connected to the parent process.  This gives the namespace's
 * init process a means to know its pid in the context of the parent namespace.
 * The parent writes two pid_t values: its own pid followed by the pid of the
 * forked child.
 *
 * \return in the child: true if /proc could be mounted; in the caller: false
 *         if unshare() failed or if built without unshare() support
 */
bool CreatePidNamespace(int *fd_parent) {
#ifdef CVMFS_HAS_UNSHARE
  int retval = unshare(CLONE_NEWPID);
  if (retval != 0)
    return false;

  int pipe_parent[2];
  MakePipe(pipe_parent);

  pid_t pid = fork();
  if (pid == -1)
    abort();

  if (pid != 0) {
    // Parent: stays around only to hand over the pids and to wait for the
    // namespace to exit.  Close all file descriptors but the pipe's write end.
    const int max_fd = static_cast<int>(sysconf(_SC_OPEN_MAX));
    for (int fd = 0; fd < max_fd; fd++) {
      if (fd == pipe_parent[1])
        continue;
      close(fd);
    }

    pid_t parent_pid = getpid();
    SafeWrite(pipe_parent[1], &parent_pid, sizeof(parent_pid));
    SafeWrite(pipe_parent[1], &pid, sizeof(pid));

    int status;
    retval = waitpid(pid, &status, 0);
    if ((retval >= 0) && WIFEXITED(status))
      exit(WEXITSTATUS(status));
    exit(127);
  }

  // From here on: the new init process of the pid namespace
  close(pipe_parent[1]);
  // NOTE(review): the read end stays open even if fd_parent is NULL;
  // presumably so that the parent's writes always find a reader -- confirm
  if (fd_parent != NULL)
    *fd_parent = pipe_parent[0];

  // Note: only signals for which signal handlers are established can be sent
  // by other processes of this pid namespace to the init process
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = Reaper;
  sa.sa_flags = SA_SIGINFO;
  sigfillset(&sa.sa_mask);
  retval = sigaction(SIGCHLD, &sa, NULL);
  assert(retval == 0);

  retval = mount("", "/proc", "proc", 0, NULL);
  return retval == 0;
#else
  return false;
#endif
}
266