1 /*
2 * Copyright (C) NGINX, Inc.
3 */
4
5 #include <nxt_main.h>
6 #include <nxt_application.h>
7 #include <nxt_process.h>
8 #include <nxt_isolation.h>
9
10 #if (NXT_HAVE_PIVOT_ROOT)
11 #include <mntent.h>
12 #endif
13
14
/* Dispatches the "isolation" configuration object to the parsers below. */
static nxt_int_t nxt_isolation_set(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);

/* Namespace parsing, compiled only when NXT_HAVE_CLONE is set. */
#if (NXT_HAVE_CLONE)
static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
    nxt_conf_value_t *namespaces, nxt_clone_t *clone);
#endif

/* "uidmap"/"gidmap" parsing and validation, user namespace builds only. */
#if (NXT_HAVE_CLONE_NEWUSER)
static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
    nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
    nxt_clone_credential_map_t *map);
static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
    nxt_process_t *process);
#endif

/* "rootfs" and "automount" handling plus the mount list helpers. */
#if (NXT_HAVE_ISOLATION_ROOTFS)
static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_str_t *app_type);
static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_array_t *syspaths);
static int nxt_cdecl nxt_isolation_mount_compare(const void *v1,
    const void *v2);
static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);

/* pivot_root(2) based root change; requires mount namespace support too. */
#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
    const char *rootfs);
nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
#endif

/* chroot(2) based root change, used by both change_root variants. */
static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
#endif

/* "new_privs" option parsing. */
#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
#endif
62
63
64 nxt_int_t
nxt_isolation_main_prefork(nxt_task_t * task,nxt_process_t * process,nxt_mp_t * mp)65 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
66 nxt_mp_t *mp)
67 {
68 nxt_int_t cap_setid;
69 nxt_int_t ret;
70 nxt_runtime_t *rt;
71 nxt_common_app_conf_t *app_conf;
72
73 rt = task->thread->runtime;
74 app_conf = process->data.app;
75 cap_setid = rt->capabilities.setid;
76
77 if (app_conf->isolation != NULL) {
78 ret = nxt_isolation_set(task, app_conf->isolation, process);
79 if (nxt_slow_path(ret != NXT_OK)) {
80 return ret;
81 }
82 }
83
84 #if (NXT_HAVE_CLONE_NEWUSER)
85 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
86 cap_setid = 1;
87 }
88 #endif
89
90 if (cap_setid) {
91 ret = nxt_process_creds_set(task, process, &app_conf->user,
92 &app_conf->group);
93
94 if (nxt_slow_path(ret != NXT_OK)) {
95 return ret;
96 }
97
98 } else {
99 if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
100 nxt_strlen(rt->user_cred.user)))
101 {
102 nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
103 "missing capabilities", &app_conf->user, &app_conf->name);
104
105 return NXT_ERROR;
106 }
107
108 if (app_conf->group.length > 0
109 && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
110 nxt_strlen(rt->group)))
111 {
112 nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
113 "missing capabilities", &app_conf->group,
114 &app_conf->name);
115
116 return NXT_ERROR;
117 }
118 }
119
120 #if (NXT_HAVE_ISOLATION_ROOTFS)
121 if (process->isolation.rootfs != NULL) {
122 nxt_int_t has_mnt;
123
124 ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
125 if (nxt_slow_path(ret != NXT_OK)) {
126 return ret;
127 }
128
129 #if (NXT_HAVE_CLONE_NEWNS)
130 has_mnt = nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS);
131 #else
132 has_mnt = 0;
133 #endif
134
135 if (process->user_cred->uid == 0 && !has_mnt) {
136 nxt_log(task, NXT_LOG_WARN,
137 "setting user \"root\" with \"rootfs\" is unsafe without "
138 "\"mount\" namespace isolation");
139 }
140 }
141 #endif
142
143 #if (NXT_HAVE_CLONE_NEWUSER)
144 ret = nxt_isolation_vldt_creds(task, process);
145 if (nxt_slow_path(ret != NXT_OK)) {
146 return ret;
147 }
148 #endif
149
150 return NXT_OK;
151 }
152
153
154 static nxt_int_t
nxt_isolation_set(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)155 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
156 nxt_process_t *process)
157 {
158 #if (NXT_HAVE_CLONE)
159 if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
160 != NXT_OK))
161 {
162 return NXT_ERROR;
163 }
164 #endif
165
166 #if (NXT_HAVE_CLONE_NEWUSER)
167 if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
168 != NXT_OK))
169 {
170 return NXT_ERROR;
171 }
172 #endif
173
174 #if (NXT_HAVE_ISOLATION_ROOTFS)
175 if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
176 != NXT_OK))
177 {
178 return NXT_ERROR;
179 }
180
181 if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process)
182 != NXT_OK))
183 {
184 return NXT_ERROR;
185 }
186 #endif
187
188 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
189 if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
190 != NXT_OK))
191 {
192 return NXT_ERROR;
193 }
194 #endif
195
196 return NXT_OK;
197 }
198
199
200 #if (NXT_HAVE_CLONE)
201
202 static nxt_int_t
nxt_isolation_set_namespaces(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)203 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
204 nxt_process_t *process)
205 {
206 nxt_int_t ret;
207 nxt_conf_value_t *obj;
208
209 static nxt_str_t nsname = nxt_string("namespaces");
210
211 obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
212 if (obj != NULL) {
213 ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
214 if (nxt_slow_path(ret != NXT_OK)) {
215 return NXT_ERROR;
216 }
217 }
218
219 return NXT_OK;
220 }
221
222 #endif
223
224
225 #if (NXT_HAVE_CLONE_NEWUSER)
226
227 static nxt_int_t
nxt_isolation_set_creds(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)228 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
229 nxt_process_t *process)
230 {
231 nxt_int_t ret;
232 nxt_clone_t *clone;
233 nxt_conf_value_t *array;
234
235 static nxt_str_t uidname = nxt_string("uidmap");
236 static nxt_str_t gidname = nxt_string("gidmap");
237
238 clone = &process->isolation.clone;
239
240 array = nxt_conf_get_object_member(isolation, &uidname, NULL);
241 if (array != NULL) {
242 ret = nxt_isolation_credential_map(task, process->mem_pool, array,
243 &clone->uidmap);
244
245 if (nxt_slow_path(ret != NXT_OK)) {
246 return NXT_ERROR;
247 }
248 }
249
250 array = nxt_conf_get_object_member(isolation, &gidname, NULL);
251 if (array != NULL) {
252 ret = nxt_isolation_credential_map(task, process->mem_pool, array,
253 &clone->gidmap);
254
255 if (nxt_slow_path(ret != NXT_OK)) {
256 return NXT_ERROR;
257 }
258 }
259
260 return NXT_OK;
261 }
262
263
264 static nxt_int_t
nxt_isolation_credential_map(nxt_task_t * task,nxt_mp_t * mp,nxt_conf_value_t * map_array,nxt_clone_credential_map_t * map)265 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
266 nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
267 {
268 nxt_int_t ret;
269 nxt_uint_t i;
270 nxt_conf_value_t *obj;
271
272 static nxt_conf_map_t nxt_clone_map_entry_conf[] = {
273 {
274 nxt_string("container"),
275 NXT_CONF_MAP_INT,
276 offsetof(nxt_clone_map_entry_t, container),
277 },
278
279 {
280 nxt_string("host"),
281 NXT_CONF_MAP_INT,
282 offsetof(nxt_clone_map_entry_t, host),
283 },
284
285 {
286 nxt_string("size"),
287 NXT_CONF_MAP_INT,
288 offsetof(nxt_clone_map_entry_t, size),
289 },
290 };
291
292 map->size = nxt_conf_array_elements_count(map_array);
293
294 if (map->size == 0) {
295 return NXT_OK;
296 }
297
298 map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
299 if (nxt_slow_path(map->map == NULL)) {
300 return NXT_ERROR;
301 }
302
303 for (i = 0; i < map->size; i++) {
304 obj = nxt_conf_get_array_element(map_array, i);
305
306 ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
307 nxt_nitems(nxt_clone_map_entry_conf),
308 map->map + i);
309 if (nxt_slow_path(ret != NXT_OK)) {
310 nxt_alert(task, "clone map entry map error");
311 return NXT_ERROR;
312 }
313 }
314
315 return NXT_OK;
316 }
317
318
319 static nxt_int_t
nxt_isolation_vldt_creds(nxt_task_t * task,nxt_process_t * process)320 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
321 {
322 nxt_int_t ret;
323 nxt_clone_t *clone;
324 nxt_credential_t *creds;
325
326 clone = &process->isolation.clone;
327 creds = process->user_cred;
328
329 if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
330 return NXT_OK;
331 }
332
333 if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
334 if (nxt_slow_path(clone->uidmap.size > 0)) {
335 nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
336 "\"isolation.namespaces.credential\" is false or unset");
337
338 return NXT_ERROR;
339 }
340
341 if (nxt_slow_path(clone->gidmap.size > 0)) {
342 nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
343 "\"isolation.namespaces.credential\" is false or unset");
344
345 return NXT_ERROR;
346 }
347
348 return NXT_OK;
349 }
350
351 ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
352 if (nxt_slow_path(ret != NXT_OK)) {
353 return NXT_ERROR;
354 }
355
356 return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
357 }
358
359 #endif
360
361
362 #if (NXT_HAVE_CLONE)
363
364 static nxt_int_t
nxt_isolation_clone_flags(nxt_task_t * task,nxt_conf_value_t * namespaces,nxt_clone_t * clone)365 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
366 nxt_clone_t *clone)
367 {
368 uint32_t index;
369 nxt_str_t name;
370 nxt_int_t flag;
371 nxt_conf_value_t *value;
372
373 index = 0;
374
375 for ( ;; ) {
376 value = nxt_conf_next_object_member(namespaces, &name, &index);
377
378 if (value == NULL) {
379 break;
380 }
381
382 flag = 0;
383
384 #if (NXT_HAVE_CLONE_NEWUSER)
385 if (nxt_str_eq(&name, "credential", 10)) {
386 flag = CLONE_NEWUSER;
387 }
388 #endif
389
390 #if (NXT_HAVE_CLONE_NEWPID)
391 if (nxt_str_eq(&name, "pid", 3)) {
392 flag = CLONE_NEWPID;
393 }
394 #endif
395
396 #if (NXT_HAVE_CLONE_NEWNET)
397 if (nxt_str_eq(&name, "network", 7)) {
398 flag = CLONE_NEWNET;
399 }
400 #endif
401
402 #if (NXT_HAVE_CLONE_NEWUTS)
403 if (nxt_str_eq(&name, "uname", 5)) {
404 flag = CLONE_NEWUTS;
405 }
406 #endif
407
408 #if (NXT_HAVE_CLONE_NEWNS)
409 if (nxt_str_eq(&name, "mount", 5)) {
410 flag = CLONE_NEWNS;
411 }
412 #endif
413
414 #if (NXT_HAVE_CLONE_NEWCGROUP)
415 if (nxt_str_eq(&name, "cgroup", 6)) {
416 flag = CLONE_NEWCGROUP;
417 }
418 #endif
419
420 if (!flag) {
421 nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
422 return NXT_ERROR;
423 }
424
425 if (nxt_conf_get_boolean(value)) {
426 clone->flags |= flag;
427 }
428 }
429
430 return NXT_OK;
431 }
432
433 #endif
434
435
436 #if (NXT_HAVE_ISOLATION_ROOTFS)
437
438 static nxt_int_t
nxt_isolation_set_rootfs(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)439 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
440 nxt_process_t *process)
441 {
442 nxt_str_t str;
443 nxt_conf_value_t *obj;
444
445 static nxt_str_t rootfs_name = nxt_string("rootfs");
446
447 obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
448 if (obj != NULL) {
449 nxt_conf_get_string(obj, &str);
450
451 if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
452 nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
453 "than \"/\" but given \"%V\"", &str);
454
455 return NXT_ERROR;
456 }
457
458 if (str.start[str.length - 1] == '/') {
459 str.length--;
460 }
461
462 process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
463 str.length + 1);
464
465 if (nxt_slow_path(process->isolation.rootfs == NULL)) {
466 return NXT_ERROR;
467 }
468
469 nxt_memcpy(process->isolation.rootfs, str.start, str.length);
470
471 process->isolation.rootfs[str.length] = '\0';
472 }
473
474 return NXT_OK;
475 }
476
477
478 static nxt_int_t
nxt_isolation_set_automount(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)479 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation,
480 nxt_process_t *process)
481 {
482 nxt_conf_value_t *conf, *value;
483 nxt_process_automount_t *automount;
484
485 static nxt_str_t automount_name = nxt_string("automount");
486 static nxt_str_t langdeps_name = nxt_string("language_deps");
487 static nxt_str_t tmp_name = nxt_string("tmpfs");
488 static nxt_str_t proc_name = nxt_string("procfs");
489
490 automount = &process->isolation.automount;
491
492 automount->language_deps = 1;
493 automount->tmpfs = 1;
494 automount->procfs = 1;
495
496 conf = nxt_conf_get_object_member(isolation, &automount_name, NULL);
497 if (conf != NULL) {
498 value = nxt_conf_get_object_member(conf, &langdeps_name, NULL);
499 if (value != NULL) {
500 automount->language_deps = nxt_conf_get_boolean(value);
501 }
502
503 value = nxt_conf_get_object_member(conf, &tmp_name, NULL);
504 if (value != NULL) {
505 automount->tmpfs = nxt_conf_get_boolean(value);
506 }
507
508 value = nxt_conf_get_object_member(conf, &proc_name, NULL);
509 if (value != NULL) {
510 automount->procfs = nxt_conf_get_boolean(value);
511 }
512 }
513
514 return NXT_OK;
515 }
516
517
518 static nxt_int_t
nxt_isolation_set_mounts(nxt_task_t * task,nxt_process_t * process,nxt_str_t * app_type)519 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
520 nxt_str_t *app_type)
521 {
522 nxt_int_t ret, cap_chroot;
523 nxt_runtime_t *rt;
524 nxt_app_lang_module_t *lang;
525
526 rt = task->thread->runtime;
527 cap_chroot = rt->capabilities.chroot;
528 lang = nxt_app_lang_module(rt, app_type);
529
530 nxt_assert(lang != NULL);
531
532 #if (NXT_HAVE_CLONE_NEWUSER)
533 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
534 cap_chroot = 1;
535 }
536 #endif
537
538 if (!cap_chroot) {
539 nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
540 return NXT_ERROR;
541 }
542
543 ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
544 if (nxt_slow_path(ret != NXT_OK)) {
545 return NXT_ERROR;
546 }
547
548 process->isolation.cleanup = nxt_isolation_unmount_all;
549
550 return NXT_OK;
551 }
552
553
554 static nxt_int_t
nxt_isolation_set_lang_mounts(nxt_task_t * task,nxt_process_t * process,nxt_array_t * lang_mounts)555 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
556 nxt_array_t *lang_mounts)
557 {
558 u_char *p;
559 size_t i, n, rootfs_len, len;
560 nxt_mp_t *mp;
561 nxt_array_t *mounts;
562 const u_char *rootfs;
563 nxt_fs_mount_t *mnt, *lang_mnt;
564
565 mp = process->mem_pool;
566
567 /* copy to init mem pool */
568 mounts = nxt_array_copy(mp, NULL, lang_mounts);
569 if (mounts == NULL) {
570 return NXT_ERROR;
571 }
572
573 n = mounts->nelts;
574 mnt = mounts->elts;
575 lang_mnt = lang_mounts->elts;
576
577 rootfs = process->isolation.rootfs;
578 rootfs_len = nxt_strlen(rootfs);
579
580 for (i = 0; i < n; i++) {
581 len = nxt_strlen(lang_mnt[i].dst);
582
583 mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
584 if (nxt_slow_path(mnt[i].dst == NULL)) {
585 return NXT_ERROR;
586 }
587
588 p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
589 p = nxt_cpymem(p, lang_mnt[i].dst, len);
590 *p = '\0';
591 }
592
593 if (process->isolation.automount.tmpfs) {
594 mnt = nxt_array_add(mounts);
595 if (nxt_slow_path(mnt == NULL)) {
596 return NXT_ERROR;
597 }
598
599 mnt->src = (u_char *) "tmpfs";
600 mnt->name = (u_char *) "tmpfs";
601 mnt->type = NXT_FS_TMP;
602 mnt->flags = (NXT_FS_FLAGS_NOSUID
603 | NXT_FS_FLAGS_NODEV
604 | NXT_FS_FLAGS_NOEXEC);
605 mnt->data = (u_char *) "size=1m,mode=777";
606 mnt->builtin = 1;
607 mnt->deps = 0;
608
609 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1);
610 if (nxt_slow_path(mnt->dst == NULL)) {
611 return NXT_ERROR;
612 }
613
614 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
615 p = nxt_cpymem(p, "/tmp", 4);
616 *p = '\0';
617 }
618
619 if (process->isolation.automount.procfs) {
620 mnt = nxt_array_add(mounts);
621 if (nxt_slow_path(mnt == NULL)) {
622 return NXT_ERROR;
623 }
624
625 mnt->name = (u_char *) "proc";
626 mnt->type = NXT_FS_PROC;
627 mnt->src = (u_char *) "none";
628 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1);
629 if (nxt_slow_path(mnt->dst == NULL)) {
630 return NXT_ERROR;
631 }
632
633 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
634 p = nxt_cpymem(p, "/proc", 5);
635 *p = '\0';
636
637 mnt->data = (u_char *) "";
638 mnt->flags = NXT_FS_FLAGS_NOEXEC | NXT_FS_FLAGS_NOSUID;
639 mnt->builtin = 1;
640 mnt->deps = 0;
641 }
642
643 qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t),
644 nxt_isolation_mount_compare);
645
646 process->isolation.mounts = mounts;
647
648 return NXT_OK;
649 }
650
651
652 static int nxt_cdecl
nxt_isolation_mount_compare(const void * v1,const void * v2)653 nxt_isolation_mount_compare(const void *v1, const void *v2)
654 {
655 const nxt_fs_mount_t *mnt1, *mnt2;
656
657 mnt1 = v1;
658 mnt2 = v2;
659
660 return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src);
661 }
662
663
664 void
nxt_isolation_unmount_all(nxt_task_t * task,nxt_process_t * process)665 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
666 {
667 size_t n;
668 nxt_array_t *mounts;
669 nxt_runtime_t *rt;
670 nxt_fs_mount_t *mnt;
671 nxt_process_automount_t *automount;
672
673 rt = task->thread->runtime;
674
675 if (!rt->capabilities.setid) {
676 return;
677 }
678
679 nxt_debug(task, "unmount all (%s)", process->name);
680
681 automount = &process->isolation.automount;
682 mounts = process->isolation.mounts;
683 n = mounts->nelts;
684 mnt = mounts->elts;
685
686 while (n > 0) {
687 n--;
688
689 if (mnt[n].deps && !automount->language_deps) {
690 continue;
691 }
692
693 nxt_fs_unmount(mnt[n].dst);
694 }
695 }
696
697
698 nxt_int_t
nxt_isolation_prepare_rootfs(nxt_task_t * task,nxt_process_t * process)699 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
700 {
701 size_t i, n;
702 nxt_int_t ret;
703 struct stat st;
704 nxt_array_t *mounts;
705 const u_char *dst;
706 nxt_fs_mount_t *mnt;
707 nxt_process_automount_t *automount;
708
709 automount = &process->isolation.automount;
710 mounts = process->isolation.mounts;
711
712 n = mounts->nelts;
713 mnt = mounts->elts;
714
715 for (i = 0; i < n; i++) {
716 dst = mnt[i].dst;
717
718 if (mnt[i].deps && !automount->language_deps) {
719 continue;
720 }
721
722 if (nxt_slow_path(mnt[i].type == NXT_FS_BIND
723 && stat((const char *) mnt[i].src, &st) != 0))
724 {
725 nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
726 continue;
727 }
728
729 ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
730 if (nxt_slow_path(ret != NXT_OK)) {
731 nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
732 goto undo;
733 }
734
735 ret = nxt_fs_mount(task, &mnt[i]);
736 if (nxt_slow_path(ret != NXT_OK)) {
737 goto undo;
738 }
739 }
740
741 return NXT_OK;
742
743 undo:
744
745 n = i + 1;
746
747 for (i = 0; i < n; i++) {
748 nxt_fs_unmount(mnt[i].dst);
749 }
750
751 return NXT_ERROR;
752 }
753
754
755 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
756
757 nxt_int_t
nxt_isolation_change_root(nxt_task_t * task,nxt_process_t * process)758 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
759 {
760 char *rootfs;
761 nxt_int_t ret;
762
763 rootfs = (char *) process->isolation.rootfs;
764
765 nxt_debug(task, "change root: %s", rootfs);
766
767 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) {
768 ret = nxt_isolation_pivot_root(task, rootfs);
769
770 } else {
771 ret = nxt_isolation_chroot(task, rootfs);
772 }
773
774 if (nxt_fast_path(ret == NXT_OK)) {
775 if (nxt_slow_path(chdir("/") < 0)) {
776 nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
777 return NXT_ERROR;
778 }
779 }
780
781 return ret;
782 }
783
784
785 /*
786 * pivot_root(2) can only be safely used with containers, otherwise it can
787 * umount(2) the global root filesystem and screw up the machine.
788 */
789
790 static nxt_int_t
nxt_isolation_pivot_root(nxt_task_t * task,const char * path)791 nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
792 {
793 /*
794 * This implementation makes use of a kernel trick that works for ages
795 * and now documented in Linux kernel 5.
796 * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
797 */
798
799 if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
800 nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno);
801 return NXT_ERROR;
802 }
803
804 if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
805 return NXT_ERROR;
806 }
807
808 if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
809 nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
810 return NXT_ERROR;
811 }
812
813 if (nxt_slow_path(chdir(path) != 0)) {
814 nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
815 return NXT_ERROR;
816 }
817
818 if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
819 nxt_alert(task, "failed to pivot_root %E", nxt_errno);
820 return NXT_ERROR;
821 }
822
823 /*
824 * Demote the oldroot mount to avoid unmounts getting propagated to
825 * the host.
826 */
827 if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
828 nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
829 return NXT_ERROR;
830 }
831
832 if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
833 nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
834 return NXT_ERROR;
835 }
836
837 return NXT_OK;
838 }
839
840
841 static nxt_int_t
nxt_isolation_make_private_mount(nxt_task_t * task,const char * rootfs)842 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
843 {
844 char *parent_mnt;
845 FILE *procfile;
846 u_char **mounts;
847 size_t len;
848 uint8_t *shared;
849 nxt_int_t ret, index, nmounts;
850 struct mntent *ent;
851
852 static const char *mount_path = "/proc/self/mounts";
853
854 ret = NXT_ERROR;
855 ent = NULL;
856 shared = NULL;
857 procfile = NULL;
858 parent_mnt = NULL;
859
860 nmounts = 256;
861
862 mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
863 if (nxt_slow_path(mounts == NULL)) {
864 goto fail;
865 }
866
867 shared = nxt_malloc(nmounts);
868 if (nxt_slow_path(shared == NULL)) {
869 goto fail;
870 }
871
872 procfile = setmntent(mount_path, "r");
873 if (nxt_slow_path(procfile == NULL)) {
874 nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
875
876 goto fail;
877 }
878
879 index = 0;
880
881 again:
882
883 for ( ; index < nmounts; index++) {
884 ent = getmntent(procfile);
885 if (ent == NULL) {
886 nmounts = index;
887 break;
888 }
889
890 mounts[index] = (u_char *) strdup(ent->mnt_dir);
891 shared[index] = hasmntopt(ent, "shared") != NULL;
892 }
893
894 if (ent != NULL) {
895 /* there are still entries to be read */
896
897 nmounts *= 2;
898 mounts = nxt_realloc(mounts, nmounts);
899 if (nxt_slow_path(mounts == NULL)) {
900 goto fail;
901 }
902
903 shared = nxt_realloc(shared, nmounts);
904 if (nxt_slow_path(shared == NULL)) {
905 goto fail;
906 }
907
908 goto again;
909 }
910
911 for (index = 0; index < nmounts; index++) {
912 if (nxt_strcmp(mounts[index], rootfs) == 0) {
913 parent_mnt = (char *) rootfs;
914 break;
915 }
916 }
917
918 if (parent_mnt == NULL) {
919 len = nxt_strlen(rootfs);
920
921 parent_mnt = nxt_malloc(len + 1);
922 if (parent_mnt == NULL) {
923 goto fail;
924 }
925
926 nxt_memcpy(parent_mnt, rootfs, len);
927 parent_mnt[len] = '\0';
928
929 if (parent_mnt[len - 1] == '/') {
930 parent_mnt[len - 1] = '\0';
931 len--;
932 }
933
934 for ( ;; ) {
935 for (index = 0; index < nmounts; index++) {
936 if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
937 goto found;
938 }
939 }
940
941 if (len == 1 && parent_mnt[0] == '/') {
942 nxt_alert(task, "parent mount not found");
943 goto fail;
944 }
945
946 /* parent dir */
947 while (parent_mnt[len - 1] != '/' && len > 0) {
948 len--;
949 }
950
951 if (nxt_slow_path(len == 0)) {
952 nxt_alert(task, "parent mount not found");
953 goto fail;
954 }
955
956 if (len == 1) {
957 parent_mnt[len] = '\0'; /* / */
958 } else {
959 parent_mnt[len - 1] = '\0'; /* /<path> */
960 }
961 }
962 }
963
964 found:
965
966 if (shared[index]) {
967 if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
968 nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
969 nxt_errno);
970
971 goto fail;
972 }
973 }
974
975 ret = NXT_OK;
976
977 fail:
978
979 if (procfile != NULL) {
980 endmntent(procfile);
981 }
982
983 if (mounts != NULL) {
984 for (index = 0; index < nmounts; index++) {
985 nxt_free(mounts[index]);
986 }
987
988 nxt_free(mounts);
989 }
990
991 if (shared != NULL) {
992 nxt_free(shared);
993 }
994
995 if (parent_mnt != NULL && parent_mnt != rootfs) {
996 nxt_free(parent_mnt);
997 }
998
999 return ret;
1000 }
1001
1002
1003 nxt_inline int
nxt_pivot_root(const char * new_root,const char * old_root)1004 nxt_pivot_root(const char *new_root, const char *old_root)
1005 {
1006 return syscall(__NR_pivot_root, new_root, old_root);
1007 }
1008
1009
1010 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
1011
1012
1013 nxt_int_t
nxt_isolation_change_root(nxt_task_t * task,nxt_process_t * process)1014 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
1015 {
1016 char *rootfs;
1017
1018 rootfs = (char *) process->isolation.rootfs;
1019
1020 nxt_debug(task, "change root: %s", rootfs);
1021
1022 if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
1023 if (nxt_slow_path(chdir("/") < 0)) {
1024 nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
1025 return NXT_ERROR;
1026 }
1027
1028 return NXT_OK;
1029 }
1030
1031 return NXT_ERROR;
1032 }
1033
1034 #endif
1035
1036
1037 static nxt_int_t
nxt_isolation_chroot(nxt_task_t * task,const char * path)1038 nxt_isolation_chroot(nxt_task_t *task, const char *path)
1039 {
1040 if (nxt_slow_path(chroot(path) < 0)) {
1041 nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
1042 return NXT_ERROR;
1043 }
1044
1045 return NXT_OK;
1046 }
1047
1048 #endif /* NXT_HAVE_ISOLATION_ROOTFS */
1049
1050
1051 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
1052
1053 static nxt_int_t
nxt_isolation_set_new_privs(nxt_task_t * task,nxt_conf_value_t * isolation,nxt_process_t * process)1054 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
1055 nxt_process_t *process)
1056 {
1057 nxt_conf_value_t *obj;
1058
1059 static nxt_str_t new_privs_name = nxt_string("new_privs");
1060
1061 obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
1062 if (obj != NULL) {
1063 process->isolation.new_privs = nxt_conf_get_boolean(obj);
1064 }
1065
1066 return NXT_OK;
1067 }
1068
1069 #endif
1070