xref: /unit/src/nxt_isolation.c (revision 1580:f1aefdf995d4)
1 /*
2  * Copyright (C) NGINX, Inc.
3  */
4 
5 #include <nxt_main.h>
6 #include <nxt_application.h>
7 #include <nxt_process.h>
8 #include <nxt_isolation.h>
9 
10 #if (NXT_HAVE_PIVOT_ROOT)
11 #include <mntent.h>
12 #endif
13 
14 
15 static nxt_int_t nxt_isolation_set(nxt_task_t *task,
16     nxt_conf_value_t *isolation, nxt_process_t *process);
17 
18 #if (NXT_HAVE_CLONE)
19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
20     nxt_conf_value_t *isolation, nxt_process_t *process);
21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
22     nxt_conf_value_t *namespaces, nxt_clone_t *clone);
23 #endif
24 
25 #if (NXT_HAVE_CLONE_NEWUSER)
26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
27     nxt_conf_value_t *isolation, nxt_process_t *process);
28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
29     nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
30     nxt_clone_credential_map_t *map);
31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
32     nxt_process_t *process);
33 #endif
34 
35 #if (NXT_HAVE_ISOLATION_ROOTFS)
36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
37     nxt_conf_value_t *isolation, nxt_process_t *process);
38 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
39     nxt_process_t *process, nxt_str_t *app_type);
40 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
41     nxt_process_t *process, nxt_array_t *syspaths);
42 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);
43 
44 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
45 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
46 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
47     const char *rootfs);
48 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
49 #endif
50 
51 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
52 #endif
53 
54 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
55 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
56     nxt_conf_value_t *isolation, nxt_process_t *process);
57 #endif
58 
59 
60 nxt_int_t
61 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
62     nxt_mp_t *mp)
63 {
64     nxt_int_t              cap_setid;
65     nxt_int_t              ret;
66     nxt_runtime_t          *rt;
67     nxt_common_app_conf_t  *app_conf;
68 
69     rt = task->thread->runtime;
70     app_conf = process->data.app;
71     cap_setid = rt->capabilities.setid;
72 
73     if (app_conf->isolation != NULL) {
74         ret = nxt_isolation_set(task, app_conf->isolation, process);
75         if (nxt_slow_path(ret != NXT_OK)) {
76             return ret;
77         }
78     }
79 
80 #if (NXT_HAVE_CLONE_NEWUSER)
81     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
82         cap_setid = 1;
83     }
84 #endif
85 
86 #if (NXT_HAVE_ISOLATION_ROOTFS)
87     if (process->isolation.rootfs != NULL) {
88         ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
89         if (nxt_slow_path(ret != NXT_OK)) {
90             return ret;
91         }
92     }
93 #endif
94 
95     if (cap_setid) {
96         ret = nxt_process_creds_set(task, process, &app_conf->user,
97                                     &app_conf->group);
98 
99         if (nxt_slow_path(ret != NXT_OK)) {
100             return ret;
101         }
102 
103     } else {
104         if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
105                         nxt_strlen(rt->user_cred.user)))
106         {
107             nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
108                       "missing capabilities", &app_conf->user, &app_conf->name);
109 
110             return NXT_ERROR;
111         }
112 
113         if (app_conf->group.length > 0
114             && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
115                            nxt_strlen(rt->group)))
116         {
117             nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
118                             "missing capabilities", &app_conf->group,
119                             &app_conf->name);
120 
121             return NXT_ERROR;
122         }
123     }
124 
125 #if (NXT_HAVE_CLONE_NEWUSER)
126     ret = nxt_isolation_vldt_creds(task, process);
127     if (nxt_slow_path(ret != NXT_OK)) {
128         return ret;
129     }
130 #endif
131 
132     return NXT_OK;
133 }
134 
135 
136 static nxt_int_t
137 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
138     nxt_process_t *process)
139 {
140 #if (NXT_HAVE_CLONE)
141     if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
142                       != NXT_OK))
143     {
144         return NXT_ERROR;
145     }
146 #endif
147 
148 #if (NXT_HAVE_CLONE_NEWUSER)
149     if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
150                       != NXT_OK))
151     {
152         return NXT_ERROR;
153     }
154 #endif
155 
156 #if (NXT_HAVE_ISOLATION_ROOTFS)
157     if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
158                       != NXT_OK))
159     {
160         return NXT_ERROR;
161     }
162 #endif
163 
164 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
165     if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
166                       != NXT_OK))
167     {
168         return NXT_ERROR;
169     }
170 #endif
171 
172     return NXT_OK;
173 }
174 
175 
176 #if (NXT_HAVE_CLONE)
177 
178 static nxt_int_t
179 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
180     nxt_process_t *process)
181 {
182     nxt_int_t         ret;
183     nxt_conf_value_t  *obj;
184 
185     static nxt_str_t  nsname = nxt_string("namespaces");
186 
187     obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
188     if (obj != NULL) {
189         ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
190         if (nxt_slow_path(ret != NXT_OK)) {
191             return NXT_ERROR;
192         }
193     }
194 
195     return NXT_OK;
196 }
197 
198 #endif
199 
200 
201 #if (NXT_HAVE_CLONE_NEWUSER)
202 
203 static nxt_int_t
204 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
205     nxt_process_t *process)
206 {
207     nxt_int_t         ret;
208     nxt_clone_t       *clone;
209     nxt_conf_value_t  *array;
210 
211     static nxt_str_t uidname = nxt_string("uidmap");
212     static nxt_str_t gidname = nxt_string("gidmap");
213 
214     clone = &process->isolation.clone;
215 
216     array = nxt_conf_get_object_member(isolation, &uidname, NULL);
217     if (array != NULL) {
218         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
219                                            &clone->uidmap);
220 
221         if (nxt_slow_path(ret != NXT_OK)) {
222             return NXT_ERROR;
223         }
224     }
225 
226     array = nxt_conf_get_object_member(isolation, &gidname, NULL);
227     if (array != NULL) {
228         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
229                                            &clone->gidmap);
230 
231         if (nxt_slow_path(ret != NXT_OK)) {
232             return NXT_ERROR;
233         }
234     }
235 
236     return NXT_OK;
237 }
238 
239 
240 static nxt_int_t
241 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
242     nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
243 {
244     nxt_int_t         ret;
245     nxt_uint_t        i;
246     nxt_conf_value_t  *obj;
247 
248     static nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
249         {
250             nxt_string("container"),
251             NXT_CONF_MAP_INT,
252             offsetof(nxt_clone_map_entry_t, container),
253         },
254 
255         {
256             nxt_string("host"),
257             NXT_CONF_MAP_INT,
258             offsetof(nxt_clone_map_entry_t, host),
259         },
260 
261         {
262             nxt_string("size"),
263             NXT_CONF_MAP_INT,
264             offsetof(nxt_clone_map_entry_t, size),
265         },
266     };
267 
268     map->size = nxt_conf_array_elements_count(map_array);
269 
270     if (map->size == 0) {
271         return NXT_OK;
272     }
273 
274     map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
275     if (nxt_slow_path(map->map == NULL)) {
276         return NXT_ERROR;
277     }
278 
279     for (i = 0; i < map->size; i++) {
280         obj = nxt_conf_get_array_element(map_array, i);
281 
282         ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
283                                   nxt_nitems(nxt_clone_map_entry_conf),
284                                   map->map + i);
285         if (nxt_slow_path(ret != NXT_OK)) {
286             nxt_alert(task, "clone map entry map error");
287             return NXT_ERROR;
288         }
289     }
290 
291     return NXT_OK;
292 }
293 
294 
295 static nxt_int_t
296 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
297 {
298     nxt_int_t         ret;
299     nxt_clone_t       *clone;
300     nxt_credential_t  *creds;
301 
302     clone = &process->isolation.clone;
303     creds = process->user_cred;
304 
305     if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
306         return NXT_OK;
307     }
308 
309     if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
310         if (nxt_slow_path(clone->uidmap.size > 0)) {
311             nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
312                     "\"isolation.namespaces.credential\" is false or unset");
313 
314             return NXT_ERROR;
315         }
316 
317         if (nxt_slow_path(clone->gidmap.size > 0)) {
318             nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
319                     "\"isolation.namespaces.credential\" is false or unset");
320 
321             return NXT_ERROR;
322         }
323 
324         return NXT_OK;
325     }
326 
327     ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
328     if (nxt_slow_path(ret != NXT_OK)) {
329         return NXT_ERROR;
330     }
331 
332     return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
333 }
334 
335 #endif
336 
337 
338 #if (NXT_HAVE_CLONE)
339 
340 static nxt_int_t
341 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
342     nxt_clone_t *clone)
343 {
344     uint32_t          index;
345     nxt_str_t         name;
346     nxt_int_t         flag;
347     nxt_conf_value_t  *value;
348 
349     index = 0;
350 
351     for ( ;; ) {
352         value = nxt_conf_next_object_member(namespaces, &name, &index);
353 
354         if (value == NULL) {
355             break;
356         }
357 
358         flag = 0;
359 
360 #if (NXT_HAVE_CLONE_NEWUSER)
361         if (nxt_str_eq(&name, "credential", 10)) {
362             flag = CLONE_NEWUSER;
363         }
364 #endif
365 
366 #if (NXT_HAVE_CLONE_NEWPID)
367         if (nxt_str_eq(&name, "pid", 3)) {
368             flag = CLONE_NEWPID;
369         }
370 #endif
371 
372 #if (NXT_HAVE_CLONE_NEWNET)
373         if (nxt_str_eq(&name, "network", 7)) {
374             flag = CLONE_NEWNET;
375         }
376 #endif
377 
378 #if (NXT_HAVE_CLONE_NEWUTS)
379         if (nxt_str_eq(&name, "uname", 5)) {
380             flag = CLONE_NEWUTS;
381         }
382 #endif
383 
384 #if (NXT_HAVE_CLONE_NEWNS)
385         if (nxt_str_eq(&name, "mount", 5)) {
386             flag = CLONE_NEWNS;
387         }
388 #endif
389 
390 #if (NXT_HAVE_CLONE_NEWCGROUP)
391         if (nxt_str_eq(&name, "cgroup", 6)) {
392             flag = CLONE_NEWCGROUP;
393         }
394 #endif
395 
396         if (!flag) {
397             nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
398             return NXT_ERROR;
399         }
400 
401         if (nxt_conf_get_boolean(value)) {
402             clone->flags |= flag;
403         }
404     }
405 
406     return NXT_OK;
407 }
408 
409 #endif
410 
411 
412 #if (NXT_HAVE_ISOLATION_ROOTFS)
413 
414 static nxt_int_t
415 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
416     nxt_process_t *process)
417 {
418     nxt_str_t         str;
419     nxt_conf_value_t  *obj;
420 
421     static nxt_str_t  rootfs_name = nxt_string("rootfs");
422 
423     obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
424     if (obj != NULL) {
425         nxt_conf_get_string(obj, &str);
426 
427         if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
428             nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
429                     "than \"/\" but given \"%V\"", &str);
430 
431             return NXT_ERROR;
432         }
433 
434         if (str.start[str.length - 1] == '/') {
435             str.length--;
436         }
437 
438         process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
439                                                  str.length + 1);
440 
441         if (nxt_slow_path(process->isolation.rootfs == NULL)) {
442             return NXT_ERROR;
443         }
444 
445         nxt_memcpy(process->isolation.rootfs, str.start, str.length);
446 
447         process->isolation.rootfs[str.length] = '\0';
448     }
449 
450     return NXT_OK;
451 }
452 
453 
454 static nxt_int_t
455 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
456     nxt_str_t *app_type)
457 {
458     nxt_int_t              ret, cap_chroot;
459     nxt_runtime_t          *rt;
460     nxt_app_lang_module_t  *lang;
461 
462     rt = task->thread->runtime;
463     cap_chroot = rt->capabilities.chroot;
464     lang = nxt_app_lang_module(rt, app_type);
465 
466     nxt_assert(lang != NULL);
467 
468 #if (NXT_HAVE_CLONE_NEWUSER)
469     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
470         cap_chroot = 1;
471     }
472 #endif
473 
474     if (!cap_chroot) {
475         nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
476         return NXT_ERROR;
477     }
478 
479     ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
480     if (nxt_slow_path(ret != NXT_OK)) {
481         return NXT_ERROR;
482     }
483 
484     process->isolation.cleanup = nxt_isolation_unmount_all;
485 
486     return NXT_OK;
487 }
488 
489 
490 static nxt_int_t
491 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
492     nxt_array_t *lang_mounts)
493 {
494     u_char          *p;
495     size_t          i, n, rootfs_len, len;
496     nxt_mp_t        *mp;
497     nxt_array_t     *mounts;
498     const u_char    *rootfs;
499     nxt_fs_mount_t  *mnt, *lang_mnt;
500 
501     mp = process->mem_pool;
502 
503     /* copy to init mem pool */
504     mounts = nxt_array_copy(mp, NULL, lang_mounts);
505     if (mounts == NULL) {
506         return NXT_ERROR;
507     }
508 
509     n = mounts->nelts;
510     mnt = mounts->elts;
511     lang_mnt = lang_mounts->elts;
512 
513     rootfs = process->isolation.rootfs;
514     rootfs_len = nxt_strlen(rootfs);
515 
516     for (i = 0; i < n; i++) {
517         len = nxt_strlen(lang_mnt[i].dst);
518 
519         mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
520         if (nxt_slow_path(mnt[i].dst == NULL)) {
521             return NXT_ERROR;
522         }
523 
524         p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
525         p = nxt_cpymem(p, lang_mnt[i].dst, len);
526         *p = '\0';
527     }
528 
529     mnt = nxt_array_add(mounts);
530     if (nxt_slow_path(mnt == NULL)) {
531         return NXT_ERROR;
532     }
533 
534     mnt->src = (u_char *) "tmpfs";
535     mnt->fstype = (u_char *) "tmpfs";
536     mnt->flags = NXT_MS_NOSUID | NXT_MS_NODEV | NXT_MS_NOEXEC | NXT_MS_RELATIME;
537     mnt->data = (u_char *) "size=1m,mode=777";
538 
539     mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1);
540     if (nxt_slow_path(mnt->dst == NULL)) {
541         return NXT_ERROR;
542     }
543 
544     p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
545     p = nxt_cpymem(p, "/tmp", 4);
546     *p = '\0';
547 
548 #if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS)
549 
550     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
551         && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS))
552     {
553         mnt = nxt_array_add(mounts);
554         if (nxt_slow_path(mnt == NULL)) {
555             return NXT_ERROR;
556         }
557 
558         mnt->fstype = (u_char *) "proc";
559         mnt->src = (u_char *) "proc";
560 
561         mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1);
562         if (nxt_slow_path(mnt->dst == NULL)) {
563             return NXT_ERROR;
564         }
565 
566         p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
567         p = nxt_cpymem(p, "/proc", 5);
568         *p = '\0';
569 
570         mnt->data = (u_char *) "";
571         mnt->flags = 0;
572     }
573 #endif
574 
575     process->isolation.mounts = mounts;
576 
577     return NXT_OK;
578 }
579 
580 
581 void
582 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
583 {
584     size_t          i, n;
585     nxt_array_t     *mounts;
586     nxt_fs_mount_t  *mnt;
587 
588     nxt_debug(task, "unmount all (%s)", process->name);
589 
590     mounts = process->isolation.mounts;
591     n = mounts->nelts;
592     mnt = mounts->elts;
593 
594     for (i = 0; i < n; i++) {
595         nxt_fs_unmount(mnt[i].dst);
596     }
597 }
598 
599 
600 nxt_int_t
601 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
602 {
603     size_t          i, n;
604     nxt_int_t       ret;
605     struct stat     st;
606     nxt_array_t     *mounts;
607     const u_char    *dst;
608     nxt_fs_mount_t  *mnt;
609 
610     mounts = process->isolation.mounts;
611 
612     n = mounts->nelts;
613     mnt = mounts->elts;
614 
615     for (i = 0; i < n; i++) {
616         dst = mnt[i].dst;
617 
618         if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0
619                           && stat((const char *) mnt[i].src, &st) != 0))
620         {
621             nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
622             continue;
623         }
624 
625         ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
626         if (nxt_slow_path(ret != NXT_OK)) {
627             nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
628             goto undo;
629         }
630 
631         ret = nxt_fs_mount(task, &mnt[i]);
632         if (nxt_slow_path(ret != NXT_OK)) {
633             goto undo;
634         }
635     }
636 
637     return NXT_OK;
638 
639 undo:
640 
641     n = i + 1;
642 
643     for (i = 0; i < n; i++) {
644         nxt_fs_unmount(mnt[i].dst);
645     }
646 
647     return NXT_ERROR;
648 }
649 
650 
651 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
652 
653 nxt_int_t
654 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
655 {
656     char       *rootfs;
657     nxt_int_t  ret;
658 
659     rootfs = (char *) process->isolation.rootfs;
660 
661     nxt_debug(task, "change root: %s", rootfs);
662 
663     if (NXT_CLONE_MNT(process->isolation.clone.flags)) {
664         ret = nxt_isolation_pivot_root(task, rootfs);
665 
666     } else {
667         ret = nxt_isolation_chroot(task, rootfs);
668     }
669 
670     if (nxt_fast_path(ret == NXT_OK)) {
671         if (nxt_slow_path(chdir("/") < 0)) {
672             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
673             return NXT_ERROR;
674         }
675     }
676 
677     return ret;
678 }
679 
680 
681 /*
682  * pivot_root(2) can only be safely used with containers, otherwise it can
683  * umount(2) the global root filesystem and screw up the machine.
684  */
685 
686 static nxt_int_t
687 nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
688 {
689     /*
690      * This implementation makes use of a kernel trick that works for ages
691      * and now documented in Linux kernel 5.
692      * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
693      */
694 
695     if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
696         nxt_alert(task, "failed to make / a slave mount %E", nxt_errno);
697         return NXT_ERROR;
698     }
699 
700     if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
701         return NXT_ERROR;
702     }
703 
704     if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
705         nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
706         return NXT_ERROR;
707     }
708 
709     if (nxt_slow_path(chdir(path) != 0)) {
710         nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
711         return NXT_ERROR;
712     }
713 
714     if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
715         nxt_alert(task, "failed to pivot_root %E", nxt_errno);
716         return NXT_ERROR;
717     }
718 
719     /*
720      * Make oldroot a slave mount to avoid unmounts getting propagated to the
721      * host.
722      */
723     if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
724         nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
725         return NXT_ERROR;
726     }
727 
728     if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
729         nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
730         return NXT_ERROR;
731     }
732 
733     return NXT_OK;
734 }
735 
736 
737 static nxt_int_t
738 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
739 {
740     char           *parent_mnt;
741     FILE           *procfile;
742     u_char         **mounts;
743     size_t         len;
744     uint8_t        *shared;
745     nxt_int_t      ret, index, nmounts;
746     struct mntent  *ent;
747 
748     static const char  *mount_path = "/proc/self/mounts";
749 
750     ret = NXT_ERROR;
751     ent = NULL;
752     shared = NULL;
753     procfile = NULL;
754     parent_mnt = NULL;
755 
756     nmounts = 256;
757 
758     mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
759     if (nxt_slow_path(mounts == NULL)) {
760         goto fail;
761     }
762 
763     shared = nxt_malloc(nmounts);
764     if (nxt_slow_path(shared == NULL)) {
765         goto fail;
766     }
767 
768     procfile = setmntent(mount_path, "r");
769     if (nxt_slow_path(procfile == NULL)) {
770         nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
771 
772         goto fail;
773     }
774 
775     index = 0;
776 
777 again:
778 
779     for ( ; index < nmounts; index++) {
780         ent = getmntent(procfile);
781         if (ent == NULL) {
782             nmounts = index;
783             break;
784         }
785 
786         mounts[index] = (u_char *) strdup(ent->mnt_dir);
787         shared[index] = hasmntopt(ent, "shared") != NULL;
788     }
789 
790     if (ent != NULL) {
791         /* there are still entries to be read */
792 
793         nmounts *= 2;
794         mounts = nxt_realloc(mounts, nmounts);
795         if (nxt_slow_path(mounts == NULL)) {
796             goto fail;
797         }
798 
799         shared = nxt_realloc(shared, nmounts);
800         if (nxt_slow_path(shared == NULL)) {
801             goto fail;
802         }
803 
804         goto again;
805     }
806 
807     for (index = 0; index < nmounts; index++) {
808         if (nxt_strcmp(mounts[index], rootfs) == 0) {
809             parent_mnt = (char *) rootfs;
810             break;
811         }
812     }
813 
814     if (parent_mnt == NULL) {
815         len = nxt_strlen(rootfs);
816 
817         parent_mnt = nxt_malloc(len + 1);
818         if (parent_mnt == NULL) {
819             goto fail;
820         }
821 
822         nxt_memcpy(parent_mnt, rootfs, len);
823         parent_mnt[len] = '\0';
824 
825         if (parent_mnt[len - 1] == '/') {
826             parent_mnt[len - 1] = '\0';
827             len--;
828         }
829 
830         for ( ;; ) {
831             for (index = 0; index < nmounts; index++) {
832                 if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
833                     goto found;
834                 }
835             }
836 
837             if (len == 1 && parent_mnt[0] == '/') {
838                 nxt_alert(task, "parent mount not found");
839                 goto fail;
840             }
841 
842             /* parent dir */
843             while (parent_mnt[len - 1] != '/' && len > 0) {
844                 len--;
845             }
846 
847             if (nxt_slow_path(len == 0)) {
848                 nxt_alert(task, "parent mount not found");
849                 goto fail;
850             }
851 
852             if (len == 1) {
853                 parent_mnt[len] = '\0';     /* / */
854             } else {
855                 parent_mnt[len - 1] = '\0'; /* /<path> */
856             }
857         }
858     }
859 
860 found:
861 
862     if (shared[index]) {
863         if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
864             nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
865                       nxt_errno);
866 
867             goto fail;
868         }
869     }
870 
871     ret = NXT_OK;
872 
873 fail:
874 
875     if (procfile != NULL) {
876         endmntent(procfile);
877     }
878 
879     if (mounts != NULL) {
880         for (index = 0; index < nmounts; index++) {
881             nxt_free(mounts[index]);
882         }
883 
884         nxt_free(mounts);
885     }
886 
887     if (shared != NULL) {
888         nxt_free(shared);
889     }
890 
891     if (parent_mnt != NULL && parent_mnt != rootfs) {
892         nxt_free(parent_mnt);
893     }
894 
895     return ret;
896 }
897 
898 
899 nxt_inline int
900 nxt_pivot_root(const char *new_root, const char *old_root)
901 {
902     return syscall(__NR_pivot_root, new_root, old_root);
903 }
904 
905 
906 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
907 
908 
909 nxt_int_t
910 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
911 {
912     char       *rootfs;
913 
914     rootfs = (char *) process->isolation.rootfs;
915 
916     nxt_debug(task, "change root: %s", rootfs);
917 
918     if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
919         if (nxt_slow_path(chdir("/") < 0)) {
920             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
921             return NXT_ERROR;
922         }
923 
924         return NXT_OK;
925     }
926 
927     return NXT_ERROR;
928 }
929 
930 #endif
931 
932 
933 static nxt_int_t
934 nxt_isolation_chroot(nxt_task_t *task, const char *path)
935 {
936     if (nxt_slow_path(chroot(path) < 0)) {
937         nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
938         return NXT_ERROR;
939     }
940 
941     return NXT_OK;
942 }
943 
944 #endif /* NXT_HAVE_ISOLATION_ROOTFS */
945 
946 
947 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
948 
949 static nxt_int_t
950 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
951     nxt_process_t *process)
952 {
953     nxt_conf_value_t  *obj;
954 
955     static nxt_str_t  new_privs_name = nxt_string("new_privs");
956 
957     obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
958     if (obj != NULL) {
959         process->isolation.new_privs = nxt_conf_get_boolean(obj);
960     }
961 
962     return NXT_OK;
963 }
964 
965 #endif
966