xref: /unit/src/nxt_isolation.c (revision 1744:5e6c2b8fb3fe)
1 /*
2  * Copyright (C) NGINX, Inc.
3  */
4 
5 #include <nxt_main.h>
6 #include <nxt_application.h>
7 #include <nxt_process.h>
8 #include <nxt_isolation.h>
9 
10 #if (NXT_HAVE_PIVOT_ROOT)
11 #include <mntent.h>
12 #endif
13 
14 
15 static nxt_int_t nxt_isolation_set(nxt_task_t *task,
16     nxt_conf_value_t *isolation, nxt_process_t *process);
17 
18 #if (NXT_HAVE_CLONE)
19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
20     nxt_conf_value_t *isolation, nxt_process_t *process);
21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
22     nxt_conf_value_t *namespaces, nxt_clone_t *clone);
23 #endif
24 
25 #if (NXT_HAVE_CLONE_NEWUSER)
26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
27     nxt_conf_value_t *isolation, nxt_process_t *process);
28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
29     nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
30     nxt_clone_credential_map_t *map);
31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
32     nxt_process_t *process);
33 #endif
34 
35 #if (NXT_HAVE_ISOLATION_ROOTFS)
36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
37     nxt_conf_value_t *isolation, nxt_process_t *process);
38 static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task,
39     nxt_conf_value_t *isolation, nxt_process_t *process);
40 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
41     nxt_process_t *process, nxt_str_t *app_type);
42 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
43     nxt_process_t *process, nxt_array_t *syspaths);
44 static int nxt_cdecl nxt_isolation_mount_compare(const void *v1,
45     const void *v2);
46 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);
47 
48 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
49 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
50 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
51     const char *rootfs);
52 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
53 #endif
54 
55 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
56 #endif
57 
58 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
59 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
60     nxt_conf_value_t *isolation, nxt_process_t *process);
61 #endif
62 
63 
64 nxt_int_t
65 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
66     nxt_mp_t *mp)
67 {
68     nxt_int_t              cap_setid;
69     nxt_int_t              ret;
70     nxt_runtime_t          *rt;
71     nxt_common_app_conf_t  *app_conf;
72 
73     rt = task->thread->runtime;
74     app_conf = process->data.app;
75     cap_setid = rt->capabilities.setid;
76 
77     if (app_conf->isolation != NULL) {
78         ret = nxt_isolation_set(task, app_conf->isolation, process);
79         if (nxt_slow_path(ret != NXT_OK)) {
80             return ret;
81         }
82     }
83 
84 #if (NXT_HAVE_CLONE_NEWUSER)
85     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
86         cap_setid = 1;
87     }
88 #endif
89 
90     if (cap_setid) {
91         ret = nxt_process_creds_set(task, process, &app_conf->user,
92                                     &app_conf->group);
93 
94         if (nxt_slow_path(ret != NXT_OK)) {
95             return ret;
96         }
97 
98     } else {
99         if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
100                         nxt_strlen(rt->user_cred.user)))
101         {
102             nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
103                       "missing capabilities", &app_conf->user, &app_conf->name);
104 
105             return NXT_ERROR;
106         }
107 
108         if (app_conf->group.length > 0
109             && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
110                            nxt_strlen(rt->group)))
111         {
112             nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
113                             "missing capabilities", &app_conf->group,
114                             &app_conf->name);
115 
116             return NXT_ERROR;
117         }
118     }
119 
120 #if (NXT_HAVE_ISOLATION_ROOTFS)
121     if (process->isolation.rootfs != NULL) {
122         nxt_int_t  has_mnt;
123 
124         ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
125         if (nxt_slow_path(ret != NXT_OK)) {
126             return ret;
127         }
128 
129         has_mnt = 0;
130 
131 #if (NXT_HAVE_CLONE_NEWNS)
132         has_mnt = nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS);
133 #endif
134 
135         if (process->user_cred->uid == 0 && !has_mnt) {
136             nxt_log(task, NXT_LOG_WARN,
137                     "setting user \"root\" with \"rootfs\" is unsafe without "
138                     "\"mount\" namespace isolation");
139         }
140     }
141 #endif
142 
143 #if (NXT_HAVE_CLONE_NEWUSER)
144     ret = nxt_isolation_vldt_creds(task, process);
145     if (nxt_slow_path(ret != NXT_OK)) {
146         return ret;
147     }
148 #endif
149 
150     return NXT_OK;
151 }
152 
153 
154 static nxt_int_t
155 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
156     nxt_process_t *process)
157 {
158 #if (NXT_HAVE_CLONE)
159     if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
160                       != NXT_OK))
161     {
162         return NXT_ERROR;
163     }
164 #endif
165 
166 #if (NXT_HAVE_CLONE_NEWUSER)
167     if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
168                       != NXT_OK))
169     {
170         return NXT_ERROR;
171     }
172 #endif
173 
174 #if (NXT_HAVE_ISOLATION_ROOTFS)
175     if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
176                       != NXT_OK))
177     {
178         return NXT_ERROR;
179     }
180 
181     if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process)
182                       != NXT_OK))
183     {
184         return NXT_ERROR;
185     }
186 #endif
187 
188 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
189     if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
190                       != NXT_OK))
191     {
192         return NXT_ERROR;
193     }
194 #endif
195 
196     return NXT_OK;
197 }
198 
199 
200 #if (NXT_HAVE_CLONE)
201 
202 static nxt_int_t
203 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
204     nxt_process_t *process)
205 {
206     nxt_int_t         ret;
207     nxt_conf_value_t  *obj;
208 
209     static nxt_str_t  nsname = nxt_string("namespaces");
210 
211     obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
212     if (obj != NULL) {
213         ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
214         if (nxt_slow_path(ret != NXT_OK)) {
215             return NXT_ERROR;
216         }
217     }
218 
219     return NXT_OK;
220 }
221 
222 #endif
223 
224 
225 #if (NXT_HAVE_CLONE_NEWUSER)
226 
227 static nxt_int_t
228 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
229     nxt_process_t *process)
230 {
231     nxt_int_t         ret;
232     nxt_clone_t       *clone;
233     nxt_conf_value_t  *array;
234 
235     static nxt_str_t uidname = nxt_string("uidmap");
236     static nxt_str_t gidname = nxt_string("gidmap");
237 
238     clone = &process->isolation.clone;
239 
240     array = nxt_conf_get_object_member(isolation, &uidname, NULL);
241     if (array != NULL) {
242         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
243                                            &clone->uidmap);
244 
245         if (nxt_slow_path(ret != NXT_OK)) {
246             return NXT_ERROR;
247         }
248     }
249 
250     array = nxt_conf_get_object_member(isolation, &gidname, NULL);
251     if (array != NULL) {
252         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
253                                            &clone->gidmap);
254 
255         if (nxt_slow_path(ret != NXT_OK)) {
256             return NXT_ERROR;
257         }
258     }
259 
260     return NXT_OK;
261 }
262 
263 
264 static nxt_int_t
265 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
266     nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
267 {
268     nxt_int_t         ret;
269     nxt_uint_t        i;
270     nxt_conf_value_t  *obj;
271 
272     static nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
273         {
274             nxt_string("container"),
275             NXT_CONF_MAP_INT,
276             offsetof(nxt_clone_map_entry_t, container),
277         },
278 
279         {
280             nxt_string("host"),
281             NXT_CONF_MAP_INT,
282             offsetof(nxt_clone_map_entry_t, host),
283         },
284 
285         {
286             nxt_string("size"),
287             NXT_CONF_MAP_INT,
288             offsetof(nxt_clone_map_entry_t, size),
289         },
290     };
291 
292     map->size = nxt_conf_array_elements_count(map_array);
293 
294     if (map->size == 0) {
295         return NXT_OK;
296     }
297 
298     map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
299     if (nxt_slow_path(map->map == NULL)) {
300         return NXT_ERROR;
301     }
302 
303     for (i = 0; i < map->size; i++) {
304         obj = nxt_conf_get_array_element(map_array, i);
305 
306         ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
307                                   nxt_nitems(nxt_clone_map_entry_conf),
308                                   map->map + i);
309         if (nxt_slow_path(ret != NXT_OK)) {
310             nxt_alert(task, "clone map entry map error");
311             return NXT_ERROR;
312         }
313     }
314 
315     return NXT_OK;
316 }
317 
318 
319 static nxt_int_t
320 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
321 {
322     nxt_int_t         ret;
323     nxt_clone_t       *clone;
324     nxt_credential_t  *creds;
325 
326     clone = &process->isolation.clone;
327     creds = process->user_cred;
328 
329     if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
330         return NXT_OK;
331     }
332 
333     if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
334         if (nxt_slow_path(clone->uidmap.size > 0)) {
335             nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
336                     "\"isolation.namespaces.credential\" is false or unset");
337 
338             return NXT_ERROR;
339         }
340 
341         if (nxt_slow_path(clone->gidmap.size > 0)) {
342             nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
343                     "\"isolation.namespaces.credential\" is false or unset");
344 
345             return NXT_ERROR;
346         }
347 
348         return NXT_OK;
349     }
350 
351     ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
352     if (nxt_slow_path(ret != NXT_OK)) {
353         return NXT_ERROR;
354     }
355 
356     return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
357 }
358 
359 #endif
360 
361 
362 #if (NXT_HAVE_CLONE)
363 
364 static nxt_int_t
365 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
366     nxt_clone_t *clone)
367 {
368     uint32_t          index;
369     nxt_str_t         name;
370     nxt_int_t         flag;
371     nxt_conf_value_t  *value;
372 
373     index = 0;
374 
375     for ( ;; ) {
376         value = nxt_conf_next_object_member(namespaces, &name, &index);
377 
378         if (value == NULL) {
379             break;
380         }
381 
382         flag = 0;
383 
384 #if (NXT_HAVE_CLONE_NEWUSER)
385         if (nxt_str_eq(&name, "credential", 10)) {
386             flag = CLONE_NEWUSER;
387         }
388 #endif
389 
390 #if (NXT_HAVE_CLONE_NEWPID)
391         if (nxt_str_eq(&name, "pid", 3)) {
392             flag = CLONE_NEWPID;
393         }
394 #endif
395 
396 #if (NXT_HAVE_CLONE_NEWNET)
397         if (nxt_str_eq(&name, "network", 7)) {
398             flag = CLONE_NEWNET;
399         }
400 #endif
401 
402 #if (NXT_HAVE_CLONE_NEWUTS)
403         if (nxt_str_eq(&name, "uname", 5)) {
404             flag = CLONE_NEWUTS;
405         }
406 #endif
407 
408 #if (NXT_HAVE_CLONE_NEWNS)
409         if (nxt_str_eq(&name, "mount", 5)) {
410             flag = CLONE_NEWNS;
411         }
412 #endif
413 
414 #if (NXT_HAVE_CLONE_NEWCGROUP)
415         if (nxt_str_eq(&name, "cgroup", 6)) {
416             flag = CLONE_NEWCGROUP;
417         }
418 #endif
419 
420         if (!flag) {
421             nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
422             return NXT_ERROR;
423         }
424 
425         if (nxt_conf_get_boolean(value)) {
426             clone->flags |= flag;
427         }
428     }
429 
430     return NXT_OK;
431 }
432 
433 #endif
434 
435 
436 #if (NXT_HAVE_ISOLATION_ROOTFS)
437 
438 static nxt_int_t
439 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
440     nxt_process_t *process)
441 {
442     nxt_str_t         str;
443     nxt_conf_value_t  *obj;
444 
445     static nxt_str_t  rootfs_name = nxt_string("rootfs");
446 
447     obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
448     if (obj != NULL) {
449         nxt_conf_get_string(obj, &str);
450 
451         if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
452             nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
453                     "than \"/\" but given \"%V\"", &str);
454 
455             return NXT_ERROR;
456         }
457 
458         if (str.start[str.length - 1] == '/') {
459             str.length--;
460         }
461 
462         process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
463                                                  str.length + 1);
464 
465         if (nxt_slow_path(process->isolation.rootfs == NULL)) {
466             return NXT_ERROR;
467         }
468 
469         nxt_memcpy(process->isolation.rootfs, str.start, str.length);
470 
471         process->isolation.rootfs[str.length] = '\0';
472     }
473 
474     return NXT_OK;
475 }
476 
477 
478 static nxt_int_t
479 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation,
480     nxt_process_t *process)
481 {
482     nxt_conf_value_t         *conf, *value;
483     nxt_process_automount_t  *automount;
484 
485     static nxt_str_t  automount_name = nxt_string("automount");
486     static nxt_str_t  langdeps_name = nxt_string("language_deps");
487     static nxt_str_t  tmp_name = nxt_string("tmpfs");
488     static nxt_str_t  proc_name = nxt_string("procfs");
489 
490     automount = &process->isolation.automount;
491 
492     automount->language_deps = 1;
493     automount->tmpfs = 1;
494     automount->procfs = 1;
495 
496     conf = nxt_conf_get_object_member(isolation, &automount_name, NULL);
497     if (conf != NULL) {
498         value = nxt_conf_get_object_member(conf, &langdeps_name, NULL);
499         if (value != NULL) {
500             automount->language_deps = nxt_conf_get_boolean(value);
501         }
502 
503         value = nxt_conf_get_object_member(conf, &tmp_name, NULL);
504         if (value != NULL) {
505             automount->tmpfs = nxt_conf_get_boolean(value);
506         }
507 
508         value = nxt_conf_get_object_member(conf, &proc_name, NULL);
509         if (value != NULL) {
510             automount->procfs = nxt_conf_get_boolean(value);
511         }
512     }
513 
514     return NXT_OK;
515 }
516 
517 
518 static nxt_int_t
519 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
520     nxt_str_t *app_type)
521 {
522     nxt_int_t              ret, cap_chroot;
523     nxt_runtime_t          *rt;
524     nxt_app_lang_module_t  *lang;
525 
526     rt = task->thread->runtime;
527     cap_chroot = rt->capabilities.chroot;
528     lang = nxt_app_lang_module(rt, app_type);
529 
530     nxt_assert(lang != NULL);
531 
532 #if (NXT_HAVE_CLONE_NEWUSER)
533     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
534         cap_chroot = 1;
535     }
536 #endif
537 
538     if (!cap_chroot) {
539         nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
540         return NXT_ERROR;
541     }
542 
543     ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
544     if (nxt_slow_path(ret != NXT_OK)) {
545         return NXT_ERROR;
546     }
547 
548     process->isolation.cleanup = nxt_isolation_unmount_all;
549 
550     return NXT_OK;
551 }
552 
553 
554 static nxt_int_t
555 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
556     nxt_array_t *lang_mounts)
557 {
558     u_char          *p;
559     size_t          i, n, rootfs_len, len;
560     nxt_mp_t        *mp;
561     nxt_array_t     *mounts;
562     const u_char    *rootfs;
563     nxt_fs_mount_t  *mnt, *lang_mnt;
564 
565     mp = process->mem_pool;
566 
567     /* copy to init mem pool */
568     mounts = nxt_array_copy(mp, NULL, lang_mounts);
569     if (mounts == NULL) {
570         return NXT_ERROR;
571     }
572 
573     n = mounts->nelts;
574     mnt = mounts->elts;
575     lang_mnt = lang_mounts->elts;
576 
577     rootfs = process->isolation.rootfs;
578     rootfs_len = nxt_strlen(rootfs);
579 
580     for (i = 0; i < n; i++) {
581         len = nxt_strlen(lang_mnt[i].dst);
582 
583         mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
584         if (nxt_slow_path(mnt[i].dst == NULL)) {
585             return NXT_ERROR;
586         }
587 
588         p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
589         p = nxt_cpymem(p, lang_mnt[i].dst, len);
590         *p = '\0';
591     }
592 
593     if (process->isolation.automount.tmpfs) {
594         mnt = nxt_array_add(mounts);
595         if (nxt_slow_path(mnt == NULL)) {
596             return NXT_ERROR;
597         }
598 
599         mnt->src = (u_char *) "tmpfs";
600         mnt->name = (u_char *) "tmpfs";
601         mnt->type = NXT_FS_TMP;
602         mnt->flags = (NXT_FS_FLAGS_NOSUID
603                       | NXT_FS_FLAGS_NODEV
604                       | NXT_FS_FLAGS_NOEXEC);
605         mnt->data = (u_char *) "size=1m,mode=777";
606         mnt->builtin = 1;
607         mnt->deps = 0;
608 
609         mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1);
610         if (nxt_slow_path(mnt->dst == NULL)) {
611             return NXT_ERROR;
612         }
613 
614         p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
615         p = nxt_cpymem(p, "/tmp", 4);
616         *p = '\0';
617     }
618 
619     if (process->isolation.automount.procfs) {
620         mnt = nxt_array_add(mounts);
621         if (nxt_slow_path(mnt == NULL)) {
622             return NXT_ERROR;
623         }
624 
625         mnt->name = (u_char *) "proc";
626         mnt->type = NXT_FS_PROC;
627         mnt->src = (u_char *) "none";
628         mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1);
629         if (nxt_slow_path(mnt->dst == NULL)) {
630             return NXT_ERROR;
631         }
632 
633         p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
634         p = nxt_cpymem(p, "/proc", 5);
635         *p = '\0';
636 
637         mnt->data = (u_char *) "";
638         mnt->flags = NXT_FS_FLAGS_NOEXEC | NXT_FS_FLAGS_NOSUID;
639         mnt->builtin = 1;
640         mnt->deps = 0;
641     }
642 
643     qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t),
644           nxt_isolation_mount_compare);
645 
646     process->isolation.mounts = mounts;
647 
648     return NXT_OK;
649 }
650 
651 
652 static int nxt_cdecl
653 nxt_isolation_mount_compare(const void *v1, const void *v2)
654 {
655     const nxt_fs_mount_t  *mnt1, *mnt2;
656 
657     mnt1 = v1;
658     mnt2 = v2;
659 
660     return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src);
661 }
662 
663 
664 void
665 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
666 {
667     size_t                   n;
668     nxt_array_t              *mounts;
669     nxt_runtime_t            *rt;
670     nxt_fs_mount_t           *mnt;
671     nxt_process_automount_t  *automount;
672 
673     rt = task->thread->runtime;
674 
675     if (!rt->capabilities.setid) {
676         return;
677     }
678 
679     nxt_debug(task, "unmount all (%s)", process->name);
680 
681     automount = &process->isolation.automount;
682     mounts = process->isolation.mounts;
683     n = mounts->nelts;
684     mnt = mounts->elts;
685 
686     while (n > 0) {
687         n--;
688 
689         if (mnt[n].deps && !automount->language_deps) {
690             continue;
691         }
692 
693         nxt_fs_unmount(mnt[n].dst);
694     }
695 }
696 
697 
698 nxt_int_t
699 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
700 {
701     size_t                   i, n;
702     nxt_int_t                ret;
703     struct stat              st;
704     nxt_array_t              *mounts;
705     const u_char             *dst;
706     nxt_fs_mount_t           *mnt;
707     nxt_process_automount_t  *automount;
708 
709     automount = &process->isolation.automount;
710     mounts = process->isolation.mounts;
711 
712     n = mounts->nelts;
713     mnt = mounts->elts;
714 
715     for (i = 0; i < n; i++) {
716         dst = mnt[i].dst;
717 
718         if (mnt[i].deps && !automount->language_deps) {
719             continue;
720         }
721 
722         if (nxt_slow_path(mnt[i].type == NXT_FS_BIND
723                           && stat((const char *) mnt[i].src, &st) != 0))
724         {
725             nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
726             continue;
727         }
728 
729         ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
730         if (nxt_slow_path(ret != NXT_OK)) {
731             nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
732             goto undo;
733         }
734 
735         ret = nxt_fs_mount(task, &mnt[i]);
736         if (nxt_slow_path(ret != NXT_OK)) {
737             goto undo;
738         }
739     }
740 
741     return NXT_OK;
742 
743 undo:
744 
745     n = i + 1;
746 
747     for (i = 0; i < n; i++) {
748         nxt_fs_unmount(mnt[i].dst);
749     }
750 
751     return NXT_ERROR;
752 }
753 
754 
755 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
756 
757 nxt_int_t
758 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
759 {
760     char       *rootfs;
761     nxt_int_t  ret;
762 
763     rootfs = (char *) process->isolation.rootfs;
764 
765     nxt_debug(task, "change root: %s", rootfs);
766 
767     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) {
768         ret = nxt_isolation_pivot_root(task, rootfs);
769 
770     } else {
771         ret = nxt_isolation_chroot(task, rootfs);
772     }
773 
774     if (nxt_fast_path(ret == NXT_OK)) {
775         if (nxt_slow_path(chdir("/") < 0)) {
776             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
777             return NXT_ERROR;
778         }
779     }
780 
781     return ret;
782 }
783 
784 
785 /*
786  * pivot_root(2) can only be safely used with containers, otherwise it can
787  * umount(2) the global root filesystem and screw up the machine.
788  */
789 
790 static nxt_int_t
791 nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
792 {
793     /*
794      * This implementation makes use of a kernel trick that works for ages
795      * and now documented in Linux kernel 5.
796      * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
797      */
798 
799     if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
800         nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno);
801         return NXT_ERROR;
802     }
803 
804     if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
805         return NXT_ERROR;
806     }
807 
808     if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
809         nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
810         return NXT_ERROR;
811     }
812 
813     if (nxt_slow_path(chdir(path) != 0)) {
814         nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
815         return NXT_ERROR;
816     }
817 
818     if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
819         nxt_alert(task, "failed to pivot_root %E", nxt_errno);
820         return NXT_ERROR;
821     }
822 
823     /*
824      * Demote the oldroot mount to avoid unmounts getting propagated to
825      * the host.
826      */
827     if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
828         nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
829         return NXT_ERROR;
830     }
831 
832     if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
833         nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
834         return NXT_ERROR;
835     }
836 
837     return NXT_OK;
838 }
839 
840 
841 static nxt_int_t
842 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
843 {
844     char           *parent_mnt;
845     FILE           *procfile;
846     u_char         **mounts;
847     size_t         len;
848     uint8_t        *shared;
849     nxt_int_t      ret, index, nmounts;
850     struct mntent  *ent;
851 
852     static const char  *mount_path = "/proc/self/mounts";
853 
854     ret = NXT_ERROR;
855     ent = NULL;
856     shared = NULL;
857     procfile = NULL;
858     parent_mnt = NULL;
859 
860     nmounts = 256;
861 
862     mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
863     if (nxt_slow_path(mounts == NULL)) {
864         goto fail;
865     }
866 
867     shared = nxt_malloc(nmounts);
868     if (nxt_slow_path(shared == NULL)) {
869         goto fail;
870     }
871 
872     procfile = setmntent(mount_path, "r");
873     if (nxt_slow_path(procfile == NULL)) {
874         nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
875 
876         goto fail;
877     }
878 
879     index = 0;
880 
881 again:
882 
883     for ( ; index < nmounts; index++) {
884         ent = getmntent(procfile);
885         if (ent == NULL) {
886             nmounts = index;
887             break;
888         }
889 
890         mounts[index] = (u_char *) strdup(ent->mnt_dir);
891         shared[index] = hasmntopt(ent, "shared") != NULL;
892     }
893 
894     if (ent != NULL) {
895         /* there are still entries to be read */
896 
897         nmounts *= 2;
898         mounts = nxt_realloc(mounts, nmounts);
899         if (nxt_slow_path(mounts == NULL)) {
900             goto fail;
901         }
902 
903         shared = nxt_realloc(shared, nmounts);
904         if (nxt_slow_path(shared == NULL)) {
905             goto fail;
906         }
907 
908         goto again;
909     }
910 
911     for (index = 0; index < nmounts; index++) {
912         if (nxt_strcmp(mounts[index], rootfs) == 0) {
913             parent_mnt = (char *) rootfs;
914             break;
915         }
916     }
917 
918     if (parent_mnt == NULL) {
919         len = nxt_strlen(rootfs);
920 
921         parent_mnt = nxt_malloc(len + 1);
922         if (parent_mnt == NULL) {
923             goto fail;
924         }
925 
926         nxt_memcpy(parent_mnt, rootfs, len);
927         parent_mnt[len] = '\0';
928 
929         if (parent_mnt[len - 1] == '/') {
930             parent_mnt[len - 1] = '\0';
931             len--;
932         }
933 
934         for ( ;; ) {
935             for (index = 0; index < nmounts; index++) {
936                 if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
937                     goto found;
938                 }
939             }
940 
941             if (len == 1 && parent_mnt[0] == '/') {
942                 nxt_alert(task, "parent mount not found");
943                 goto fail;
944             }
945 
946             /* parent dir */
947             while (parent_mnt[len - 1] != '/' && len > 0) {
948                 len--;
949             }
950 
951             if (nxt_slow_path(len == 0)) {
952                 nxt_alert(task, "parent mount not found");
953                 goto fail;
954             }
955 
956             if (len == 1) {
957                 parent_mnt[len] = '\0';     /* / */
958             } else {
959                 parent_mnt[len - 1] = '\0'; /* /<path> */
960             }
961         }
962     }
963 
964 found:
965 
966     if (shared[index]) {
967         if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
968             nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
969                       nxt_errno);
970 
971             goto fail;
972         }
973     }
974 
975     ret = NXT_OK;
976 
977 fail:
978 
979     if (procfile != NULL) {
980         endmntent(procfile);
981     }
982 
983     if (mounts != NULL) {
984         for (index = 0; index < nmounts; index++) {
985             nxt_free(mounts[index]);
986         }
987 
988         nxt_free(mounts);
989     }
990 
991     if (shared != NULL) {
992         nxt_free(shared);
993     }
994 
995     if (parent_mnt != NULL && parent_mnt != rootfs) {
996         nxt_free(parent_mnt);
997     }
998 
999     return ret;
1000 }
1001 
1002 
1003 nxt_inline int
1004 nxt_pivot_root(const char *new_root, const char *old_root)
1005 {
1006     return syscall(__NR_pivot_root, new_root, old_root);
1007 }
1008 
1009 
1010 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
1011 
1012 
1013 nxt_int_t
1014 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
1015 {
1016     char       *rootfs;
1017 
1018     rootfs = (char *) process->isolation.rootfs;
1019 
1020     nxt_debug(task, "change root: %s", rootfs);
1021 
1022     if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
1023         if (nxt_slow_path(chdir("/") < 0)) {
1024             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
1025             return NXT_ERROR;
1026         }
1027 
1028         return NXT_OK;
1029     }
1030 
1031     return NXT_ERROR;
1032 }
1033 
1034 #endif
1035 
1036 
1037 static nxt_int_t
1038 nxt_isolation_chroot(nxt_task_t *task, const char *path)
1039 {
1040     if (nxt_slow_path(chroot(path) < 0)) {
1041         nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
1042         return NXT_ERROR;
1043     }
1044 
1045     return NXT_OK;
1046 }
1047 
1048 #endif /* NXT_HAVE_ISOLATION_ROOTFS */
1049 
1050 
1051 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
1052 
1053 static nxt_int_t
1054 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
1055     nxt_process_t *process)
1056 {
1057     nxt_conf_value_t  *obj;
1058 
1059     static nxt_str_t  new_privs_name = nxt_string("new_privs");
1060 
1061     obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
1062     if (obj != NULL) {
1063         process->isolation.new_privs = nxt_conf_get_boolean(obj);
1064     }
1065 
1066     return NXT_OK;
1067 }
1068 
1069 #endif
1070