xref: /unit/src/nxt_isolation.c (revision 1708:4463c1fc26fd)
1 /*
2  * Copyright (C) NGINX, Inc.
3  */
4 
5 #include <nxt_main.h>
6 #include <nxt_application.h>
7 #include <nxt_process.h>
8 #include <nxt_isolation.h>
9 
10 #if (NXT_HAVE_PIVOT_ROOT)
11 #include <mntent.h>
12 #endif
13 
14 
/*
 * Forward declarations of the file-local helpers.  Each group is compiled
 * only when the corresponding platform feature macro is set.
 */

/* Applies all supported "isolation" configuration members to a process. */
static nxt_int_t nxt_isolation_set(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);

#if (NXT_HAVE_CLONE)
/* "namespaces" object handling: translates member names to clone flags. */
static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
    nxt_conf_value_t *namespaces, nxt_clone_t *clone);
#endif

#if (NXT_HAVE_CLONE_NEWUSER)
/* "uidmap" / "gidmap" parsing and validation. */
static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
    nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
    nxt_clone_credential_map_t *map);
static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
    nxt_process_t *process);
#endif

#if (NXT_HAVE_ISOLATION_ROOTFS)
/* "rootfs" and "automount" handling, mount list preparation and cleanup. */
static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_str_t *app_type);
static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_array_t *syspaths);
static int nxt_cdecl nxt_isolation_mount_compare(const void *v1,
    const void *v2);
static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);

#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
/* pivot_root(2) based root change, used with a mount namespace. */
static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
    const char *rootfs);
nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
#endif

/* chroot(2) based root change. */
static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
#endif

#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
/* "new_privs" option handling. */
static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
#endif
62 
63 
64 nxt_int_t
65 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
66     nxt_mp_t *mp)
67 {
68     nxt_int_t              cap_setid;
69     nxt_int_t              ret;
70     nxt_runtime_t          *rt;
71     nxt_common_app_conf_t  *app_conf;
72 
73     rt = task->thread->runtime;
74     app_conf = process->data.app;
75     cap_setid = rt->capabilities.setid;
76 
77     if (app_conf->isolation != NULL) {
78         ret = nxt_isolation_set(task, app_conf->isolation, process);
79         if (nxt_slow_path(ret != NXT_OK)) {
80             return ret;
81         }
82     }
83 
84 #if (NXT_HAVE_CLONE_NEWUSER)
85     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
86         cap_setid = 1;
87     }
88 #endif
89 
90     if (cap_setid) {
91         ret = nxt_process_creds_set(task, process, &app_conf->user,
92                                     &app_conf->group);
93 
94         if (nxt_slow_path(ret != NXT_OK)) {
95             return ret;
96         }
97 
98     } else {
99         if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
100                         nxt_strlen(rt->user_cred.user)))
101         {
102             nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
103                       "missing capabilities", &app_conf->user, &app_conf->name);
104 
105             return NXT_ERROR;
106         }
107 
108         if (app_conf->group.length > 0
109             && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
110                            nxt_strlen(rt->group)))
111         {
112             nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
113                             "missing capabilities", &app_conf->group,
114                             &app_conf->name);
115 
116             return NXT_ERROR;
117         }
118     }
119 
120 #if (NXT_HAVE_ISOLATION_ROOTFS)
121     if (process->isolation.rootfs != NULL) {
122         nxt_int_t  has_mnt;
123 
124         ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
125         if (nxt_slow_path(ret != NXT_OK)) {
126             return ret;
127         }
128 
129         has_mnt = 0;
130 
131 #if (NXT_HAVE_CLONE_NEWNS)
132         has_mnt = nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS);
133 #endif
134 
135         if (process->user_cred->uid == 0 && !has_mnt) {
136             nxt_log(task, NXT_LOG_WARN,
137                     "setting user \"root\" with \"rootfs\" is unsafe without "
138                     "\"mount\" namespace isolation");
139         }
140     }
141 #endif
142 
143 #if (NXT_HAVE_CLONE_NEWUSER)
144     ret = nxt_isolation_vldt_creds(task, process);
145     if (nxt_slow_path(ret != NXT_OK)) {
146         return ret;
147     }
148 #endif
149 
150     return NXT_OK;
151 }
152 
153 
154 static nxt_int_t
155 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
156     nxt_process_t *process)
157 {
158 #if (NXT_HAVE_CLONE)
159     if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
160                       != NXT_OK))
161     {
162         return NXT_ERROR;
163     }
164 #endif
165 
166 #if (NXT_HAVE_CLONE_NEWUSER)
167     if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
168                       != NXT_OK))
169     {
170         return NXT_ERROR;
171     }
172 #endif
173 
174 #if (NXT_HAVE_ISOLATION_ROOTFS)
175     if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
176                       != NXT_OK))
177     {
178         return NXT_ERROR;
179     }
180 
181     if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process)
182                       != NXT_OK))
183     {
184         return NXT_ERROR;
185     }
186 #endif
187 
188 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
189     if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
190                       != NXT_OK))
191     {
192         return NXT_ERROR;
193     }
194 #endif
195 
196     return NXT_OK;
197 }
198 
199 
200 #if (NXT_HAVE_CLONE)
201 
202 static nxt_int_t
203 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
204     nxt_process_t *process)
205 {
206     nxt_int_t         ret;
207     nxt_conf_value_t  *obj;
208 
209     static nxt_str_t  nsname = nxt_string("namespaces");
210 
211     obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
212     if (obj != NULL) {
213         ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
214         if (nxt_slow_path(ret != NXT_OK)) {
215             return NXT_ERROR;
216         }
217     }
218 
219     return NXT_OK;
220 }
221 
222 #endif
223 
224 
225 #if (NXT_HAVE_CLONE_NEWUSER)
226 
227 static nxt_int_t
228 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
229     nxt_process_t *process)
230 {
231     nxt_int_t         ret;
232     nxt_clone_t       *clone;
233     nxt_conf_value_t  *array;
234 
235     static nxt_str_t uidname = nxt_string("uidmap");
236     static nxt_str_t gidname = nxt_string("gidmap");
237 
238     clone = &process->isolation.clone;
239 
240     array = nxt_conf_get_object_member(isolation, &uidname, NULL);
241     if (array != NULL) {
242         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
243                                            &clone->uidmap);
244 
245         if (nxt_slow_path(ret != NXT_OK)) {
246             return NXT_ERROR;
247         }
248     }
249 
250     array = nxt_conf_get_object_member(isolation, &gidname, NULL);
251     if (array != NULL) {
252         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
253                                            &clone->gidmap);
254 
255         if (nxt_slow_path(ret != NXT_OK)) {
256             return NXT_ERROR;
257         }
258     }
259 
260     return NXT_OK;
261 }
262 
263 
264 static nxt_int_t
265 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
266     nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
267 {
268     nxt_int_t         ret;
269     nxt_uint_t        i;
270     nxt_conf_value_t  *obj;
271 
272     static nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
273         {
274             nxt_string("container"),
275             NXT_CONF_MAP_INT,
276             offsetof(nxt_clone_map_entry_t, container),
277         },
278 
279         {
280             nxt_string("host"),
281             NXT_CONF_MAP_INT,
282             offsetof(nxt_clone_map_entry_t, host),
283         },
284 
285         {
286             nxt_string("size"),
287             NXT_CONF_MAP_INT,
288             offsetof(nxt_clone_map_entry_t, size),
289         },
290     };
291 
292     map->size = nxt_conf_array_elements_count(map_array);
293 
294     if (map->size == 0) {
295         return NXT_OK;
296     }
297 
298     map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
299     if (nxt_slow_path(map->map == NULL)) {
300         return NXT_ERROR;
301     }
302 
303     for (i = 0; i < map->size; i++) {
304         obj = nxt_conf_get_array_element(map_array, i);
305 
306         ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
307                                   nxt_nitems(nxt_clone_map_entry_conf),
308                                   map->map + i);
309         if (nxt_slow_path(ret != NXT_OK)) {
310             nxt_alert(task, "clone map entry map error");
311             return NXT_ERROR;
312         }
313     }
314 
315     return NXT_OK;
316 }
317 
318 
319 static nxt_int_t
320 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
321 {
322     nxt_int_t         ret;
323     nxt_clone_t       *clone;
324     nxt_credential_t  *creds;
325 
326     clone = &process->isolation.clone;
327     creds = process->user_cred;
328 
329     if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
330         return NXT_OK;
331     }
332 
333     if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
334         if (nxt_slow_path(clone->uidmap.size > 0)) {
335             nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
336                     "\"isolation.namespaces.credential\" is false or unset");
337 
338             return NXT_ERROR;
339         }
340 
341         if (nxt_slow_path(clone->gidmap.size > 0)) {
342             nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
343                     "\"isolation.namespaces.credential\" is false or unset");
344 
345             return NXT_ERROR;
346         }
347 
348         return NXT_OK;
349     }
350 
351     ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
352     if (nxt_slow_path(ret != NXT_OK)) {
353         return NXT_ERROR;
354     }
355 
356     return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
357 }
358 
359 #endif
360 
361 
362 #if (NXT_HAVE_CLONE)
363 
364 static nxt_int_t
365 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
366     nxt_clone_t *clone)
367 {
368     uint32_t          index;
369     nxt_str_t         name;
370     nxt_int_t         flag;
371     nxt_conf_value_t  *value;
372 
373     index = 0;
374 
375     for ( ;; ) {
376         value = nxt_conf_next_object_member(namespaces, &name, &index);
377 
378         if (value == NULL) {
379             break;
380         }
381 
382         flag = 0;
383 
384 #if (NXT_HAVE_CLONE_NEWUSER)
385         if (nxt_str_eq(&name, "credential", 10)) {
386             flag = CLONE_NEWUSER;
387         }
388 #endif
389 
390 #if (NXT_HAVE_CLONE_NEWPID)
391         if (nxt_str_eq(&name, "pid", 3)) {
392             flag = CLONE_NEWPID;
393         }
394 #endif
395 
396 #if (NXT_HAVE_CLONE_NEWNET)
397         if (nxt_str_eq(&name, "network", 7)) {
398             flag = CLONE_NEWNET;
399         }
400 #endif
401 
402 #if (NXT_HAVE_CLONE_NEWUTS)
403         if (nxt_str_eq(&name, "uname", 5)) {
404             flag = CLONE_NEWUTS;
405         }
406 #endif
407 
408 #if (NXT_HAVE_CLONE_NEWNS)
409         if (nxt_str_eq(&name, "mount", 5)) {
410             flag = CLONE_NEWNS;
411         }
412 #endif
413 
414 #if (NXT_HAVE_CLONE_NEWCGROUP)
415         if (nxt_str_eq(&name, "cgroup", 6)) {
416             flag = CLONE_NEWCGROUP;
417         }
418 #endif
419 
420         if (!flag) {
421             nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
422             return NXT_ERROR;
423         }
424 
425         if (nxt_conf_get_boolean(value)) {
426             clone->flags |= flag;
427         }
428     }
429 
430     return NXT_OK;
431 }
432 
433 #endif
434 
435 
436 #if (NXT_HAVE_ISOLATION_ROOTFS)
437 
438 static nxt_int_t
439 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
440     nxt_process_t *process)
441 {
442     nxt_str_t         str;
443     nxt_conf_value_t  *obj;
444 
445     static nxt_str_t  rootfs_name = nxt_string("rootfs");
446 
447     obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
448     if (obj != NULL) {
449         nxt_conf_get_string(obj, &str);
450 
451         if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
452             nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
453                     "than \"/\" but given \"%V\"", &str);
454 
455             return NXT_ERROR;
456         }
457 
458         if (str.start[str.length - 1] == '/') {
459             str.length--;
460         }
461 
462         process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
463                                                  str.length + 1);
464 
465         if (nxt_slow_path(process->isolation.rootfs == NULL)) {
466             return NXT_ERROR;
467         }
468 
469         nxt_memcpy(process->isolation.rootfs, str.start, str.length);
470 
471         process->isolation.rootfs[str.length] = '\0';
472     }
473 
474     return NXT_OK;
475 }
476 
477 
478 static nxt_int_t
479 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation,
480     nxt_process_t *process)
481 {
482     nxt_conf_value_t         *conf, *value;
483     nxt_process_automount_t  *automount;
484 
485     static nxt_str_t  automount_name = nxt_string("automount");
486     static nxt_str_t  langdeps_name = nxt_string("language_deps");
487     static nxt_str_t  tmp_name = nxt_string("tmpfs");
488     static nxt_str_t  proc_name = nxt_string("procfs");
489 
490     automount = &process->isolation.automount;
491 
492     automount->language_deps = 1;
493     automount->tmpfs = 1;
494     automount->procfs = 1;
495 
496     conf = nxt_conf_get_object_member(isolation, &automount_name, NULL);
497     if (conf != NULL) {
498         value = nxt_conf_get_object_member(conf, &langdeps_name, NULL);
499         if (value != NULL) {
500             automount->language_deps = nxt_conf_get_boolean(value);
501         }
502 
503         value = nxt_conf_get_object_member(conf, &tmp_name, NULL);
504         if (value != NULL) {
505             automount->tmpfs = nxt_conf_get_boolean(value);
506         }
507 
508         value = nxt_conf_get_object_member(conf, &proc_name, NULL);
509         if (value != NULL) {
510             automount->procfs = nxt_conf_get_boolean(value);
511         }
512     }
513 
514     return NXT_OK;
515 }
516 
517 
518 static nxt_int_t
519 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
520     nxt_str_t *app_type)
521 {
522     nxt_int_t              ret, cap_chroot;
523     nxt_runtime_t          *rt;
524     nxt_app_lang_module_t  *lang;
525 
526     rt = task->thread->runtime;
527     cap_chroot = rt->capabilities.chroot;
528     lang = nxt_app_lang_module(rt, app_type);
529 
530     nxt_assert(lang != NULL);
531 
532 #if (NXT_HAVE_CLONE_NEWUSER)
533     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
534         cap_chroot = 1;
535     }
536 #endif
537 
538     if (!cap_chroot) {
539         nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
540         return NXT_ERROR;
541     }
542 
543     ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
544     if (nxt_slow_path(ret != NXT_OK)) {
545         return NXT_ERROR;
546     }
547 
548     process->isolation.cleanup = nxt_isolation_unmount_all;
549 
550     return NXT_OK;
551 }
552 
553 
554 static nxt_int_t
555 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
556     nxt_array_t *lang_mounts)
557 {
558     u_char          *p;
559     size_t          i, n, rootfs_len, len;
560     nxt_mp_t        *mp;
561     nxt_array_t     *mounts;
562     const u_char    *rootfs;
563     nxt_fs_mount_t  *mnt, *lang_mnt;
564 
565     mp = process->mem_pool;
566 
567     /* copy to init mem pool */
568     mounts = nxt_array_copy(mp, NULL, lang_mounts);
569     if (mounts == NULL) {
570         return NXT_ERROR;
571     }
572 
573     n = mounts->nelts;
574     mnt = mounts->elts;
575     lang_mnt = lang_mounts->elts;
576 
577     rootfs = process->isolation.rootfs;
578     rootfs_len = nxt_strlen(rootfs);
579 
580     for (i = 0; i < n; i++) {
581         len = nxt_strlen(lang_mnt[i].dst);
582 
583         mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
584         if (nxt_slow_path(mnt[i].dst == NULL)) {
585             return NXT_ERROR;
586         }
587 
588         p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
589         p = nxt_cpymem(p, lang_mnt[i].dst, len);
590         *p = '\0';
591     }
592 
593     if (process->isolation.automount.tmpfs) {
594         mnt = nxt_array_add(mounts);
595         if (nxt_slow_path(mnt == NULL)) {
596             return NXT_ERROR;
597         }
598 
599         mnt->src = (u_char *) "tmpfs";
600         mnt->name = (u_char *) "tmpfs";
601         mnt->type = NXT_FS_TMP;
602         mnt->flags = (NXT_FS_FLAGS_NOSUID
603                       | NXT_FS_FLAGS_NODEV
604                       | NXT_FS_FLAGS_NOEXEC);
605         mnt->data = (u_char *) "size=1m,mode=777";
606         mnt->builtin = 1;
607         mnt->deps = 0;
608 
609         mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1);
610         if (nxt_slow_path(mnt->dst == NULL)) {
611             return NXT_ERROR;
612         }
613 
614         p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
615         p = nxt_cpymem(p, "/tmp", 4);
616         *p = '\0';
617     }
618 
619     if (process->isolation.automount.procfs) {
620         mnt = nxt_array_add(mounts);
621         if (nxt_slow_path(mnt == NULL)) {
622             return NXT_ERROR;
623         }
624 
625         mnt->name = (u_char *) "proc";
626         mnt->type = NXT_FS_PROC;
627         mnt->src = (u_char *) "none";
628         mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1);
629         if (nxt_slow_path(mnt->dst == NULL)) {
630             return NXT_ERROR;
631         }
632 
633         p = nxt_cpymem(mnt->dst, rootfs, rootfs_len);
634         p = nxt_cpymem(p, "/proc", 5);
635         *p = '\0';
636 
637         mnt->data = (u_char *) "";
638         mnt->flags = NXT_FS_FLAGS_NOEXEC | NXT_FS_FLAGS_NOSUID;
639         mnt->builtin = 1;
640         mnt->deps = 0;
641     }
642 
643     qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t),
644           nxt_isolation_mount_compare);
645 
646     process->isolation.mounts = mounts;
647 
648     return NXT_OK;
649 }
650 
651 
652 static int nxt_cdecl
653 nxt_isolation_mount_compare(const void *v1, const void *v2)
654 {
655     const nxt_fs_mount_t  *mnt1, *mnt2;
656 
657     mnt1 = v1;
658     mnt2 = v2;
659 
660     return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src);
661 }
662 
663 
664 void
665 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
666 {
667     size_t                   n;
668     nxt_array_t              *mounts;
669     nxt_runtime_t            *rt;
670     nxt_fs_mount_t           *mnt;
671     nxt_process_automount_t  *automount;
672 
673     rt = task->thread->runtime;
674 
675     if (!rt->capabilities.setid) {
676         return;
677     }
678 
679 #if (NXT_HAVE_CLONE_NEWNS)
680     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) {
681         return;
682     }
683 #endif
684 
685     nxt_debug(task, "unmount all (%s)", process->name);
686 
687     automount = &process->isolation.automount;
688     mounts = process->isolation.mounts;
689     n = mounts->nelts;
690     mnt = mounts->elts;
691 
692     while (n > 0) {
693         n--;
694 
695         if (mnt[n].deps && !automount->language_deps) {
696             continue;
697         }
698 
699         nxt_fs_unmount(mnt[n].dst);
700     }
701 }
702 
703 
704 nxt_int_t
705 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
706 {
707     size_t                   i, n;
708     nxt_int_t                ret;
709     struct stat              st;
710     nxt_array_t              *mounts;
711     const u_char             *dst;
712     nxt_fs_mount_t           *mnt;
713     nxt_process_automount_t  *automount;
714 
715     automount = &process->isolation.automount;
716     mounts = process->isolation.mounts;
717 
718     n = mounts->nelts;
719     mnt = mounts->elts;
720 
721     for (i = 0; i < n; i++) {
722         dst = mnt[i].dst;
723 
724         if (mnt[i].deps && !automount->language_deps) {
725             continue;
726         }
727 
728         if (nxt_slow_path(mnt[i].type == NXT_FS_BIND
729                           && stat((const char *) mnt[i].src, &st) != 0))
730         {
731             nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
732             continue;
733         }
734 
735         ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
736         if (nxt_slow_path(ret != NXT_OK)) {
737             nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
738             goto undo;
739         }
740 
741         ret = nxt_fs_mount(task, &mnt[i]);
742         if (nxt_slow_path(ret != NXT_OK)) {
743             goto undo;
744         }
745     }
746 
747     return NXT_OK;
748 
749 undo:
750 
751     n = i + 1;
752 
753     for (i = 0; i < n; i++) {
754         nxt_fs_unmount(mnt[i].dst);
755     }
756 
757     return NXT_ERROR;
758 }
759 
760 
761 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
762 
763 nxt_int_t
764 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
765 {
766     char       *rootfs;
767     nxt_int_t  ret;
768 
769     rootfs = (char *) process->isolation.rootfs;
770 
771     nxt_debug(task, "change root: %s", rootfs);
772 
773     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) {
774         ret = nxt_isolation_pivot_root(task, rootfs);
775 
776     } else {
777         ret = nxt_isolation_chroot(task, rootfs);
778     }
779 
780     if (nxt_fast_path(ret == NXT_OK)) {
781         if (nxt_slow_path(chdir("/") < 0)) {
782             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
783             return NXT_ERROR;
784         }
785     }
786 
787     return ret;
788 }
789 
790 
791 /*
792  * pivot_root(2) can only be safely used with containers, otherwise it can
793  * umount(2) the global root filesystem and screw up the machine.
794  */
795 
796 static nxt_int_t
797 nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
798 {
799     /*
800      * This implementation makes use of a kernel trick that works for ages
801      * and now documented in Linux kernel 5.
802      * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
803      */
804 
805     if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
806         nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno);
807         return NXT_ERROR;
808     }
809 
810     if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
811         return NXT_ERROR;
812     }
813 
814     if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
815         nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
816         return NXT_ERROR;
817     }
818 
819     if (nxt_slow_path(chdir(path) != 0)) {
820         nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
821         return NXT_ERROR;
822     }
823 
824     if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
825         nxt_alert(task, "failed to pivot_root %E", nxt_errno);
826         return NXT_ERROR;
827     }
828 
829     /*
830      * Demote the oldroot mount to avoid unmounts getting propagated to
831      * the host.
832      */
833     if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
834         nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
835         return NXT_ERROR;
836     }
837 
838     if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
839         nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
840         return NXT_ERROR;
841     }
842 
843     return NXT_OK;
844 }
845 
846 
847 static nxt_int_t
848 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
849 {
850     char           *parent_mnt;
851     FILE           *procfile;
852     u_char         **mounts;
853     size_t         len;
854     uint8_t        *shared;
855     nxt_int_t      ret, index, nmounts;
856     struct mntent  *ent;
857 
858     static const char  *mount_path = "/proc/self/mounts";
859 
860     ret = NXT_ERROR;
861     ent = NULL;
862     shared = NULL;
863     procfile = NULL;
864     parent_mnt = NULL;
865 
866     nmounts = 256;
867 
868     mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
869     if (nxt_slow_path(mounts == NULL)) {
870         goto fail;
871     }
872 
873     shared = nxt_malloc(nmounts);
874     if (nxt_slow_path(shared == NULL)) {
875         goto fail;
876     }
877 
878     procfile = setmntent(mount_path, "r");
879     if (nxt_slow_path(procfile == NULL)) {
880         nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
881 
882         goto fail;
883     }
884 
885     index = 0;
886 
887 again:
888 
889     for ( ; index < nmounts; index++) {
890         ent = getmntent(procfile);
891         if (ent == NULL) {
892             nmounts = index;
893             break;
894         }
895 
896         mounts[index] = (u_char *) strdup(ent->mnt_dir);
897         shared[index] = hasmntopt(ent, "shared") != NULL;
898     }
899 
900     if (ent != NULL) {
901         /* there are still entries to be read */
902 
903         nmounts *= 2;
904         mounts = nxt_realloc(mounts, nmounts);
905         if (nxt_slow_path(mounts == NULL)) {
906             goto fail;
907         }
908 
909         shared = nxt_realloc(shared, nmounts);
910         if (nxt_slow_path(shared == NULL)) {
911             goto fail;
912         }
913 
914         goto again;
915     }
916 
917     for (index = 0; index < nmounts; index++) {
918         if (nxt_strcmp(mounts[index], rootfs) == 0) {
919             parent_mnt = (char *) rootfs;
920             break;
921         }
922     }
923 
924     if (parent_mnt == NULL) {
925         len = nxt_strlen(rootfs);
926 
927         parent_mnt = nxt_malloc(len + 1);
928         if (parent_mnt == NULL) {
929             goto fail;
930         }
931 
932         nxt_memcpy(parent_mnt, rootfs, len);
933         parent_mnt[len] = '\0';
934 
935         if (parent_mnt[len - 1] == '/') {
936             parent_mnt[len - 1] = '\0';
937             len--;
938         }
939 
940         for ( ;; ) {
941             for (index = 0; index < nmounts; index++) {
942                 if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
943                     goto found;
944                 }
945             }
946 
947             if (len == 1 && parent_mnt[0] == '/') {
948                 nxt_alert(task, "parent mount not found");
949                 goto fail;
950             }
951 
952             /* parent dir */
953             while (parent_mnt[len - 1] != '/' && len > 0) {
954                 len--;
955             }
956 
957             if (nxt_slow_path(len == 0)) {
958                 nxt_alert(task, "parent mount not found");
959                 goto fail;
960             }
961 
962             if (len == 1) {
963                 parent_mnt[len] = '\0';     /* / */
964             } else {
965                 parent_mnt[len - 1] = '\0'; /* /<path> */
966             }
967         }
968     }
969 
970 found:
971 
972     if (shared[index]) {
973         if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
974             nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
975                       nxt_errno);
976 
977             goto fail;
978         }
979     }
980 
981     ret = NXT_OK;
982 
983 fail:
984 
985     if (procfile != NULL) {
986         endmntent(procfile);
987     }
988 
989     if (mounts != NULL) {
990         for (index = 0; index < nmounts; index++) {
991             nxt_free(mounts[index]);
992         }
993 
994         nxt_free(mounts);
995     }
996 
997     if (shared != NULL) {
998         nxt_free(shared);
999     }
1000 
1001     if (parent_mnt != NULL && parent_mnt != rootfs) {
1002         nxt_free(parent_mnt);
1003     }
1004 
1005     return ret;
1006 }
1007 
1008 
1009 nxt_inline int
1010 nxt_pivot_root(const char *new_root, const char *old_root)
1011 {
1012     return syscall(__NR_pivot_root, new_root, old_root);
1013 }
1014 
1015 
1016 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
1017 
1018 
1019 nxt_int_t
1020 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
1021 {
1022     char       *rootfs;
1023 
1024     rootfs = (char *) process->isolation.rootfs;
1025 
1026     nxt_debug(task, "change root: %s", rootfs);
1027 
1028     if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
1029         if (nxt_slow_path(chdir("/") < 0)) {
1030             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
1031             return NXT_ERROR;
1032         }
1033 
1034         return NXT_OK;
1035     }
1036 
1037     return NXT_ERROR;
1038 }
1039 
1040 #endif
1041 
1042 
1043 static nxt_int_t
1044 nxt_isolation_chroot(nxt_task_t *task, const char *path)
1045 {
1046     if (nxt_slow_path(chroot(path) < 0)) {
1047         nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
1048         return NXT_ERROR;
1049     }
1050 
1051     return NXT_OK;
1052 }
1053 
1054 #endif /* NXT_HAVE_ISOLATION_ROOTFS */
1055 
1056 
1057 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
1058 
1059 static nxt_int_t
1060 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
1061     nxt_process_t *process)
1062 {
1063     nxt_conf_value_t  *obj;
1064 
1065     static nxt_str_t  new_privs_name = nxt_string("new_privs");
1066 
1067     obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
1068     if (obj != NULL) {
1069         process->isolation.new_privs = nxt_conf_get_boolean(obj);
1070     }
1071 
1072     return NXT_OK;
1073 }
1074 
1075 #endif
1076