xref: /unit/src/nxt_isolation.c (revision 1579:c80e692dc644)
1 /*
2  * Copyright (C) NGINX, Inc.
3  */
4 
5 #include <nxt_main.h>
6 #include <nxt_application.h>
7 #include <nxt_process.h>
8 #include <nxt_isolation.h>
9 
10 #if (NXT_HAVE_PIVOT_ROOT)
11 #include <mntent.h>
12 #endif
13 
14 
/*
 * Forward declarations of the file-local helpers.  Each group is compiled
 * only when the feature macro guarding it is enabled.
 */

/* Entry point that applies the "isolation" configuration object. */
static nxt_int_t nxt_isolation_set(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);

#if (NXT_HAVE_CLONE)
/* Parsing of the "namespaces" object into clone flags. */
static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task,
    nxt_conf_value_t *namespaces, nxt_clone_t *clone);
#endif

#if (NXT_HAVE_CLONE_NEWUSER)
/* Parsing and validation of the "uidmap" and "gidmap" arrays. */
static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task,
    nxt_mp_t *mem_pool, nxt_conf_value_t *map_array,
    nxt_clone_credential_map_t *map);
static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task,
    nxt_process_t *process);
#endif

#if (NXT_HAVE_ISOLATION_ROOTFS)
/* Handling of the "rootfs" option and of the language module mounts. */
static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_str_t *app_type);
static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task,
    nxt_process_t *process, nxt_array_t *syspaths);
static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process);

#if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
/* Root change through pivot_root(2); needs mount namespace support. */
static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs);
static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task,
    const char *rootfs);
nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root);
#endif

/* Root change through chroot(2). */
static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path);
#endif

#if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
/* Parsing of the "new_privs" option. */
static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task,
    nxt_conf_value_t *isolation, nxt_process_t *process);
#endif
58 
59 
60 nxt_int_t
61 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process,
62     nxt_mp_t *mp)
63 {
64     nxt_int_t              cap_setid;
65     nxt_int_t              ret;
66     nxt_runtime_t          *rt;
67     nxt_common_app_conf_t  *app_conf;
68 
69     rt = task->thread->runtime;
70     app_conf = process->data.app;
71     cap_setid = rt->capabilities.setid;
72 
73     if (app_conf->isolation != NULL) {
74         ret = nxt_isolation_set(task, app_conf->isolation, process);
75         if (nxt_slow_path(ret != NXT_OK)) {
76             return ret;
77         }
78     }
79 
80 #if (NXT_HAVE_CLONE_NEWUSER)
81     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
82         cap_setid = 1;
83     }
84 #endif
85 
86 #if (NXT_HAVE_ISOLATION_ROOTFS)
87     if (process->isolation.rootfs != NULL) {
88         ret = nxt_isolation_set_mounts(task, process, &app_conf->type);
89         if (nxt_slow_path(ret != NXT_OK)) {
90             return ret;
91         }
92     }
93 #endif
94 
95     if (cap_setid) {
96         ret = nxt_process_creds_set(task, process, &app_conf->user,
97                                     &app_conf->group);
98 
99         if (nxt_slow_path(ret != NXT_OK)) {
100             return ret;
101         }
102 
103     } else {
104         if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
105                         nxt_strlen(rt->user_cred.user)))
106         {
107             nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
108                       "missing capabilities", &app_conf->user, &app_conf->name);
109 
110             return NXT_ERROR;
111         }
112 
113         if (app_conf->group.length > 0
114             && !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
115                            nxt_strlen(rt->group)))
116         {
117             nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
118                             "missing capabilities", &app_conf->group,
119                             &app_conf->name);
120 
121             return NXT_ERROR;
122         }
123     }
124 
125 #if (NXT_HAVE_CLONE_NEWUSER)
126     ret = nxt_isolation_vldt_creds(task, process);
127     if (nxt_slow_path(ret != NXT_OK)) {
128         return ret;
129     }
130 #endif
131 
132     return NXT_OK;
133 }
134 
135 
136 static nxt_int_t
137 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation,
138     nxt_process_t *process)
139 {
140 #if (NXT_HAVE_CLONE)
141     if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process)
142                       != NXT_OK))
143     {
144         return NXT_ERROR;
145     }
146 #endif
147 
148 #if (NXT_HAVE_CLONE_NEWUSER)
149     if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process)
150                       != NXT_OK))
151     {
152         return NXT_ERROR;
153     }
154 #endif
155 
156 #if (NXT_HAVE_ISOLATION_ROOTFS)
157     if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process)
158                       != NXT_OK))
159     {
160         return NXT_ERROR;
161     }
162 #endif
163 
164 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
165     if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process)
166                       != NXT_OK))
167     {
168         return NXT_ERROR;
169     }
170 #endif
171 
172     return NXT_OK;
173 }
174 
175 
176 #if (NXT_HAVE_CLONE)
177 
178 static nxt_int_t
179 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation,
180     nxt_process_t *process)
181 {
182     nxt_int_t         ret;
183     nxt_conf_value_t  *obj;
184 
185     static nxt_str_t  nsname = nxt_string("namespaces");
186 
187     obj = nxt_conf_get_object_member(isolation, &nsname, NULL);
188     if (obj != NULL) {
189         ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone);
190         if (nxt_slow_path(ret != NXT_OK)) {
191             return NXT_ERROR;
192         }
193     }
194 
195     return NXT_OK;
196 }
197 
198 #endif
199 
200 
201 #if (NXT_HAVE_CLONE_NEWUSER)
202 
203 static nxt_int_t
204 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation,
205     nxt_process_t *process)
206 {
207     nxt_int_t         ret;
208     nxt_clone_t       *clone;
209     nxt_conf_value_t  *array;
210 
211     static nxt_str_t uidname = nxt_string("uidmap");
212     static nxt_str_t gidname = nxt_string("gidmap");
213 
214     clone = &process->isolation.clone;
215 
216     array = nxt_conf_get_object_member(isolation, &uidname, NULL);
217     if (array != NULL) {
218         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
219                                            &clone->uidmap);
220 
221         if (nxt_slow_path(ret != NXT_OK)) {
222             return NXT_ERROR;
223         }
224     }
225 
226     array = nxt_conf_get_object_member(isolation, &gidname, NULL);
227     if (array != NULL) {
228         ret = nxt_isolation_credential_map(task, process->mem_pool, array,
229                                            &clone->gidmap);
230 
231         if (nxt_slow_path(ret != NXT_OK)) {
232             return NXT_ERROR;
233         }
234     }
235 
236     return NXT_OK;
237 }
238 
239 
240 static nxt_int_t
241 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp,
242     nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map)
243 {
244     nxt_int_t         ret;
245     nxt_uint_t        i;
246     nxt_conf_value_t  *obj;
247 
248     static nxt_conf_map_t  nxt_clone_map_entry_conf[] = {
249         {
250             nxt_string("container"),
251             NXT_CONF_MAP_INT,
252             offsetof(nxt_clone_map_entry_t, container),
253         },
254 
255         {
256             nxt_string("host"),
257             NXT_CONF_MAP_INT,
258             offsetof(nxt_clone_map_entry_t, host),
259         },
260 
261         {
262             nxt_string("size"),
263             NXT_CONF_MAP_INT,
264             offsetof(nxt_clone_map_entry_t, size),
265         },
266     };
267 
268     map->size = nxt_conf_array_elements_count(map_array);
269 
270     if (map->size == 0) {
271         return NXT_OK;
272     }
273 
274     map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t));
275     if (nxt_slow_path(map->map == NULL)) {
276         return NXT_ERROR;
277     }
278 
279     for (i = 0; i < map->size; i++) {
280         obj = nxt_conf_get_array_element(map_array, i);
281 
282         ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf,
283                                   nxt_nitems(nxt_clone_map_entry_conf),
284                                   map->map + i);
285         if (nxt_slow_path(ret != NXT_OK)) {
286             nxt_alert(task, "clone map entry map error");
287             return NXT_ERROR;
288         }
289     }
290 
291     return NXT_OK;
292 }
293 
294 
295 static nxt_int_t
296 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process)
297 {
298     nxt_int_t         ret;
299     nxt_clone_t       *clone;
300     nxt_credential_t  *creds;
301 
302     clone = &process->isolation.clone;
303     creds = process->user_cred;
304 
305     if (clone->uidmap.size == 0 && clone->gidmap.size == 0) {
306         return NXT_OK;
307     }
308 
309     if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) {
310         if (nxt_slow_path(clone->uidmap.size > 0)) {
311             nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but "
312                     "\"isolation.namespaces.credential\" is false or unset");
313 
314             return NXT_ERROR;
315         }
316 
317         if (nxt_slow_path(clone->gidmap.size > 0)) {
318             nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but "
319                     "\"isolation.namespaces.credential\" is false or unset");
320 
321             return NXT_ERROR;
322         }
323 
324         return NXT_OK;
325     }
326 
327     ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds);
328     if (nxt_slow_path(ret != NXT_OK)) {
329         return NXT_ERROR;
330     }
331 
332     return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds);
333 }
334 
335 #endif
336 
337 
338 #if (NXT_HAVE_CLONE)
339 
340 static nxt_int_t
341 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces,
342     nxt_clone_t *clone)
343 {
344     uint32_t          index;
345     nxt_str_t         name;
346     nxt_int_t         flag;
347     nxt_conf_value_t  *value;
348 
349     index = 0;
350 
351     for ( ;; ) {
352         value = nxt_conf_next_object_member(namespaces, &name, &index);
353 
354         if (value == NULL) {
355             break;
356         }
357 
358         flag = 0;
359 
360 #if (NXT_HAVE_CLONE_NEWUSER)
361         if (nxt_str_eq(&name, "credential", 10)) {
362             flag = CLONE_NEWUSER;
363         }
364 #endif
365 
366 #if (NXT_HAVE_CLONE_NEWPID)
367         if (nxt_str_eq(&name, "pid", 3)) {
368             flag = CLONE_NEWPID;
369         }
370 #endif
371 
372 #if (NXT_HAVE_CLONE_NEWNET)
373         if (nxt_str_eq(&name, "network", 7)) {
374             flag = CLONE_NEWNET;
375         }
376 #endif
377 
378 #if (NXT_HAVE_CLONE_NEWUTS)
379         if (nxt_str_eq(&name, "uname", 5)) {
380             flag = CLONE_NEWUTS;
381         }
382 #endif
383 
384 #if (NXT_HAVE_CLONE_NEWNS)
385         if (nxt_str_eq(&name, "mount", 5)) {
386             flag = CLONE_NEWNS;
387         }
388 #endif
389 
390 #if (NXT_HAVE_CLONE_NEWCGROUP)
391         if (nxt_str_eq(&name, "cgroup", 6)) {
392             flag = CLONE_NEWCGROUP;
393         }
394 #endif
395 
396         if (!flag) {
397             nxt_alert(task, "unknown namespace flag: \"%V\"", &name);
398             return NXT_ERROR;
399         }
400 
401         if (nxt_conf_get_boolean(value)) {
402             clone->flags |= flag;
403         }
404     }
405 
406     return NXT_OK;
407 }
408 
409 #endif
410 
411 
412 #if (NXT_HAVE_ISOLATION_ROOTFS)
413 
414 static nxt_int_t
415 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation,
416     nxt_process_t *process)
417 {
418     nxt_str_t         str;
419     nxt_conf_value_t  *obj;
420 
421     static nxt_str_t  rootfs_name = nxt_string("rootfs");
422 
423     obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL);
424     if (obj != NULL) {
425         nxt_conf_get_string(obj, &str);
426 
427         if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) {
428             nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other "
429                     "than \"/\" but given \"%V\"", &str);
430 
431             return NXT_ERROR;
432         }
433 
434         if (str.start[str.length - 1] == '/') {
435             str.length--;
436         }
437 
438         process->isolation.rootfs = nxt_mp_alloc(process->mem_pool,
439                                                  str.length + 1);
440 
441         if (nxt_slow_path(process->isolation.rootfs == NULL)) {
442             return NXT_ERROR;
443         }
444 
445         nxt_memcpy(process->isolation.rootfs, str.start, str.length);
446 
447         process->isolation.rootfs[str.length] = '\0';
448     }
449 
450     return NXT_OK;
451 }
452 
453 
454 static nxt_int_t
455 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process,
456     nxt_str_t *app_type)
457 {
458     nxt_int_t              ret, cap_chroot;
459     nxt_runtime_t          *rt;
460     nxt_app_lang_module_t  *lang;
461 
462     rt = task->thread->runtime;
463     cap_chroot = rt->capabilities.chroot;
464     lang = nxt_app_lang_module(rt, app_type);
465 
466     nxt_assert(lang != NULL);
467 
468 #if (NXT_HAVE_CLONE_NEWUSER)
469     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) {
470         cap_chroot = 1;
471     }
472 #endif
473 
474     if (!cap_chroot) {
475         nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges");
476         return NXT_ERROR;
477     }
478 
479     if (lang->mounts != NULL && lang->mounts->nelts > 0) {
480         ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts);
481         if (nxt_slow_path(ret != NXT_OK)) {
482             return NXT_ERROR;
483         }
484 
485         process->isolation.cleanup = nxt_isolation_unmount_all;
486     }
487 
488     return NXT_OK;
489 }
490 
491 
492 static nxt_int_t
493 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process,
494     nxt_array_t *lang_mounts)
495 {
496     u_char          *p;
497     size_t          i, n, rootfs_len, len;
498     nxt_mp_t        *mp;
499     nxt_array_t     *mounts;
500     const u_char    *rootfs;
501     nxt_fs_mount_t  *mnt, *lang_mnt;
502 
503     rootfs = process->isolation.rootfs;
504     rootfs_len = nxt_strlen(rootfs);
505     mp = process->mem_pool;
506 
507     /* copy to init mem pool */
508     mounts = nxt_array_copy(mp, NULL, lang_mounts);
509     if (mounts == NULL) {
510         return NXT_ERROR;
511     }
512 
513     n = mounts->nelts;
514     mnt = mounts->elts;
515     lang_mnt = lang_mounts->elts;
516 
517     for (i = 0; i < n; i++) {
518         len = nxt_strlen(lang_mnt[i].dst);
519 
520         mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1);
521         if (mnt[i].dst == NULL) {
522             return NXT_ERROR;
523         }
524 
525         p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len);
526         p = nxt_cpymem(p, lang_mnt[i].dst, len);
527         *p = '\0';
528     }
529 
530     process->isolation.mounts = mounts;
531 
532     return NXT_OK;
533 }
534 
535 
536 void
537 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process)
538 {
539     size_t          i, n;
540     nxt_array_t     *mounts;
541     nxt_fs_mount_t  *mnt;
542 
543     nxt_debug(task, "unmount all (%s)", process->name);
544 
545     mounts = process->isolation.mounts;
546     n = mounts->nelts;
547     mnt = mounts->elts;
548 
549     for (i = 0; i < n; i++) {
550         nxt_fs_unmount(mnt[i].dst);
551     }
552 }
553 
554 
555 nxt_int_t
556 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process)
557 {
558     size_t          i, n;
559     nxt_int_t       ret, hasproc;
560     struct stat     st;
561     nxt_array_t     *mounts;
562     const u_char    *dst;
563     nxt_fs_mount_t  *mnt;
564 
565     hasproc = 0;
566 
567 #if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS)
568     nxt_fs_mount_t  mount;
569 
570     if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID)
571         && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS))
572     {
573         /*
574          * This mount point will automatically be gone when the namespace is
575          * destroyed.
576          */
577 
578         mount.fstype = (u_char *) "proc";
579         mount.src = (u_char *) "proc";
580         mount.dst = (u_char *) "/proc";
581         mount.data = (u_char *) "";
582         mount.flags = 0;
583 
584         ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO);
585         if (nxt_fast_path(ret == NXT_OK)) {
586             ret = nxt_fs_mount(task, &mount);
587             if (nxt_fast_path(ret == NXT_OK)) {
588                 hasproc = 1;
589             }
590 
591         } else {
592             nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno);
593         }
594     }
595 #endif
596 
597     mounts = process->isolation.mounts;
598 
599     n = mounts->nelts;
600     mnt = mounts->elts;
601 
602     for (i = 0; i < n; i++) {
603         dst = mnt[i].dst;
604 
605         if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0
606                           && stat((const char *) mnt[i].src, &st) != 0))
607         {
608             nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src);
609             continue;
610         }
611 
612         if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0
613             && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0)
614         {
615             continue;
616         }
617 
618         ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO);
619         if (nxt_slow_path(ret != NXT_OK)) {
620             nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno);
621             goto undo;
622         }
623 
624         ret = nxt_fs_mount(task, &mnt[i]);
625         if (nxt_slow_path(ret != NXT_OK)) {
626             goto undo;
627         }
628     }
629 
630     return NXT_OK;
631 
632 undo:
633 
634     n = i + 1;
635 
636     for (i = 0; i < n; i++) {
637         nxt_fs_unmount(mnt[i].dst);
638     }
639 
640     return NXT_ERROR;
641 }
642 
643 
644 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS)
645 
646 nxt_int_t
647 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
648 {
649     char       *rootfs;
650     nxt_int_t  ret;
651 
652     rootfs = (char *) process->isolation.rootfs;
653 
654     nxt_debug(task, "change root: %s", rootfs);
655 
656     if (NXT_CLONE_MNT(process->isolation.clone.flags)) {
657         ret = nxt_isolation_pivot_root(task, rootfs);
658 
659     } else {
660         ret = nxt_isolation_chroot(task, rootfs);
661     }
662 
663     if (nxt_fast_path(ret == NXT_OK)) {
664         if (nxt_slow_path(chdir("/") < 0)) {
665             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
666             return NXT_ERROR;
667         }
668     }
669 
670     return ret;
671 }
672 
673 
674 /*
675  * pivot_root(2) can only be safely used with containers, otherwise it can
676  * umount(2) the global root filesystem and screw up the machine.
677  */
678 
679 static nxt_int_t
680 nxt_isolation_pivot_root(nxt_task_t *task, const char *path)
681 {
682     /*
683      * This implementation makes use of a kernel trick that works for ages
684      * and now documented in Linux kernel 5.
685      * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/
686      */
687 
688     if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) {
689         nxt_alert(task, "failed to make / a slave mount %E", nxt_errno);
690         return NXT_ERROR;
691     }
692 
693     if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) {
694         return NXT_ERROR;
695     }
696 
697     if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) {
698         nxt_alert(task, "error bind mounting rootfs %E", nxt_errno);
699         return NXT_ERROR;
700     }
701 
702     if (nxt_slow_path(chdir(path) != 0)) {
703         nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno);
704         return NXT_ERROR;
705     }
706 
707     if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) {
708         nxt_alert(task, "failed to pivot_root %E", nxt_errno);
709         return NXT_ERROR;
710     }
711 
712     /*
713      * Make oldroot a slave mount to avoid unmounts getting propagated to the
714      * host.
715      */
716     if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) {
717         nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno);
718         return NXT_ERROR;
719     }
720 
721     if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) {
722         nxt_alert(task, "failed to umount old root directory %E", nxt_errno);
723         return NXT_ERROR;
724     }
725 
726     return NXT_OK;
727 }
728 
729 
730 static nxt_int_t
731 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs)
732 {
733     char           *parent_mnt;
734     FILE           *procfile;
735     u_char         **mounts;
736     size_t         len;
737     uint8_t        *shared;
738     nxt_int_t      ret, index, nmounts;
739     struct mntent  *ent;
740 
741     static const char  *mount_path = "/proc/self/mounts";
742 
743     ret = NXT_ERROR;
744     ent = NULL;
745     shared = NULL;
746     procfile = NULL;
747     parent_mnt = NULL;
748 
749     nmounts = 256;
750 
751     mounts = nxt_malloc(nmounts * sizeof(uintptr_t));
752     if (nxt_slow_path(mounts == NULL)) {
753         goto fail;
754     }
755 
756     shared = nxt_malloc(nmounts);
757     if (nxt_slow_path(shared == NULL)) {
758         goto fail;
759     }
760 
761     procfile = setmntent(mount_path, "r");
762     if (nxt_slow_path(procfile == NULL)) {
763         nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno);
764 
765         goto fail;
766     }
767 
768     index = 0;
769 
770 again:
771 
772     for ( ; index < nmounts; index++) {
773         ent = getmntent(procfile);
774         if (ent == NULL) {
775             nmounts = index;
776             break;
777         }
778 
779         mounts[index] = (u_char *) strdup(ent->mnt_dir);
780         shared[index] = hasmntopt(ent, "shared") != NULL;
781     }
782 
783     if (ent != NULL) {
784         /* there are still entries to be read */
785 
786         nmounts *= 2;
787         mounts = nxt_realloc(mounts, nmounts);
788         if (nxt_slow_path(mounts == NULL)) {
789             goto fail;
790         }
791 
792         shared = nxt_realloc(shared, nmounts);
793         if (nxt_slow_path(shared == NULL)) {
794             goto fail;
795         }
796 
797         goto again;
798     }
799 
800     for (index = 0; index < nmounts; index++) {
801         if (nxt_strcmp(mounts[index], rootfs) == 0) {
802             parent_mnt = (char *) rootfs;
803             break;
804         }
805     }
806 
807     if (parent_mnt == NULL) {
808         len = nxt_strlen(rootfs);
809 
810         parent_mnt = nxt_malloc(len + 1);
811         if (parent_mnt == NULL) {
812             goto fail;
813         }
814 
815         nxt_memcpy(parent_mnt, rootfs, len);
816         parent_mnt[len] = '\0';
817 
818         if (parent_mnt[len - 1] == '/') {
819             parent_mnt[len - 1] = '\0';
820             len--;
821         }
822 
823         for ( ;; ) {
824             for (index = 0; index < nmounts; index++) {
825                 if (nxt_strcmp(mounts[index], parent_mnt) == 0) {
826                     goto found;
827                 }
828             }
829 
830             if (len == 1 && parent_mnt[0] == '/') {
831                 nxt_alert(task, "parent mount not found");
832                 goto fail;
833             }
834 
835             /* parent dir */
836             while (parent_mnt[len - 1] != '/' && len > 0) {
837                 len--;
838             }
839 
840             if (nxt_slow_path(len == 0)) {
841                 nxt_alert(task, "parent mount not found");
842                 goto fail;
843             }
844 
845             if (len == 1) {
846                 parent_mnt[len] = '\0';     /* / */
847             } else {
848                 parent_mnt[len - 1] = '\0'; /* /<path> */
849             }
850         }
851     }
852 
853 found:
854 
855     if (shared[index]) {
856         if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) {
857             nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt,
858                       nxt_errno);
859 
860             goto fail;
861         }
862     }
863 
864     ret = NXT_OK;
865 
866 fail:
867 
868     if (procfile != NULL) {
869         endmntent(procfile);
870     }
871 
872     if (mounts != NULL) {
873         for (index = 0; index < nmounts; index++) {
874             nxt_free(mounts[index]);
875         }
876 
877         nxt_free(mounts);
878     }
879 
880     if (shared != NULL) {
881         nxt_free(shared);
882     }
883 
884     if (parent_mnt != NULL && parent_mnt != rootfs) {
885         nxt_free(parent_mnt);
886     }
887 
888     return ret;
889 }
890 
891 
892 nxt_inline int
893 nxt_pivot_root(const char *new_root, const char *old_root)
894 {
895     return syscall(__NR_pivot_root, new_root, old_root);
896 }
897 
898 
899 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */
900 
901 
902 nxt_int_t
903 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process)
904 {
905     char       *rootfs;
906 
907     rootfs = (char *) process->isolation.rootfs;
908 
909     nxt_debug(task, "change root: %s", rootfs);
910 
911     if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) {
912         if (nxt_slow_path(chdir("/") < 0)) {
913             nxt_alert(task, "chdir(\"/\") %E", nxt_errno);
914             return NXT_ERROR;
915         }
916 
917         return NXT_OK;
918     }
919 
920     return NXT_ERROR;
921 }
922 
923 #endif
924 
925 
926 static nxt_int_t
927 nxt_isolation_chroot(nxt_task_t *task, const char *path)
928 {
929     if (nxt_slow_path(chroot(path) < 0)) {
930         nxt_alert(task, "chroot(%s) %E", path, nxt_errno);
931         return NXT_ERROR;
932     }
933 
934     return NXT_OK;
935 }
936 
937 #endif /* NXT_HAVE_ISOLATION_ROOTFS */
938 
939 
940 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS)
941 
942 static nxt_int_t
943 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation,
944     nxt_process_t *process)
945 {
946     nxt_conf_value_t  *obj;
947 
948     static nxt_str_t  new_privs_name = nxt_string("new_privs");
949 
950     obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL);
951     if (obj != NULL) {
952         process->isolation.new_privs = nxt_conf_get_boolean(obj);
953     }
954 
955     return NXT_OK;
956 }
957 
958 #endif
959