1 /* 2 * Copyright (C) NGINX, Inc. 3 */ 4 5 #include <nxt_main.h> 6 #include <nxt_application.h> 7 #include <nxt_process.h> 8 #include <nxt_isolation.h> 9 10 #if (NXT_HAVE_PIVOT_ROOT) 11 #include <mntent.h> 12 #endif 13 14 15 static nxt_int_t nxt_isolation_set(nxt_task_t *task, 16 nxt_conf_value_t *isolation, nxt_process_t *process); 17 18 #if (NXT_HAVE_CLONE) 19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task, 20 nxt_conf_value_t *isolation, nxt_process_t *process); 21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task, 22 nxt_conf_value_t *namespaces, nxt_clone_t *clone); 23 #endif 24 25 #if (NXT_HAVE_CLONE_NEWUSER) 26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task, 27 nxt_conf_value_t *isolation, nxt_process_t *process); 28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task, 29 nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, 30 nxt_clone_credential_map_t *map); 31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task, 32 nxt_process_t *process); 33 #endif 34 35 #if (NXT_HAVE_ISOLATION_ROOTFS) 36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task, 37 nxt_conf_value_t *isolation, nxt_process_t *process); 38 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task, 39 nxt_process_t *process, nxt_str_t *app_type); 40 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task, 41 nxt_process_t *process, nxt_array_t *syspaths); 42 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process); 43 44 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 45 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs); 46 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task, 47 const char *rootfs); 48 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root); 49 #endif 50 51 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path); 52 #endif 53 54 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 55 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task, 56 nxt_conf_value_t *isolation, nxt_process_t *process); 57 #endif 58 59 60 nxt_int_t 61 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, 62 nxt_mp_t *mp) 63 { 64 nxt_int_t cap_setid; 65 nxt_int_t ret; 66 nxt_runtime_t *rt; 67 nxt_common_app_conf_t *app_conf; 68 69 rt = task->thread->runtime; 70 app_conf = process->data.app; 71 cap_setid = rt->capabilities.setid; 72 73 if (app_conf->isolation != NULL) { 74 ret = nxt_isolation_set(task, app_conf->isolation, process); 75 if (nxt_slow_path(ret != NXT_OK)) { 76 return ret; 77 } 78 } 79 80 #if (NXT_HAVE_CLONE_NEWUSER) 81 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 82 cap_setid = 1; 83 } 84 #endif 85 86 #if (NXT_HAVE_ISOLATION_ROOTFS) 87 if (process->isolation.rootfs != NULL) { 88 ret = nxt_isolation_set_mounts(task, process, &app_conf->type); 89 if (nxt_slow_path(ret != NXT_OK)) { 90 return ret; 91 } 92 } 93 #endif 94 95 if (cap_setid) { 96 ret = nxt_process_creds_set(task, process, &app_conf->user, 97 &app_conf->group); 98 99 if (nxt_slow_path(ret != NXT_OK)) { 100 return ret; 101 } 102 103 } else { 104 if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, 105 nxt_strlen(rt->user_cred.user))) 106 { 107 nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " 108 "missing capabilities", &app_conf->user, &app_conf->name); 109 110 return NXT_ERROR; 111 } 112 113 if (app_conf->group.length > 0 114 && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, 115 nxt_strlen(rt->group))) 116 { 117 nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " 118 "missing capabilities", &app_conf->group, 119 &app_conf->name); 120 121 return NXT_ERROR; 122 } 123 } 124 125 #if (NXT_HAVE_CLONE_NEWUSER) 126 ret = nxt_isolation_vldt_creds(task, process); 127 if (nxt_slow_path(ret != NXT_OK)) { 128 return ret; 129 } 130 #endif 131 132 return NXT_OK; 133 } 134 135 136 static nxt_int_t 137 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation, 138 nxt_process_t *process) 139 { 140 #if (NXT_HAVE_CLONE) 141 if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process) 142 != NXT_OK)) 143 { 144 return NXT_ERROR; 145 } 146 #endif 147 148 #if (NXT_HAVE_CLONE_NEWUSER) 149 if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process) 150 != NXT_OK)) 151 { 152 return NXT_ERROR; 153 } 154 #endif 155 156 #if (NXT_HAVE_ISOLATION_ROOTFS) 157 if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process) 158 != NXT_OK)) 159 { 160 return NXT_ERROR; 161 } 162 #endif 163 164 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 165 if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process) 166 != NXT_OK)) 167 { 168 return NXT_ERROR; 169 } 170 #endif 171 172 return NXT_OK; 173 } 174 175 176 #if (NXT_HAVE_CLONE) 177 178 static nxt_int_t 179 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, 180 nxt_process_t *process) 181 { 182 nxt_int_t ret; 183 nxt_conf_value_t *obj; 184 185 static nxt_str_t nsname = nxt_string("namespaces"); 186 187 obj = nxt_conf_get_object_member(isolation, &nsname, NULL); 188 if (obj != NULL) { 189 ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone); 190 if (nxt_slow_path(ret != NXT_OK)) { 191 return NXT_ERROR; 192 } 193 } 194 195 return NXT_OK; 196 } 197 198 #endif 199 200 201 #if (NXT_HAVE_CLONE_NEWUSER) 202 203 static nxt_int_t 204 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation, 205 nxt_process_t *process) 206 { 207 nxt_int_t ret; 208 nxt_clone_t *clone; 209 nxt_conf_value_t *array; 210 211 static nxt_str_t uidname = nxt_string("uidmap"); 212 static nxt_str_t gidname = nxt_string("gidmap"); 213 214 clone = &process->isolation.clone; 215 216 array = nxt_conf_get_object_member(isolation, &uidname, NULL); 217 if (array != NULL) { 218 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 219 &clone->uidmap); 220 221 if (nxt_slow_path(ret != NXT_OK)) { 222 return NXT_ERROR; 223 } 224 } 225 226 array = nxt_conf_get_object_member(isolation, &gidname, NULL); 227 if (array != NULL) { 228 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 229 &clone->gidmap); 230 231 if (nxt_slow_path(ret != NXT_OK)) { 232 return NXT_ERROR; 233 } 234 } 235 236 return NXT_OK; 237 } 238 239 240 static nxt_int_t 241 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, 242 nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) 243 { 244 nxt_int_t ret; 245 nxt_uint_t i; 246 nxt_conf_value_t *obj; 247 248 static nxt_conf_map_t nxt_clone_map_entry_conf[] = { 249 { 250 nxt_string("container"), 251 NXT_CONF_MAP_INT, 252 offsetof(nxt_clone_map_entry_t, container), 253 }, 254 255 { 256 nxt_string("host"), 257 NXT_CONF_MAP_INT, 258 offsetof(nxt_clone_map_entry_t, host), 259 }, 260 261 { 262 nxt_string("size"), 263 NXT_CONF_MAP_INT, 264 offsetof(nxt_clone_map_entry_t, size), 265 }, 266 }; 267 268 map->size = nxt_conf_array_elements_count(map_array); 269 270 if (map->size == 0) { 271 return NXT_OK; 272 } 273 274 map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); 275 if (nxt_slow_path(map->map == NULL)) { 276 return NXT_ERROR; 277 } 278 279 for (i = 0; i < map->size; i++) { 280 obj = nxt_conf_get_array_element(map_array, i); 281 282 ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, 283 nxt_nitems(nxt_clone_map_entry_conf), 284 map->map + i); 285 if (nxt_slow_path(ret != NXT_OK)) { 286 nxt_alert(task, "clone map entry map error"); 287 return NXT_ERROR; 288 } 289 } 290 291 return NXT_OK; 292 } 293 294 295 static nxt_int_t 296 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process) 297 { 298 nxt_int_t ret; 299 nxt_clone_t *clone; 300 nxt_credential_t *creds; 301 302 clone = &process->isolation.clone; 303 creds = process->user_cred; 304 305 if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { 306 return NXT_OK; 307 } 308 309 if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { 310 if (nxt_slow_path(clone->uidmap.size > 0)) { 311 nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " 312 "\"isolation.namespaces.credential\" is false or unset"); 313 314 return NXT_ERROR; 315 } 316 317 if (nxt_slow_path(clone->gidmap.size > 0)) { 318 nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " 319 "\"isolation.namespaces.credential\" is false or unset"); 320 321 return NXT_ERROR; 322 } 323 324 return NXT_OK; 325 } 326 327 ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); 328 if (nxt_slow_path(ret != NXT_OK)) { 329 return NXT_ERROR; 330 } 331 332 return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); 333 } 334 335 #endif 336 337 338 #if (NXT_HAVE_CLONE) 339 340 static nxt_int_t 341 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, 342 nxt_clone_t *clone) 343 { 344 uint32_t index; 345 nxt_str_t name; 346 nxt_int_t flag; 347 nxt_conf_value_t *value; 348 349 index = 0; 350 351 for ( ;; ) { 352 value = nxt_conf_next_object_member(namespaces, &name, &index); 353 354 if (value == NULL) { 355 break; 356 } 357 358 flag = 0; 359 360 #if (NXT_HAVE_CLONE_NEWUSER) 361 if (nxt_str_eq(&name, "credential", 10)) { 362 flag = CLONE_NEWUSER; 363 } 364 #endif 365 366 #if (NXT_HAVE_CLONE_NEWPID) 367 if (nxt_str_eq(&name, "pid", 3)) { 368 flag = CLONE_NEWPID; 369 } 370 #endif 371 372 #if (NXT_HAVE_CLONE_NEWNET) 373 if (nxt_str_eq(&name, "network", 7)) { 374 flag = CLONE_NEWNET; 375 } 376 #endif 377 378 #if (NXT_HAVE_CLONE_NEWUTS) 379 if (nxt_str_eq(&name, "uname", 5)) { 380 flag = CLONE_NEWUTS; 381 } 382 #endif 383 384 #if (NXT_HAVE_CLONE_NEWNS) 385 if (nxt_str_eq(&name, "mount", 5)) { 386 flag = CLONE_NEWNS; 387 } 388 #endif 389 390 #if (NXT_HAVE_CLONE_NEWCGROUP) 391 if (nxt_str_eq(&name, "cgroup", 6)) { 392 flag = CLONE_NEWCGROUP; 393 } 394 #endif 395 396 if (!flag) { 397 nxt_alert(task, "unknown namespace flag: \"%V\"", &name); 398 return NXT_ERROR; 399 } 400 401 if (nxt_conf_get_boolean(value)) { 402 clone->flags |= flag; 403 } 404 } 405 406 return NXT_OK; 407 } 408 409 #endif 410 411 412 #if (NXT_HAVE_ISOLATION_ROOTFS) 413 414 static nxt_int_t 415 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, 416 nxt_process_t *process) 417 { 418 nxt_str_t str; 419 nxt_conf_value_t *obj; 420 421 static nxt_str_t rootfs_name = nxt_string("rootfs"); 422 423 obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); 424 if (obj != NULL) { 425 nxt_conf_get_string(obj, &str); 426 427 if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { 428 nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " 429 "than \"/\" but given \"%V\"", &str); 430 431 return NXT_ERROR; 432 } 433 434 if (str.start[str.length - 1] == '/') { 435 str.length--; 436 } 437 438 process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, 439 str.length + 1); 440 441 if (nxt_slow_path(process->isolation.rootfs == NULL)) { 442 return NXT_ERROR; 443 } 444 445 nxt_memcpy(process->isolation.rootfs, str.start, str.length); 446 447 process->isolation.rootfs[str.length] = '\0'; 448 } 449 450 return NXT_OK; 451 } 452 453 454 static nxt_int_t 455 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process, 456 nxt_str_t *app_type) 457 { 458 nxt_int_t ret, cap_chroot; 459 nxt_runtime_t *rt; 460 nxt_app_lang_module_t *lang; 461 462 rt = task->thread->runtime; 463 cap_chroot = rt->capabilities.chroot; 464 lang = nxt_app_lang_module(rt, app_type); 465 466 nxt_assert(lang != NULL); 467 468 #if (NXT_HAVE_CLONE_NEWUSER) 469 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 470 cap_chroot = 1; 471 } 472 #endif 473 474 if (!cap_chroot) { 475 nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); 476 return NXT_ERROR; 477 } 478 479 if (lang->mounts != NULL && lang->mounts->nelts > 0) { 480 ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts); 481 if (nxt_slow_path(ret != NXT_OK)) { 482 return NXT_ERROR; 483 } 484 485 process->isolation.cleanup = nxt_isolation_unmount_all; 486 } 487 488 return NXT_OK; 489 } 490 491 492 static nxt_int_t 493 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, 494 nxt_array_t *lang_mounts) 495 { 496 u_char *p; 497 size_t i, n, rootfs_len, len; 498 nxt_mp_t *mp; 499 nxt_array_t *mounts; 500 const u_char *rootfs; 501 nxt_fs_mount_t *mnt, *lang_mnt; 502 503 rootfs = process->isolation.rootfs; 504 rootfs_len = nxt_strlen(rootfs); 505 mp = process->mem_pool; 506 507 /* copy to init mem pool */ 508 mounts = nxt_array_copy(mp, NULL, lang_mounts); 509 if (mounts == NULL) { 510 return NXT_ERROR; 511 } 512 513 n = mounts->nelts; 514 mnt = mounts->elts; 515 lang_mnt = lang_mounts->elts; 516 517 for (i = 0; i < n; i++) { 518 len = nxt_strlen(lang_mnt[i].dst); 519 520 mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); 521 if (mnt[i].dst == NULL) { 522 return NXT_ERROR; 523 } 524 525 p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); 526 p = nxt_cpymem(p, lang_mnt[i].dst, len); 527 *p = '\0'; 528 } 529 530 process->isolation.mounts = mounts; 531 532 return NXT_OK; 533 } 534 535 536 void 537 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process) 538 { 539 size_t i, n; 540 nxt_array_t *mounts; 541 nxt_fs_mount_t *mnt; 542 543 nxt_debug(task, "unmount all (%s)", process->name); 544 545 mounts = process->isolation.mounts; 546 n = mounts->nelts; 547 mnt = mounts->elts; 548 549 for (i = 0; i < n; i++) { 550 nxt_fs_unmount(mnt[i].dst); 551 } 552 } 553 554 555 nxt_int_t 556 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) 557 { 558 size_t i, n; 559 nxt_int_t ret, hasproc; 560 struct stat st; 561 nxt_array_t *mounts; 562 const u_char *dst; 563 nxt_fs_mount_t *mnt; 564 565 hasproc = 0; 566 567 #if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS) 568 nxt_fs_mount_t mount; 569 570 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID) 571 && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) 572 { 573 /* 574 * This mount point will automatically be gone when the namespace is 575 * destroyed. 576 */ 577 578 mount.fstype = (u_char *) "proc"; 579 mount.src = (u_char *) "proc"; 580 mount.dst = (u_char *) "/proc"; 581 mount.data = (u_char *) ""; 582 mount.flags = 0; 583 584 ret = nxt_fs_mkdir_all(mount.dst, S_IRWXU | S_IRWXG | S_IRWXO); 585 if (nxt_fast_path(ret == NXT_OK)) { 586 ret = nxt_fs_mount(task, &mount); 587 if (nxt_fast_path(ret == NXT_OK)) { 588 hasproc = 1; 589 } 590 591 } else { 592 nxt_log(task, NXT_LOG_WARN, "mkdir(%s) %E", mount.dst, nxt_errno); 593 } 594 } 595 #endif 596 597 mounts = process->isolation.mounts; 598 599 n = mounts->nelts; 600 mnt = mounts->elts; 601 602 for (i = 0; i < n; i++) { 603 dst = mnt[i].dst; 604 605 if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0 606 && stat((const char *) mnt[i].src, &st) != 0)) 607 { 608 nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); 609 continue; 610 } 611 612 if (hasproc && nxt_memcmp(mnt[i].fstype, "proc", 4) == 0 613 && nxt_memcmp(mnt[i].dst, "/proc", 5) == 0) 614 { 615 continue; 616 } 617 618 ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); 619 if (nxt_slow_path(ret != NXT_OK)) { 620 nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); 621 goto undo; 622 } 623 624 ret = nxt_fs_mount(task, &mnt[i]); 625 if (nxt_slow_path(ret != NXT_OK)) { 626 goto undo; 627 } 628 } 629 630 return NXT_OK; 631 632 undo: 633 634 n = i + 1; 635 636 for (i = 0; i < n; i++) { 637 nxt_fs_unmount(mnt[i].dst); 638 } 639 640 return NXT_ERROR; 641 } 642 643 644 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 645 646 nxt_int_t 647 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 648 { 649 char *rootfs; 650 nxt_int_t ret; 651 652 rootfs = (char *) process->isolation.rootfs; 653 654 nxt_debug(task, "change root: %s", rootfs); 655 656 if (NXT_CLONE_MNT(process->isolation.clone.flags)) { 657 ret = nxt_isolation_pivot_root(task, rootfs); 658 659 } else { 660 ret = nxt_isolation_chroot(task, rootfs); 661 } 662 663 if (nxt_fast_path(ret == NXT_OK)) { 664 if (nxt_slow_path(chdir("/") < 0)) { 665 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 666 return NXT_ERROR; 667 } 668 } 669 670 return ret; 671 } 672 673 674 /* 675 * pivot_root(2) can only be safely used with containers, otherwise it can 676 * umount(2) the global root filesystem and screw up the machine. 677 */ 678 679 static nxt_int_t 680 nxt_isolation_pivot_root(nxt_task_t *task, const char *path) 681 { 682 /* 683 * This implementation makes use of a kernel trick that works for ages 684 * and now documented in Linux kernel 5. 685 * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ 686 */ 687 688 if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { 689 nxt_alert(task, "failed to make / a slave mount %E", nxt_errno); 690 return NXT_ERROR; 691 } 692 693 if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) { 694 return NXT_ERROR; 695 } 696 697 if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { 698 nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); 699 return NXT_ERROR; 700 } 701 702 if (nxt_slow_path(chdir(path) != 0)) { 703 nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); 704 return NXT_ERROR; 705 } 706 707 if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { 708 nxt_alert(task, "failed to pivot_root %E", nxt_errno); 709 return NXT_ERROR; 710 } 711 712 /* 713 * Make oldroot a slave mount to avoid unmounts getting propagated to the 714 * host. 715 */ 716 if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { 717 nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); 718 return NXT_ERROR; 719 } 720 721 if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { 722 nxt_alert(task, "failed to umount old root directory %E", nxt_errno); 723 return NXT_ERROR; 724 } 725 726 return NXT_OK; 727 } 728 729 730 static nxt_int_t 731 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs) 732 { 733 char *parent_mnt; 734 FILE *procfile; 735 u_char **mounts; 736 size_t len; 737 uint8_t *shared; 738 nxt_int_t ret, index, nmounts; 739 struct mntent *ent; 740 741 static const char *mount_path = "/proc/self/mounts"; 742 743 ret = NXT_ERROR; 744 ent = NULL; 745 shared = NULL; 746 procfile = NULL; 747 parent_mnt = NULL; 748 749 nmounts = 256; 750 751 mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); 752 if (nxt_slow_path(mounts == NULL)) { 753 goto fail; 754 } 755 756 shared = nxt_malloc(nmounts); 757 if (nxt_slow_path(shared == NULL)) { 758 goto fail; 759 } 760 761 procfile = setmntent(mount_path, "r"); 762 if (nxt_slow_path(procfile == NULL)) { 763 nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); 764 765 goto fail; 766 } 767 768 index = 0; 769 770 again: 771 772 for ( ; index < nmounts; index++) { 773 ent = getmntent(procfile); 774 if (ent == NULL) { 775 nmounts = index; 776 break; 777 } 778 779 mounts[index] = (u_char *) strdup(ent->mnt_dir); 780 shared[index] = hasmntopt(ent, "shared") != NULL; 781 } 782 783 if (ent != NULL) { 784 /* there are still entries to be read */ 785 786 nmounts *= 2; 787 mounts = nxt_realloc(mounts, nmounts); 788 if (nxt_slow_path(mounts == NULL)) { 789 goto fail; 790 } 791 792 shared = nxt_realloc(shared, nmounts); 793 if (nxt_slow_path(shared == NULL)) { 794 goto fail; 795 } 796 797 goto again; 798 } 799 800 for (index = 0; index < nmounts; index++) { 801 if (nxt_strcmp(mounts[index], rootfs) == 0) { 802 parent_mnt = (char *) rootfs; 803 break; 804 } 805 } 806 807 if (parent_mnt == NULL) { 808 len = nxt_strlen(rootfs); 809 810 parent_mnt = nxt_malloc(len + 1); 811 if (parent_mnt == NULL) { 812 goto fail; 813 } 814 815 nxt_memcpy(parent_mnt, rootfs, len); 816 parent_mnt[len] = '\0'; 817 818 if (parent_mnt[len - 1] == '/') { 819 parent_mnt[len - 1] = '\0'; 820 len--; 821 } 822 823 for ( ;; ) { 824 for (index = 0; index < nmounts; index++) { 825 if (nxt_strcmp(mounts[index], parent_mnt) == 0) { 826 goto found; 827 } 828 } 829 830 if (len == 1 && parent_mnt[0] == '/') { 831 nxt_alert(task, "parent mount not found"); 832 goto fail; 833 } 834 835 /* parent dir */ 836 while (parent_mnt[len - 1] != '/' && len > 0) { 837 len--; 838 } 839 840 if (nxt_slow_path(len == 0)) { 841 nxt_alert(task, "parent mount not found"); 842 goto fail; 843 } 844 845 if (len == 1) { 846 parent_mnt[len] = '\0'; /* / */ 847 } else { 848 parent_mnt[len - 1] = '\0'; /* /<path> */ 849 } 850 } 851 } 852 853 found: 854 855 if (shared[index]) { 856 if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { 857 nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, 858 nxt_errno); 859 860 goto fail; 861 } 862 } 863 864 ret = NXT_OK; 865 866 fail: 867 868 if (procfile != NULL) { 869 endmntent(procfile); 870 } 871 872 if (mounts != NULL) { 873 for (index = 0; index < nmounts; index++) { 874 nxt_free(mounts[index]); 875 } 876 877 nxt_free(mounts); 878 } 879 880 if (shared != NULL) { 881 nxt_free(shared); 882 } 883 884 if (parent_mnt != NULL && parent_mnt != rootfs) { 885 nxt_free(parent_mnt); 886 } 887 888 return ret; 889 } 890 891 892 nxt_inline int 893 nxt_pivot_root(const char *new_root, const char *old_root) 894 { 895 return syscall(__NR_pivot_root, new_root, old_root); 896 } 897 898 899 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */ 900 901 902 nxt_int_t 903 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 904 { 905 char *rootfs; 906 907 rootfs = (char *) process->isolation.rootfs; 908 909 nxt_debug(task, "change root: %s", rootfs); 910 911 if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) { 912 if (nxt_slow_path(chdir("/") < 0)) { 913 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 914 return NXT_ERROR; 915 } 916 917 return NXT_OK; 918 } 919 920 return NXT_ERROR; 921 } 922 923 #endif 924 925 926 static nxt_int_t 927 nxt_isolation_chroot(nxt_task_t *task, const char *path) 928 { 929 if (nxt_slow_path(chroot(path) < 0)) { 930 nxt_alert(task, "chroot(%s) %E", path, nxt_errno); 931 return NXT_ERROR; 932 } 933 934 return NXT_OK; 935 } 936 937 #endif /* NXT_HAVE_ISOLATION_ROOTFS */ 938 939 940 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 941 942 static nxt_int_t 943 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, 944 nxt_process_t *process) 945 { 946 nxt_conf_value_t *obj; 947 948 static nxt_str_t new_privs_name = nxt_string("new_privs"); 949 950 obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); 951 if (obj != NULL) { 952 process->isolation.new_privs = nxt_conf_get_boolean(obj); 953 } 954 955 return NXT_OK; 956 } 957 958 #endif 959