1 /* 2 * Copyright (C) NGINX, Inc. 3 */ 4 5 #include <nxt_main.h> 6 #include <nxt_application.h> 7 #include <nxt_process.h> 8 #include <nxt_isolation.h> 9 10 #if (NXT_HAVE_PIVOT_ROOT) 11 #include <mntent.h> 12 #endif 13 14 15 static nxt_int_t nxt_isolation_set(nxt_task_t *task, 16 nxt_conf_value_t *isolation, nxt_process_t *process); 17 18 #if (NXT_HAVE_CLONE) 19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task, 20 nxt_conf_value_t *isolation, nxt_process_t *process); 21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task, 22 nxt_conf_value_t *namespaces, nxt_clone_t *clone); 23 #endif 24 25 #if (NXT_HAVE_CLONE_NEWUSER) 26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task, 27 nxt_conf_value_t *isolation, nxt_process_t *process); 28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task, 29 nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, 30 nxt_clone_credential_map_t *map); 31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task, 32 nxt_process_t *process); 33 #endif 34 35 #if (NXT_HAVE_ISOLATION_ROOTFS) 36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task, 37 nxt_conf_value_t *isolation, nxt_process_t *process); 38 static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task, 39 nxt_conf_value_t *isolation, nxt_process_t *process); 40 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task, 41 nxt_process_t *process, nxt_str_t *app_type); 42 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task, 43 nxt_process_t *process, nxt_array_t *syspaths); 44 static int nxt_cdecl nxt_isolation_mount_compare(const void *v1, 45 const void *v2); 46 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process); 47 48 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 49 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs); 50 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task, 51 const char *rootfs); 52 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root); 53 #endif 54 55 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path); 56 #endif 57 58 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 59 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task, 60 nxt_conf_value_t *isolation, nxt_process_t *process); 61 #endif 62 63 64 nxt_int_t 65 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, 66 nxt_mp_t *mp) 67 { 68 nxt_int_t cap_setid; 69 nxt_int_t ret; 70 nxt_runtime_t *rt; 71 nxt_common_app_conf_t *app_conf; 72 73 rt = task->thread->runtime; 74 app_conf = process->data.app; 75 cap_setid = rt->capabilities.setid; 76 77 if (app_conf->isolation != NULL) { 78 ret = nxt_isolation_set(task, app_conf->isolation, process); 79 if (nxt_slow_path(ret != NXT_OK)) { 80 return ret; 81 } 82 } 83 84 #if (NXT_HAVE_CLONE_NEWUSER) 85 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 86 cap_setid = 1; 87 } 88 #endif 89 90 #if (NXT_HAVE_ISOLATION_ROOTFS) 91 if (process->isolation.rootfs != NULL) { 92 ret = nxt_isolation_set_mounts(task, process, &app_conf->type); 93 if (nxt_slow_path(ret != NXT_OK)) { 94 return ret; 95 } 96 } 97 #endif 98 99 if (cap_setid) { 100 ret = nxt_process_creds_set(task, process, &app_conf->user, 101 &app_conf->group); 102 103 if (nxt_slow_path(ret != NXT_OK)) { 104 return ret; 105 } 106 107 } else { 108 if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, 109 nxt_strlen(rt->user_cred.user))) 110 { 111 nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " 112 "missing capabilities", &app_conf->user, &app_conf->name); 113 114 return NXT_ERROR; 115 } 116 117 if (app_conf->group.length > 0 118 && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, 119 nxt_strlen(rt->group))) 120 { 121 nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " 122 "missing capabilities", &app_conf->group, 123 &app_conf->name); 124 125 return NXT_ERROR; 126 } 127 } 128 129 #if (NXT_HAVE_CLONE_NEWUSER) 130 ret = nxt_isolation_vldt_creds(task, process); 131 if (nxt_slow_path(ret != NXT_OK)) { 132 return ret; 133 } 134 #endif 135 136 return NXT_OK; 137 } 138 139 140 static nxt_int_t 141 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation, 142 nxt_process_t *process) 143 { 144 #if (NXT_HAVE_CLONE) 145 if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process) 146 != NXT_OK)) 147 { 148 return NXT_ERROR; 149 } 150 #endif 151 152 #if (NXT_HAVE_CLONE_NEWUSER) 153 if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process) 154 != NXT_OK)) 155 { 156 return NXT_ERROR; 157 } 158 #endif 159 160 #if (NXT_HAVE_ISOLATION_ROOTFS) 161 if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process) 162 != NXT_OK)) 163 { 164 return NXT_ERROR; 165 } 166 167 if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process) 168 != NXT_OK)) 169 { 170 return NXT_ERROR; 171 } 172 #endif 173 174 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 175 if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process) 176 != NXT_OK)) 177 { 178 return NXT_ERROR; 179 } 180 #endif 181 182 return NXT_OK; 183 } 184 185 186 #if (NXT_HAVE_CLONE) 187 188 static nxt_int_t 189 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, 190 nxt_process_t *process) 191 { 192 nxt_int_t ret; 193 nxt_conf_value_t *obj; 194 195 static nxt_str_t nsname = nxt_string("namespaces"); 196 197 obj = nxt_conf_get_object_member(isolation, &nsname, NULL); 198 if (obj != NULL) { 199 ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone); 200 if (nxt_slow_path(ret != NXT_OK)) { 201 return NXT_ERROR; 202 } 203 } 204 205 return NXT_OK; 206 } 207 208 #endif 209 210 211 #if (NXT_HAVE_CLONE_NEWUSER) 212 213 static nxt_int_t 214 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation, 215 nxt_process_t *process) 216 { 217 nxt_int_t ret; 218 nxt_clone_t *clone; 219 nxt_conf_value_t *array; 220 221 static nxt_str_t uidname = nxt_string("uidmap"); 222 static nxt_str_t gidname = nxt_string("gidmap"); 223 224 clone = &process->isolation.clone; 225 226 array = nxt_conf_get_object_member(isolation, &uidname, NULL); 227 if (array != NULL) { 228 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 229 &clone->uidmap); 230 231 if (nxt_slow_path(ret != NXT_OK)) { 232 return NXT_ERROR; 233 } 234 } 235 236 array = nxt_conf_get_object_member(isolation, &gidname, NULL); 237 if (array != NULL) { 238 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 239 &clone->gidmap); 240 241 if (nxt_slow_path(ret != NXT_OK)) { 242 return NXT_ERROR; 243 } 244 } 245 246 return NXT_OK; 247 } 248 249 250 static nxt_int_t 251 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, 252 nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) 253 { 254 nxt_int_t ret; 255 nxt_uint_t i; 256 nxt_conf_value_t *obj; 257 258 static nxt_conf_map_t nxt_clone_map_entry_conf[] = { 259 { 260 nxt_string("container"), 261 NXT_CONF_MAP_INT, 262 offsetof(nxt_clone_map_entry_t, container), 263 }, 264 265 { 266 nxt_string("host"), 267 NXT_CONF_MAP_INT, 268 offsetof(nxt_clone_map_entry_t, host), 269 }, 270 271 { 272 nxt_string("size"), 273 NXT_CONF_MAP_INT, 274 offsetof(nxt_clone_map_entry_t, size), 275 }, 276 }; 277 278 map->size = nxt_conf_array_elements_count(map_array); 279 280 if (map->size == 0) { 281 return NXT_OK; 282 } 283 284 map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); 285 if (nxt_slow_path(map->map == NULL)) { 286 return NXT_ERROR; 287 } 288 289 for (i = 0; i < map->size; i++) { 290 obj = nxt_conf_get_array_element(map_array, i); 291 292 ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, 293 nxt_nitems(nxt_clone_map_entry_conf), 294 map->map + i); 295 if (nxt_slow_path(ret != NXT_OK)) { 296 nxt_alert(task, "clone map entry map error"); 297 return NXT_ERROR; 298 } 299 } 300 301 return NXT_OK; 302 } 303 304 305 static nxt_int_t 306 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process) 307 { 308 nxt_int_t ret; 309 nxt_clone_t *clone; 310 nxt_credential_t *creds; 311 312 clone = &process->isolation.clone; 313 creds = process->user_cred; 314 315 if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { 316 return NXT_OK; 317 } 318 319 if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { 320 if (nxt_slow_path(clone->uidmap.size > 0)) { 321 nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " 322 "\"isolation.namespaces.credential\" is false or unset"); 323 324 return NXT_ERROR; 325 } 326 327 if (nxt_slow_path(clone->gidmap.size > 0)) { 328 nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " 329 "\"isolation.namespaces.credential\" is false or unset"); 330 331 return NXT_ERROR; 332 } 333 334 return NXT_OK; 335 } 336 337 ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); 338 if (nxt_slow_path(ret != NXT_OK)) { 339 return NXT_ERROR; 340 } 341 342 return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); 343 } 344 345 #endif 346 347 348 #if (NXT_HAVE_CLONE) 349 350 static nxt_int_t 351 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, 352 nxt_clone_t *clone) 353 { 354 uint32_t index; 355 nxt_str_t name; 356 nxt_int_t flag; 357 nxt_conf_value_t *value; 358 359 index = 0; 360 361 for ( ;; ) { 362 value = nxt_conf_next_object_member(namespaces, &name, &index); 363 364 if (value == NULL) { 365 break; 366 } 367 368 flag = 0; 369 370 #if (NXT_HAVE_CLONE_NEWUSER) 371 if (nxt_str_eq(&name, "credential", 10)) { 372 flag = CLONE_NEWUSER; 373 } 374 #endif 375 376 #if (NXT_HAVE_CLONE_NEWPID) 377 if (nxt_str_eq(&name, "pid", 3)) { 378 flag = CLONE_NEWPID; 379 } 380 #endif 381 382 #if (NXT_HAVE_CLONE_NEWNET) 383 if (nxt_str_eq(&name, "network", 7)) { 384 flag = CLONE_NEWNET; 385 } 386 #endif 387 388 #if (NXT_HAVE_CLONE_NEWUTS) 389 if (nxt_str_eq(&name, "uname", 5)) { 390 flag = CLONE_NEWUTS; 391 } 392 #endif 393 394 #if (NXT_HAVE_CLONE_NEWNS) 395 if (nxt_str_eq(&name, "mount", 5)) { 396 flag = CLONE_NEWNS; 397 } 398 #endif 399 400 #if (NXT_HAVE_CLONE_NEWCGROUP) 401 if (nxt_str_eq(&name, "cgroup", 6)) { 402 flag = CLONE_NEWCGROUP; 403 } 404 #endif 405 406 if (!flag) { 407 nxt_alert(task, "unknown namespace flag: \"%V\"", &name); 408 return NXT_ERROR; 409 } 410 411 if (nxt_conf_get_boolean(value)) { 412 clone->flags |= flag; 413 } 414 } 415 416 return NXT_OK; 417 } 418 419 #endif 420 421 422 #if (NXT_HAVE_ISOLATION_ROOTFS) 423 424 static nxt_int_t 425 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, 426 nxt_process_t *process) 427 { 428 nxt_str_t str; 429 nxt_conf_value_t *obj; 430 431 static nxt_str_t rootfs_name = nxt_string("rootfs"); 432 433 obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); 434 if (obj != NULL) { 435 nxt_conf_get_string(obj, &str); 436 437 if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { 438 nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " 439 "than \"/\" but given \"%V\"", &str); 440 441 return NXT_ERROR; 442 } 443 444 if (str.start[str.length - 1] == '/') { 445 str.length--; 446 } 447 448 process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, 449 str.length + 1); 450 451 if (nxt_slow_path(process->isolation.rootfs == NULL)) { 452 return NXT_ERROR; 453 } 454 455 nxt_memcpy(process->isolation.rootfs, str.start, str.length); 456 457 process->isolation.rootfs[str.length] = '\0'; 458 } 459 460 return NXT_OK; 461 } 462 463 464 static nxt_int_t 465 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation, 466 nxt_process_t *process) 467 { 468 nxt_conf_value_t *conf, *value; 469 nxt_process_automount_t *automount; 470 471 static nxt_str_t automount_name = nxt_string("automount"); 472 static nxt_str_t langdeps_name = nxt_string("language_deps"); 473 474 automount = &process->isolation.automount; 475 476 automount->language_deps = 1; 477 478 conf = nxt_conf_get_object_member(isolation, &automount_name, NULL); 479 if (conf != NULL) { 480 value = nxt_conf_get_object_member(conf, &langdeps_name, NULL); 481 if (value != NULL) { 482 automount->language_deps = nxt_conf_get_boolean(value); 483 } 484 } 485 486 return NXT_OK; 487 } 488 489 490 static nxt_int_t 491 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process, 492 nxt_str_t *app_type) 493 { 494 nxt_int_t ret, cap_chroot; 495 nxt_runtime_t *rt; 496 nxt_app_lang_module_t *lang; 497 498 rt = task->thread->runtime; 499 cap_chroot = rt->capabilities.chroot; 500 lang = nxt_app_lang_module(rt, app_type); 501 502 nxt_assert(lang != NULL); 503 504 #if (NXT_HAVE_CLONE_NEWUSER) 505 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 506 cap_chroot = 1; 507 } 508 #endif 509 510 if (!cap_chroot) { 511 nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); 512 return NXT_ERROR; 513 } 514 515 ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts); 516 if (nxt_slow_path(ret != NXT_OK)) { 517 return NXT_ERROR; 518 } 519 520 process->isolation.cleanup = nxt_isolation_unmount_all; 521 522 return NXT_OK; 523 } 524 525 526 static nxt_int_t 527 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, 528 nxt_array_t *lang_mounts) 529 { 530 u_char *p; 531 size_t i, n, rootfs_len, len; 532 nxt_mp_t *mp; 533 nxt_array_t *mounts; 534 const u_char *rootfs; 535 nxt_fs_mount_t *mnt, *lang_mnt; 536 537 mp = process->mem_pool; 538 539 /* copy to init mem pool */ 540 mounts = nxt_array_copy(mp, NULL, lang_mounts); 541 if (mounts == NULL) { 542 return NXT_ERROR; 543 } 544 545 n = mounts->nelts; 546 mnt = mounts->elts; 547 lang_mnt = lang_mounts->elts; 548 549 rootfs = process->isolation.rootfs; 550 rootfs_len = nxt_strlen(rootfs); 551 552 for (i = 0; i < n; i++) { 553 len = nxt_strlen(lang_mnt[i].dst); 554 555 mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); 556 if (nxt_slow_path(mnt[i].dst == NULL)) { 557 return NXT_ERROR; 558 } 559 560 p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); 561 p = nxt_cpymem(p, lang_mnt[i].dst, len); 562 *p = '\0'; 563 } 564 565 mnt = nxt_array_add(mounts); 566 if (nxt_slow_path(mnt == NULL)) { 567 return NXT_ERROR; 568 } 569 570 mnt->src = (u_char *) "tmpfs"; 571 mnt->fstype = (u_char *) "tmpfs"; 572 mnt->flags = NXT_MS_NOSUID | NXT_MS_NODEV | NXT_MS_NOEXEC | NXT_MS_RELATIME; 573 mnt->data = (u_char *) "size=1m,mode=777"; 574 mnt->builtin = 1; 575 576 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1); 577 if (nxt_slow_path(mnt->dst == NULL)) { 578 return NXT_ERROR; 579 } 580 581 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 582 p = nxt_cpymem(p, "/tmp", 4); 583 *p = '\0'; 584 585 #if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS) 586 587 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID) 588 && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) 589 { 590 mnt = nxt_array_add(mounts); 591 if (nxt_slow_path(mnt == NULL)) { 592 return NXT_ERROR; 593 } 594 595 mnt->fstype = (u_char *) "proc"; 596 mnt->src = (u_char *) "proc"; 597 598 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1); 599 if (nxt_slow_path(mnt->dst == NULL)) { 600 return NXT_ERROR; 601 } 602 603 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 604 p = nxt_cpymem(p, "/proc", 5); 605 *p = '\0'; 606 607 mnt->data = (u_char *) ""; 608 mnt->flags = 0; 609 } 610 #endif 611 612 qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t), 613 nxt_isolation_mount_compare); 614 615 process->isolation.mounts = mounts; 616 617 return NXT_OK; 618 } 619 620 621 static int nxt_cdecl 622 nxt_isolation_mount_compare(const void *v1, const void *v2) 623 { 624 const nxt_fs_mount_t *mnt1, *mnt2; 625 626 mnt1 = v1; 627 mnt2 = v2; 628 629 return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src); 630 } 631 632 633 void 634 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process) 635 { 636 size_t n; 637 nxt_array_t *mounts; 638 nxt_runtime_t *rt; 639 nxt_fs_mount_t *mnt; 640 nxt_process_automount_t *automount; 641 642 rt = task->thread->runtime; 643 644 if (!rt->capabilities.setid) { 645 return; 646 } 647 648 #if (NXT_HAVE_CLONE_NEWNS) 649 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) { 650 return; 651 } 652 #endif 653 654 nxt_debug(task, "unmount all (%s)", process->name); 655 656 automount = &process->isolation.automount; 657 mounts = process->isolation.mounts; 658 n = mounts->nelts; 659 mnt = mounts->elts; 660 661 while (n > 0) { 662 n--; 663 664 if (mnt[n].builtin && !automount->language_deps) { 665 continue; 666 } 667 668 nxt_fs_unmount(mnt[n].dst); 669 } 670 } 671 672 673 nxt_int_t 674 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) 675 { 676 size_t i, n; 677 nxt_int_t ret; 678 struct stat st; 679 nxt_array_t *mounts; 680 const u_char *dst; 681 nxt_fs_mount_t *mnt; 682 nxt_process_automount_t *automount; 683 684 automount = &process->isolation.automount; 685 mounts = process->isolation.mounts; 686 687 n = mounts->nelts; 688 mnt = mounts->elts; 689 690 for (i = 0; i < n; i++) { 691 dst = mnt[i].dst; 692 693 if (mnt[i].builtin && !automount->language_deps) { 694 continue; 695 } 696 697 if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0 698 && stat((const char *) mnt[i].src, &st) != 0)) 699 { 700 nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); 701 continue; 702 } 703 704 ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); 705 if (nxt_slow_path(ret != NXT_OK)) { 706 nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); 707 goto undo; 708 } 709 710 ret = nxt_fs_mount(task, &mnt[i]); 711 if (nxt_slow_path(ret != NXT_OK)) { 712 goto undo; 713 } 714 } 715 716 return NXT_OK; 717 718 undo: 719 720 n = i + 1; 721 722 for (i = 0; i < n; i++) { 723 nxt_fs_unmount(mnt[i].dst); 724 } 725 726 return NXT_ERROR; 727 } 728 729 730 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 731 732 nxt_int_t 733 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 734 { 735 char *rootfs; 736 nxt_int_t ret; 737 738 rootfs = (char *) process->isolation.rootfs; 739 740 nxt_debug(task, "change root: %s", rootfs); 741 742 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) { 743 ret = nxt_isolation_pivot_root(task, rootfs); 744 745 } else { 746 ret = nxt_isolation_chroot(task, rootfs); 747 } 748 749 if (nxt_fast_path(ret == NXT_OK)) { 750 if (nxt_slow_path(chdir("/") < 0)) { 751 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 752 return NXT_ERROR; 753 } 754 } 755 756 return ret; 757 } 758 759 760 /* 761 * pivot_root(2) can only be safely used with containers, otherwise it can 762 * umount(2) the global root filesystem and screw up the machine. 763 */ 764 765 static nxt_int_t 766 nxt_isolation_pivot_root(nxt_task_t *task, const char *path) 767 { 768 /* 769 * This implementation makes use of a kernel trick that works for ages 770 * and now documented in Linux kernel 5. 771 * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ 772 */ 773 774 if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { 775 nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno); 776 return NXT_ERROR; 777 } 778 779 if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) { 780 return NXT_ERROR; 781 } 782 783 if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { 784 nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); 785 return NXT_ERROR; 786 } 787 788 if (nxt_slow_path(chdir(path) != 0)) { 789 nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); 790 return NXT_ERROR; 791 } 792 793 if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { 794 nxt_alert(task, "failed to pivot_root %E", nxt_errno); 795 return NXT_ERROR; 796 } 797 798 /* 799 * Demote the oldroot mount to avoid unmounts getting propagated to 800 * the host. 801 */ 802 if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { 803 nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); 804 return NXT_ERROR; 805 } 806 807 if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { 808 nxt_alert(task, "failed to umount old root directory %E", nxt_errno); 809 return NXT_ERROR; 810 } 811 812 return NXT_OK; 813 } 814 815 816 static nxt_int_t 817 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs) 818 { 819 char *parent_mnt; 820 FILE *procfile; 821 u_char **mounts; 822 size_t len; 823 uint8_t *shared; 824 nxt_int_t ret, index, nmounts; 825 struct mntent *ent; 826 827 static const char *mount_path = "/proc/self/mounts"; 828 829 ret = NXT_ERROR; 830 ent = NULL; 831 shared = NULL; 832 procfile = NULL; 833 parent_mnt = NULL; 834 835 nmounts = 256; 836 837 mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); 838 if (nxt_slow_path(mounts == NULL)) { 839 goto fail; 840 } 841 842 shared = nxt_malloc(nmounts); 843 if (nxt_slow_path(shared == NULL)) { 844 goto fail; 845 } 846 847 procfile = setmntent(mount_path, "r"); 848 if (nxt_slow_path(procfile == NULL)) { 849 nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); 850 851 goto fail; 852 } 853 854 index = 0; 855 856 again: 857 858 for ( ; index < nmounts; index++) { 859 ent = getmntent(procfile); 860 if (ent == NULL) { 861 nmounts = index; 862 break; 863 } 864 865 mounts[index] = (u_char *) strdup(ent->mnt_dir); 866 shared[index] = hasmntopt(ent, "shared") != NULL; 867 } 868 869 if (ent != NULL) { 870 /* there are still entries to be read */ 871 872 nmounts *= 2; 873 mounts = nxt_realloc(mounts, nmounts); 874 if (nxt_slow_path(mounts == NULL)) { 875 goto fail; 876 } 877 878 shared = nxt_realloc(shared, nmounts); 879 if (nxt_slow_path(shared == NULL)) { 880 goto fail; 881 } 882 883 goto again; 884 } 885 886 for (index = 0; index < nmounts; index++) { 887 if (nxt_strcmp(mounts[index], rootfs) == 0) { 888 parent_mnt = (char *) rootfs; 889 break; 890 } 891 } 892 893 if (parent_mnt == NULL) { 894 len = nxt_strlen(rootfs); 895 896 parent_mnt = nxt_malloc(len + 1); 897 if (parent_mnt == NULL) { 898 goto fail; 899 } 900 901 nxt_memcpy(parent_mnt, rootfs, len); 902 parent_mnt[len] = '\0'; 903 904 if (parent_mnt[len - 1] == '/') { 905 parent_mnt[len - 1] = '\0'; 906 len--; 907 } 908 909 for ( ;; ) { 910 for (index = 0; index < nmounts; index++) { 911 if (nxt_strcmp(mounts[index], parent_mnt) == 0) { 912 goto found; 913 } 914 } 915 916 if (len == 1 && parent_mnt[0] == '/') { 917 nxt_alert(task, "parent mount not found"); 918 goto fail; 919 } 920 921 /* parent dir */ 922 while (parent_mnt[len - 1] != '/' && len > 0) { 923 len--; 924 } 925 926 if (nxt_slow_path(len == 0)) { 927 nxt_alert(task, "parent mount not found"); 928 goto fail; 929 } 930 931 if (len == 1) { 932 parent_mnt[len] = '\0'; /* / */ 933 } else { 934 parent_mnt[len - 1] = '\0'; /* /<path> */ 935 } 936 } 937 } 938 939 found: 940 941 if (shared[index]) { 942 if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { 943 nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, 944 nxt_errno); 945 946 goto fail; 947 } 948 } 949 950 ret = NXT_OK; 951 952 fail: 953 954 if (procfile != NULL) { 955 endmntent(procfile); 956 } 957 958 if (mounts != NULL) { 959 for (index = 0; index < nmounts; index++) { 960 nxt_free(mounts[index]); 961 } 962 963 nxt_free(mounts); 964 } 965 966 if (shared != NULL) { 967 nxt_free(shared); 968 } 969 970 if (parent_mnt != NULL && parent_mnt != rootfs) { 971 nxt_free(parent_mnt); 972 } 973 974 return ret; 975 } 976 977 978 nxt_inline int 979 nxt_pivot_root(const char *new_root, const char *old_root) 980 { 981 return syscall(__NR_pivot_root, new_root, old_root); 982 } 983 984 985 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */ 986 987 988 nxt_int_t 989 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 990 { 991 char *rootfs; 992 993 rootfs = (char *) process->isolation.rootfs; 994 995 nxt_debug(task, "change root: %s", rootfs); 996 997 if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) { 998 if (nxt_slow_path(chdir("/") < 0)) { 999 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 1000 return NXT_ERROR; 1001 } 1002 1003 return NXT_OK; 1004 } 1005 1006 return NXT_ERROR; 1007 } 1008 1009 #endif 1010 1011 1012 static nxt_int_t 1013 nxt_isolation_chroot(nxt_task_t *task, const char *path) 1014 { 1015 if (nxt_slow_path(chroot(path) < 0)) { 1016 nxt_alert(task, "chroot(%s) %E", path, nxt_errno); 1017 return NXT_ERROR; 1018 } 1019 1020 return NXT_OK; 1021 } 1022 1023 #endif /* NXT_HAVE_ISOLATION_ROOTFS */ 1024 1025 1026 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 1027 1028 static nxt_int_t 1029 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, 1030 nxt_process_t *process) 1031 { 1032 nxt_conf_value_t *obj; 1033 1034 static nxt_str_t new_privs_name = nxt_string("new_privs"); 1035 1036 obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); 1037 if (obj != NULL) { 1038 process->isolation.new_privs = nxt_conf_get_boolean(obj); 1039 } 1040 1041 return NXT_OK; 1042 } 1043 1044 #endif 1045