1 /* 2 * Copyright (C) NGINX, Inc. 3 */ 4 5 #include <nxt_main.h> 6 #include <nxt_application.h> 7 #include <nxt_process.h> 8 #include <nxt_isolation.h> 9 10 #if (NXT_HAVE_PIVOT_ROOT) 11 #include <mntent.h> 12 #endif 13 14 15 static nxt_int_t nxt_isolation_set(nxt_task_t *task, 16 nxt_conf_value_t *isolation, nxt_process_t *process); 17 18 #if (NXT_HAVE_CLONE) 19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task, 20 nxt_conf_value_t *isolation, nxt_process_t *process); 21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task, 22 nxt_conf_value_t *namespaces, nxt_clone_t *clone); 23 #endif 24 25 #if (NXT_HAVE_CLONE_NEWUSER) 26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task, 27 nxt_conf_value_t *isolation, nxt_process_t *process); 28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task, 29 nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, 30 nxt_clone_credential_map_t *map); 31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task, 32 nxt_process_t *process); 33 #endif 34 35 #if (NXT_HAVE_ISOLATION_ROOTFS) 36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task, 37 nxt_conf_value_t *isolation, nxt_process_t *process); 38 static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task, 39 nxt_conf_value_t *isolation, nxt_process_t *process); 40 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task, 41 nxt_process_t *process, nxt_str_t *app_type); 42 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task, 43 nxt_process_t *process, nxt_array_t *syspaths); 44 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process); 45 46 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 47 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs); 48 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task, 49 const char *rootfs); 50 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root); 51 #endif 52 53 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path); 54 #endif 55 56 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 57 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task, 58 nxt_conf_value_t *isolation, nxt_process_t *process); 59 #endif 60 61 62 nxt_int_t 63 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, 64 nxt_mp_t *mp) 65 { 66 nxt_int_t cap_setid; 67 nxt_int_t ret; 68 nxt_runtime_t *rt; 69 nxt_common_app_conf_t *app_conf; 70 71 rt = task->thread->runtime; 72 app_conf = process->data.app; 73 cap_setid = rt->capabilities.setid; 74 75 if (app_conf->isolation != NULL) { 76 ret = nxt_isolation_set(task, app_conf->isolation, process); 77 if (nxt_slow_path(ret != NXT_OK)) { 78 return ret; 79 } 80 } 81 82 #if (NXT_HAVE_CLONE_NEWUSER) 83 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 84 cap_setid = 1; 85 } 86 #endif 87 88 #if (NXT_HAVE_ISOLATION_ROOTFS) 89 if (process->isolation.rootfs != NULL) { 90 ret = nxt_isolation_set_mounts(task, process, &app_conf->type); 91 if (nxt_slow_path(ret != NXT_OK)) { 92 return ret; 93 } 94 } 95 #endif 96 97 if (cap_setid) { 98 ret = nxt_process_creds_set(task, process, &app_conf->user, 99 &app_conf->group); 100 101 if (nxt_slow_path(ret != NXT_OK)) { 102 return ret; 103 } 104 105 } else { 106 if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, 107 nxt_strlen(rt->user_cred.user))) 108 { 109 nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " 110 "missing capabilities", &app_conf->user, &app_conf->name); 111 112 return NXT_ERROR; 113 } 114 115 if (app_conf->group.length > 0 116 && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, 117 nxt_strlen(rt->group))) 118 { 119 nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " 120 "missing capabilities", &app_conf->group, 121 &app_conf->name); 122 123 return NXT_ERROR; 124 } 125 } 126 127 #if (NXT_HAVE_CLONE_NEWUSER) 128 ret = nxt_isolation_vldt_creds(task, process); 129 if (nxt_slow_path(ret != NXT_OK)) { 130 return ret; 131 } 132 #endif 133 134 return NXT_OK; 135 } 136 137 138 static nxt_int_t 139 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation, 140 nxt_process_t *process) 141 { 142 #if (NXT_HAVE_CLONE) 143 if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process) 144 != NXT_OK)) 145 { 146 return NXT_ERROR; 147 } 148 #endif 149 150 #if (NXT_HAVE_CLONE_NEWUSER) 151 if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process) 152 != NXT_OK)) 153 { 154 return NXT_ERROR; 155 } 156 #endif 157 158 #if (NXT_HAVE_ISOLATION_ROOTFS) 159 if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process) 160 != NXT_OK)) 161 { 162 return NXT_ERROR; 163 } 164 165 if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process) 166 != NXT_OK)) 167 { 168 return NXT_ERROR; 169 } 170 #endif 171 172 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 173 if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process) 174 != NXT_OK)) 175 { 176 return NXT_ERROR; 177 } 178 #endif 179 180 return NXT_OK; 181 } 182 183 184 #if (NXT_HAVE_CLONE) 185 186 static nxt_int_t 187 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, 188 nxt_process_t *process) 189 { 190 nxt_int_t ret; 191 nxt_conf_value_t *obj; 192 193 static nxt_str_t nsname = nxt_string("namespaces"); 194 195 obj = nxt_conf_get_object_member(isolation, &nsname, NULL); 196 if (obj != NULL) { 197 ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone); 198 if (nxt_slow_path(ret != NXT_OK)) { 199 return NXT_ERROR; 200 } 201 } 202 203 return NXT_OK; 204 } 205 206 #endif 207 208 209 #if (NXT_HAVE_CLONE_NEWUSER) 210 211 static nxt_int_t 212 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation, 213 nxt_process_t *process) 214 { 215 nxt_int_t ret; 216 nxt_clone_t *clone; 217 nxt_conf_value_t *array; 218 219 static nxt_str_t uidname = nxt_string("uidmap"); 220 static nxt_str_t gidname = nxt_string("gidmap"); 221 222 clone = &process->isolation.clone; 223 224 array = nxt_conf_get_object_member(isolation, &uidname, NULL); 225 if (array != NULL) { 226 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 227 &clone->uidmap); 228 229 if (nxt_slow_path(ret != NXT_OK)) { 230 return NXT_ERROR; 231 } 232 } 233 234 array = nxt_conf_get_object_member(isolation, &gidname, NULL); 235 if (array != NULL) { 236 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 237 &clone->gidmap); 238 239 if (nxt_slow_path(ret != NXT_OK)) { 240 return NXT_ERROR; 241 } 242 } 243 244 return NXT_OK; 245 } 246 247 248 static nxt_int_t 249 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, 250 nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) 251 { 252 nxt_int_t ret; 253 nxt_uint_t i; 254 nxt_conf_value_t *obj; 255 256 static nxt_conf_map_t nxt_clone_map_entry_conf[] = { 257 { 258 nxt_string("container"), 259 NXT_CONF_MAP_INT, 260 offsetof(nxt_clone_map_entry_t, container), 261 }, 262 263 { 264 nxt_string("host"), 265 NXT_CONF_MAP_INT, 266 offsetof(nxt_clone_map_entry_t, host), 267 }, 268 269 { 270 nxt_string("size"), 271 NXT_CONF_MAP_INT, 272 offsetof(nxt_clone_map_entry_t, size), 273 }, 274 }; 275 276 map->size = nxt_conf_array_elements_count(map_array); 277 278 if (map->size == 0) { 279 return NXT_OK; 280 } 281 282 map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); 283 if (nxt_slow_path(map->map == NULL)) { 284 return NXT_ERROR; 285 } 286 287 for (i = 0; i < map->size; i++) { 288 obj = nxt_conf_get_array_element(map_array, i); 289 290 ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, 291 nxt_nitems(nxt_clone_map_entry_conf), 292 map->map + i); 293 if (nxt_slow_path(ret != NXT_OK)) { 294 nxt_alert(task, "clone map entry map error"); 295 return NXT_ERROR; 296 } 297 } 298 299 return NXT_OK; 300 } 301 302 303 static nxt_int_t 304 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process) 305 { 306 nxt_int_t ret; 307 nxt_clone_t *clone; 308 nxt_credential_t *creds; 309 310 clone = &process->isolation.clone; 311 creds = process->user_cred; 312 313 if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { 314 return NXT_OK; 315 } 316 317 if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { 318 if (nxt_slow_path(clone->uidmap.size > 0)) { 319 nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " 320 "\"isolation.namespaces.credential\" is false or unset"); 321 322 return NXT_ERROR; 323 } 324 325 if (nxt_slow_path(clone->gidmap.size > 0)) { 326 nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " 327 "\"isolation.namespaces.credential\" is false or unset"); 328 329 return NXT_ERROR; 330 } 331 332 return NXT_OK; 333 } 334 335 ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); 336 if (nxt_slow_path(ret != NXT_OK)) { 337 return NXT_ERROR; 338 } 339 340 return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); 341 } 342 343 #endif 344 345 346 #if (NXT_HAVE_CLONE) 347 348 static nxt_int_t 349 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, 350 nxt_clone_t *clone) 351 { 352 uint32_t index; 353 nxt_str_t name; 354 nxt_int_t flag; 355 nxt_conf_value_t *value; 356 357 index = 0; 358 359 for ( ;; ) { 360 value = nxt_conf_next_object_member(namespaces, &name, &index); 361 362 if (value == NULL) { 363 break; 364 } 365 366 flag = 0; 367 368 #if (NXT_HAVE_CLONE_NEWUSER) 369 if (nxt_str_eq(&name, "credential", 10)) { 370 flag = CLONE_NEWUSER; 371 } 372 #endif 373 374 #if (NXT_HAVE_CLONE_NEWPID) 375 if (nxt_str_eq(&name, "pid", 3)) { 376 flag = CLONE_NEWPID; 377 } 378 #endif 379 380 #if (NXT_HAVE_CLONE_NEWNET) 381 if (nxt_str_eq(&name, "network", 7)) { 382 flag = CLONE_NEWNET; 383 } 384 #endif 385 386 #if (NXT_HAVE_CLONE_NEWUTS) 387 if (nxt_str_eq(&name, "uname", 5)) { 388 flag = CLONE_NEWUTS; 389 } 390 #endif 391 392 #if (NXT_HAVE_CLONE_NEWNS) 393 if (nxt_str_eq(&name, "mount", 5)) { 394 flag = CLONE_NEWNS; 395 } 396 #endif 397 398 #if (NXT_HAVE_CLONE_NEWCGROUP) 399 if (nxt_str_eq(&name, "cgroup", 6)) { 400 flag = CLONE_NEWCGROUP; 401 } 402 #endif 403 404 if (!flag) { 405 nxt_alert(task, "unknown namespace flag: \"%V\"", &name); 406 return NXT_ERROR; 407 } 408 409 if (nxt_conf_get_boolean(value)) { 410 clone->flags |= flag; 411 } 412 } 413 414 return NXT_OK; 415 } 416 417 #endif 418 419 420 #if (NXT_HAVE_ISOLATION_ROOTFS) 421 422 static nxt_int_t 423 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, 424 nxt_process_t *process) 425 { 426 nxt_str_t str; 427 nxt_conf_value_t *obj; 428 429 static nxt_str_t rootfs_name = nxt_string("rootfs"); 430 431 obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); 432 if (obj != NULL) { 433 nxt_conf_get_string(obj, &str); 434 435 if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { 436 nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " 437 "than \"/\" but given \"%V\"", &str); 438 439 return NXT_ERROR; 440 } 441 442 if (str.start[str.length - 1] == '/') { 443 str.length--; 444 } 445 446 process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, 447 str.length + 1); 448 449 if (nxt_slow_path(process->isolation.rootfs == NULL)) { 450 return NXT_ERROR; 451 } 452 453 nxt_memcpy(process->isolation.rootfs, str.start, str.length); 454 455 process->isolation.rootfs[str.length] = '\0'; 456 } 457 458 return NXT_OK; 459 } 460 461 462 static nxt_int_t 463 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation, 464 nxt_process_t *process) 465 { 466 nxt_conf_value_t *conf, *value; 467 nxt_process_automount_t *automount; 468 469 static nxt_str_t automount_name = nxt_string("automount"); 470 static nxt_str_t langdeps_name = nxt_string("language_deps"); 471 472 automount = &process->isolation.automount; 473 474 automount->language_deps = 1; 475 476 conf = nxt_conf_get_object_member(isolation, &automount_name, NULL); 477 if (conf != NULL) { 478 value = nxt_conf_get_object_member(conf, &langdeps_name, NULL); 479 if (value != NULL) { 480 automount->language_deps = nxt_conf_get_boolean(value); 481 } 482 } 483 484 return NXT_OK; 485 } 486 487 488 static nxt_int_t 489 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process, 490 nxt_str_t *app_type) 491 { 492 nxt_int_t ret, cap_chroot; 493 nxt_runtime_t *rt; 494 nxt_app_lang_module_t *lang; 495 496 rt = task->thread->runtime; 497 cap_chroot = rt->capabilities.chroot; 498 lang = nxt_app_lang_module(rt, app_type); 499 500 nxt_assert(lang != NULL); 501 502 #if (NXT_HAVE_CLONE_NEWUSER) 503 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 504 cap_chroot = 1; 505 } 506 #endif 507 508 if (!cap_chroot) { 509 nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); 510 return NXT_ERROR; 511 } 512 513 ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts); 514 if (nxt_slow_path(ret != NXT_OK)) { 515 return NXT_ERROR; 516 } 517 518 process->isolation.cleanup = nxt_isolation_unmount_all; 519 520 return NXT_OK; 521 } 522 523 524 static nxt_int_t 525 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, 526 nxt_array_t *lang_mounts) 527 { 528 u_char *p; 529 size_t i, n, rootfs_len, len; 530 nxt_mp_t *mp; 531 nxt_array_t *mounts; 532 const u_char *rootfs; 533 nxt_fs_mount_t *mnt, *lang_mnt; 534 535 mp = process->mem_pool; 536 537 /* copy to init mem pool */ 538 mounts = nxt_array_copy(mp, NULL, lang_mounts); 539 if (mounts == NULL) { 540 return NXT_ERROR; 541 } 542 543 n = mounts->nelts; 544 mnt = mounts->elts; 545 lang_mnt = lang_mounts->elts; 546 547 rootfs = process->isolation.rootfs; 548 rootfs_len = nxt_strlen(rootfs); 549 550 for (i = 0; i < n; i++) { 551 len = nxt_strlen(lang_mnt[i].dst); 552 553 mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); 554 if (nxt_slow_path(mnt[i].dst == NULL)) { 555 return NXT_ERROR; 556 } 557 558 p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); 559 p = nxt_cpymem(p, lang_mnt[i].dst, len); 560 *p = '\0'; 561 } 562 563 mnt = nxt_array_add(mounts); 564 if (nxt_slow_path(mnt == NULL)) { 565 return NXT_ERROR; 566 } 567 568 mnt->src = (u_char *) "tmpfs"; 569 mnt->fstype = (u_char *) "tmpfs"; 570 mnt->flags = NXT_MS_NOSUID | NXT_MS_NODEV | NXT_MS_NOEXEC | NXT_MS_RELATIME; 571 mnt->data = (u_char *) "size=1m,mode=777"; 572 mnt->builtin = 1; 573 574 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1); 575 if (nxt_slow_path(mnt->dst == NULL)) { 576 return NXT_ERROR; 577 } 578 579 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 580 p = nxt_cpymem(p, "/tmp", 4); 581 *p = '\0'; 582 583 #if (NXT_HAVE_CLONE_NEWPID) && (NXT_HAVE_CLONE_NEWNS) 584 585 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWPID) 586 && nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) 587 { 588 mnt = nxt_array_add(mounts); 589 if (nxt_slow_path(mnt == NULL)) { 590 return NXT_ERROR; 591 } 592 593 mnt->fstype = (u_char *) "proc"; 594 mnt->src = (u_char *) "proc"; 595 596 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1); 597 if (nxt_slow_path(mnt->dst == NULL)) { 598 return NXT_ERROR; 599 } 600 601 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 602 p = nxt_cpymem(p, "/proc", 5); 603 *p = '\0'; 604 605 mnt->data = (u_char *) ""; 606 mnt->flags = 0; 607 } 608 #endif 609 610 process->isolation.mounts = mounts; 611 612 return NXT_OK; 613 } 614 615 616 void 617 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process) 618 { 619 size_t i, n; 620 nxt_array_t *mounts; 621 nxt_fs_mount_t *mnt; 622 nxt_process_automount_t *automount; 623 624 nxt_debug(task, "unmount all (%s)", process->name); 625 626 automount = &process->isolation.automount; 627 mounts = process->isolation.mounts; 628 n = mounts->nelts; 629 mnt = mounts->elts; 630 631 for (i = 0; i < n; i++) { 632 if (mnt[i].builtin && !automount->language_deps) { 633 continue; 634 } 635 636 nxt_fs_unmount(mnt[i].dst); 637 } 638 } 639 640 641 nxt_int_t 642 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) 643 { 644 size_t i, n; 645 nxt_int_t ret; 646 struct stat st; 647 nxt_array_t *mounts; 648 const u_char *dst; 649 nxt_fs_mount_t *mnt; 650 nxt_process_automount_t *automount; 651 652 automount = &process->isolation.automount; 653 mounts = process->isolation.mounts; 654 655 n = mounts->nelts; 656 mnt = mounts->elts; 657 658 for (i = 0; i < n; i++) { 659 dst = mnt[i].dst; 660 661 if (mnt[i].builtin && !automount->language_deps) { 662 continue; 663 } 664 665 if (nxt_slow_path(nxt_memcmp(mnt[i].fstype, "bind", 4) == 0 666 && stat((const char *) mnt[i].src, &st) != 0)) 667 { 668 nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); 669 continue; 670 } 671 672 ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); 673 if (nxt_slow_path(ret != NXT_OK)) { 674 nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); 675 goto undo; 676 } 677 678 ret = nxt_fs_mount(task, &mnt[i]); 679 if (nxt_slow_path(ret != NXT_OK)) { 680 goto undo; 681 } 682 } 683 684 return NXT_OK; 685 686 undo: 687 688 n = i + 1; 689 690 for (i = 0; i < n; i++) { 691 nxt_fs_unmount(mnt[i].dst); 692 } 693 694 return NXT_ERROR; 695 } 696 697 698 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 699 700 nxt_int_t 701 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 702 { 703 char *rootfs; 704 nxt_int_t ret; 705 706 rootfs = (char *) process->isolation.rootfs; 707 708 nxt_debug(task, "change root: %s", rootfs); 709 710 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) { 711 ret = nxt_isolation_pivot_root(task, rootfs); 712 713 } else { 714 ret = nxt_isolation_chroot(task, rootfs); 715 } 716 717 if (nxt_fast_path(ret == NXT_OK)) { 718 if (nxt_slow_path(chdir("/") < 0)) { 719 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 720 return NXT_ERROR; 721 } 722 } 723 724 return ret; 725 } 726 727 728 /* 729 * pivot_root(2) can only be safely used with containers, otherwise it can 730 * umount(2) the global root filesystem and screw up the machine. 731 */ 732 733 static nxt_int_t 734 nxt_isolation_pivot_root(nxt_task_t *task, const char *path) 735 { 736 /* 737 * This implementation makes use of a kernel trick that works for ages 738 * and now documented in Linux kernel 5. 739 * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ 740 */ 741 742 if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { 743 nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno); 744 return NXT_ERROR; 745 } 746 747 if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) { 748 return NXT_ERROR; 749 } 750 751 if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { 752 nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); 753 return NXT_ERROR; 754 } 755 756 if (nxt_slow_path(chdir(path) != 0)) { 757 nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); 758 return NXT_ERROR; 759 } 760 761 if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { 762 nxt_alert(task, "failed to pivot_root %E", nxt_errno); 763 return NXT_ERROR; 764 } 765 766 /* 767 * Demote the oldroot mount to avoid unmounts getting propagated to 768 * the host. 769 */ 770 if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { 771 nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); 772 return NXT_ERROR; 773 } 774 775 if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { 776 nxt_alert(task, "failed to umount old root directory %E", nxt_errno); 777 return NXT_ERROR; 778 } 779 780 return NXT_OK; 781 } 782 783 784 static nxt_int_t 785 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs) 786 { 787 char *parent_mnt; 788 FILE *procfile; 789 u_char **mounts; 790 size_t len; 791 uint8_t *shared; 792 nxt_int_t ret, index, nmounts; 793 struct mntent *ent; 794 795 static const char *mount_path = "/proc/self/mounts"; 796 797 ret = NXT_ERROR; 798 ent = NULL; 799 shared = NULL; 800 procfile = NULL; 801 parent_mnt = NULL; 802 803 nmounts = 256; 804 805 mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); 806 if (nxt_slow_path(mounts == NULL)) { 807 goto fail; 808 } 809 810 shared = nxt_malloc(nmounts); 811 if (nxt_slow_path(shared == NULL)) { 812 goto fail; 813 } 814 815 procfile = setmntent(mount_path, "r"); 816 if (nxt_slow_path(procfile == NULL)) { 817 nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); 818 819 goto fail; 820 } 821 822 index = 0; 823 824 again: 825 826 for ( ; index < nmounts; index++) { 827 ent = getmntent(procfile); 828 if (ent == NULL) { 829 nmounts = index; 830 break; 831 } 832 833 mounts[index] = (u_char *) strdup(ent->mnt_dir); 834 shared[index] = hasmntopt(ent, "shared") != NULL; 835 } 836 837 if (ent != NULL) { 838 /* there are still entries to be read */ 839 840 nmounts *= 2; 841 mounts = nxt_realloc(mounts, nmounts); 842 if (nxt_slow_path(mounts == NULL)) { 843 goto fail; 844 } 845 846 shared = nxt_realloc(shared, nmounts); 847 if (nxt_slow_path(shared == NULL)) { 848 goto fail; 849 } 850 851 goto again; 852 } 853 854 for (index = 0; index < nmounts; index++) { 855 if (nxt_strcmp(mounts[index], rootfs) == 0) { 856 parent_mnt = (char *) rootfs; 857 break; 858 } 859 } 860 861 if (parent_mnt == NULL) { 862 len = nxt_strlen(rootfs); 863 864 parent_mnt = nxt_malloc(len + 1); 865 if (parent_mnt == NULL) { 866 goto fail; 867 } 868 869 nxt_memcpy(parent_mnt, rootfs, len); 870 parent_mnt[len] = '\0'; 871 872 if (parent_mnt[len - 1] == '/') { 873 parent_mnt[len - 1] = '\0'; 874 len--; 875 } 876 877 for ( ;; ) { 878 for (index = 0; index < nmounts; index++) { 879 if (nxt_strcmp(mounts[index], parent_mnt) == 0) { 880 goto found; 881 } 882 } 883 884 if (len == 1 && parent_mnt[0] == '/') { 885 nxt_alert(task, "parent mount not found"); 886 goto fail; 887 } 888 889 /* parent dir */ 890 while (parent_mnt[len - 1] != '/' && len > 0) { 891 len--; 892 } 893 894 if (nxt_slow_path(len == 0)) { 895 nxt_alert(task, "parent mount not found"); 896 goto fail; 897 } 898 899 if (len == 1) { 900 parent_mnt[len] = '\0'; /* / */ 901 } else { 902 parent_mnt[len - 1] = '\0'; /* /<path> */ 903 } 904 } 905 } 906 907 found: 908 909 if (shared[index]) { 910 if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { 911 nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, 912 nxt_errno); 913 914 goto fail; 915 } 916 } 917 918 ret = NXT_OK; 919 920 fail: 921 922 if (procfile != NULL) { 923 endmntent(procfile); 924 } 925 926 if (mounts != NULL) { 927 for (index = 0; index < nmounts; index++) { 928 nxt_free(mounts[index]); 929 } 930 931 nxt_free(mounts); 932 } 933 934 if (shared != NULL) { 935 nxt_free(shared); 936 } 937 938 if (parent_mnt != NULL && parent_mnt != rootfs) { 939 nxt_free(parent_mnt); 940 } 941 942 return ret; 943 } 944 945 946 nxt_inline int 947 nxt_pivot_root(const char *new_root, const char *old_root) 948 { 949 return syscall(__NR_pivot_root, new_root, old_root); 950 } 951 952 953 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */ 954 955 956 nxt_int_t 957 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 958 { 959 char *rootfs; 960 961 rootfs = (char *) process->isolation.rootfs; 962 963 nxt_debug(task, "change root: %s", rootfs); 964 965 if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) { 966 if (nxt_slow_path(chdir("/") < 0)) { 967 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 968 return NXT_ERROR; 969 } 970 971 return NXT_OK; 972 } 973 974 return NXT_ERROR; 975 } 976 977 #endif 978 979 980 static nxt_int_t 981 nxt_isolation_chroot(nxt_task_t *task, const char *path) 982 { 983 if (nxt_slow_path(chroot(path) < 0)) { 984 nxt_alert(task, "chroot(%s) %E", path, nxt_errno); 985 return NXT_ERROR; 986 } 987 988 return NXT_OK; 989 } 990 991 #endif /* NXT_HAVE_ISOLATION_ROOTFS */ 992 993 994 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 995 996 static nxt_int_t 997 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, 998 nxt_process_t *process) 999 { 1000 nxt_conf_value_t *obj; 1001 1002 static nxt_str_t new_privs_name = nxt_string("new_privs"); 1003 1004 obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); 1005 if (obj != NULL) { 1006 process->isolation.new_privs = nxt_conf_get_boolean(obj); 1007 } 1008 1009 return NXT_OK; 1010 } 1011 1012 #endif 1013