1 /* 2 * Copyright (C) NGINX, Inc. 3 */ 4 5 #include <nxt_main.h> 6 #include <nxt_application.h> 7 #include <nxt_process.h> 8 #include <nxt_isolation.h> 9 10 #if (NXT_HAVE_PIVOT_ROOT) 11 #include <mntent.h> 12 #endif 13 14 15 static nxt_int_t nxt_isolation_set(nxt_task_t *task, 16 nxt_conf_value_t *isolation, nxt_process_t *process); 17 18 #if (NXT_HAVE_CLONE) 19 static nxt_int_t nxt_isolation_set_namespaces(nxt_task_t *task, 20 nxt_conf_value_t *isolation, nxt_process_t *process); 21 static nxt_int_t nxt_isolation_clone_flags(nxt_task_t *task, 22 nxt_conf_value_t *namespaces, nxt_clone_t *clone); 23 #endif 24 25 #if (NXT_HAVE_CLONE_NEWUSER) 26 static nxt_int_t nxt_isolation_set_creds(nxt_task_t *task, 27 nxt_conf_value_t *isolation, nxt_process_t *process); 28 static nxt_int_t nxt_isolation_credential_map(nxt_task_t *task, 29 nxt_mp_t *mem_pool, nxt_conf_value_t *map_array, 30 nxt_clone_credential_map_t *map); 31 static nxt_int_t nxt_isolation_vldt_creds(nxt_task_t *task, 32 nxt_process_t *process); 33 #endif 34 35 #if (NXT_HAVE_ISOLATION_ROOTFS) 36 static nxt_int_t nxt_isolation_set_rootfs(nxt_task_t *task, 37 nxt_conf_value_t *isolation, nxt_process_t *process); 38 static nxt_int_t nxt_isolation_set_automount(nxt_task_t *task, 39 nxt_conf_value_t *isolation, nxt_process_t *process); 40 static nxt_int_t nxt_isolation_set_mounts(nxt_task_t *task, 41 nxt_process_t *process, nxt_str_t *app_type); 42 static nxt_int_t nxt_isolation_set_lang_mounts(nxt_task_t *task, 43 nxt_process_t *process, nxt_array_t *syspaths); 44 static int nxt_cdecl nxt_isolation_mount_compare(const void *v1, 45 const void *v2); 46 static void nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process); 47 48 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 49 static nxt_int_t nxt_isolation_pivot_root(nxt_task_t *task, const char *rootfs); 50 static nxt_int_t nxt_isolation_make_private_mount(nxt_task_t *task, 51 const char *rootfs); 52 nxt_inline int nxt_pivot_root(const char *new_root, const char *old_root); 53 #endif 54 55 static nxt_int_t nxt_isolation_chroot(nxt_task_t *task, const char *path); 56 #endif 57 58 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 59 static nxt_int_t nxt_isolation_set_new_privs(nxt_task_t *task, 60 nxt_conf_value_t *isolation, nxt_process_t *process); 61 #endif 62 63 64 nxt_int_t 65 nxt_isolation_main_prefork(nxt_task_t *task, nxt_process_t *process, 66 nxt_mp_t *mp) 67 { 68 nxt_int_t cap_setid; 69 nxt_int_t ret; 70 nxt_runtime_t *rt; 71 nxt_common_app_conf_t *app_conf; 72 73 rt = task->thread->runtime; 74 app_conf = process->data.app; 75 cap_setid = rt->capabilities.setid; 76 77 if (app_conf->isolation != NULL) { 78 ret = nxt_isolation_set(task, app_conf->isolation, process); 79 if (nxt_slow_path(ret != NXT_OK)) { 80 return ret; 81 } 82 } 83 84 #if (NXT_HAVE_CLONE_NEWUSER) 85 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 86 cap_setid = 1; 87 } 88 #endif 89 90 if (cap_setid) { 91 ret = nxt_process_creds_set(task, process, &app_conf->user, 92 &app_conf->group); 93 94 if (nxt_slow_path(ret != NXT_OK)) { 95 return ret; 96 } 97 98 } else { 99 if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user, 100 nxt_strlen(rt->user_cred.user))) 101 { 102 nxt_alert(task, "cannot set user \"%V\" for app \"%V\": " 103 "missing capabilities", &app_conf->user, &app_conf->name); 104 105 return NXT_ERROR; 106 } 107 108 if (app_conf->group.length > 0 109 && !nxt_str_eq(&app_conf->group, (u_char *) rt->group, 110 nxt_strlen(rt->group))) 111 { 112 nxt_alert(task, "cannot set group \"%V\" for app \"%V\": " 113 "missing capabilities", &app_conf->group, 114 &app_conf->name); 115 116 return NXT_ERROR; 117 } 118 } 119 120 #if (NXT_HAVE_ISOLATION_ROOTFS) 121 if (process->isolation.rootfs != NULL) { 122 nxt_int_t has_mnt; 123 124 ret = nxt_isolation_set_mounts(task, process, &app_conf->type); 125 if (nxt_slow_path(ret != NXT_OK)) { 126 return ret; 127 } 128 129 has_mnt = 0; 130 131 #if (NXT_HAVE_CLONE_NEWNS) 132 has_mnt = nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS); 133 #endif 134 135 if (process->user_cred->uid == 0 && !has_mnt) { 136 nxt_log(task, NXT_LOG_WARN, 137 "setting user \"root\" with \"rootfs\" is unsafe without " 138 "\"mount\" namespace isolation"); 139 } 140 } 141 #endif 142 143 #if (NXT_HAVE_CLONE_NEWUSER) 144 ret = nxt_isolation_vldt_creds(task, process); 145 if (nxt_slow_path(ret != NXT_OK)) { 146 return ret; 147 } 148 #endif 149 150 return NXT_OK; 151 } 152 153 154 static nxt_int_t 155 nxt_isolation_set(nxt_task_t *task, nxt_conf_value_t *isolation, 156 nxt_process_t *process) 157 { 158 #if (NXT_HAVE_CLONE) 159 if (nxt_slow_path(nxt_isolation_set_namespaces(task, isolation, process) 160 != NXT_OK)) 161 { 162 return NXT_ERROR; 163 } 164 #endif 165 166 #if (NXT_HAVE_CLONE_NEWUSER) 167 if (nxt_slow_path(nxt_isolation_set_creds(task, isolation, process) 168 != NXT_OK)) 169 { 170 return NXT_ERROR; 171 } 172 #endif 173 174 #if (NXT_HAVE_ISOLATION_ROOTFS) 175 if (nxt_slow_path(nxt_isolation_set_rootfs(task, isolation, process) 176 != NXT_OK)) 177 { 178 return NXT_ERROR; 179 } 180 181 if (nxt_slow_path(nxt_isolation_set_automount(task, isolation, process) 182 != NXT_OK)) 183 { 184 return NXT_ERROR; 185 } 186 #endif 187 188 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 189 if (nxt_slow_path(nxt_isolation_set_new_privs(task, isolation, process) 190 != NXT_OK)) 191 { 192 return NXT_ERROR; 193 } 194 #endif 195 196 return NXT_OK; 197 } 198 199 200 #if (NXT_HAVE_CLONE) 201 202 static nxt_int_t 203 nxt_isolation_set_namespaces(nxt_task_t *task, nxt_conf_value_t *isolation, 204 nxt_process_t *process) 205 { 206 nxt_int_t ret; 207 nxt_conf_value_t *obj; 208 209 static nxt_str_t nsname = nxt_string("namespaces"); 210 211 obj = nxt_conf_get_object_member(isolation, &nsname, NULL); 212 if (obj != NULL) { 213 ret = nxt_isolation_clone_flags(task, obj, &process->isolation.clone); 214 if (nxt_slow_path(ret != NXT_OK)) { 215 return NXT_ERROR; 216 } 217 } 218 219 return NXT_OK; 220 } 221 222 #endif 223 224 225 #if (NXT_HAVE_CLONE_NEWUSER) 226 227 static nxt_int_t 228 nxt_isolation_set_creds(nxt_task_t *task, nxt_conf_value_t *isolation, 229 nxt_process_t *process) 230 { 231 nxt_int_t ret; 232 nxt_clone_t *clone; 233 nxt_conf_value_t *array; 234 235 static nxt_str_t uidname = nxt_string("uidmap"); 236 static nxt_str_t gidname = nxt_string("gidmap"); 237 238 clone = &process->isolation.clone; 239 240 array = nxt_conf_get_object_member(isolation, &uidname, NULL); 241 if (array != NULL) { 242 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 243 &clone->uidmap); 244 245 if (nxt_slow_path(ret != NXT_OK)) { 246 return NXT_ERROR; 247 } 248 } 249 250 array = nxt_conf_get_object_member(isolation, &gidname, NULL); 251 if (array != NULL) { 252 ret = nxt_isolation_credential_map(task, process->mem_pool, array, 253 &clone->gidmap); 254 255 if (nxt_slow_path(ret != NXT_OK)) { 256 return NXT_ERROR; 257 } 258 } 259 260 return NXT_OK; 261 } 262 263 264 static nxt_int_t 265 nxt_isolation_credential_map(nxt_task_t *task, nxt_mp_t *mp, 266 nxt_conf_value_t *map_array, nxt_clone_credential_map_t *map) 267 { 268 nxt_int_t ret; 269 nxt_uint_t i; 270 nxt_conf_value_t *obj; 271 272 static nxt_conf_map_t nxt_clone_map_entry_conf[] = { 273 { 274 nxt_string("container"), 275 NXT_CONF_MAP_INT, 276 offsetof(nxt_clone_map_entry_t, container), 277 }, 278 279 { 280 nxt_string("host"), 281 NXT_CONF_MAP_INT, 282 offsetof(nxt_clone_map_entry_t, host), 283 }, 284 285 { 286 nxt_string("size"), 287 NXT_CONF_MAP_INT, 288 offsetof(nxt_clone_map_entry_t, size), 289 }, 290 }; 291 292 map->size = nxt_conf_array_elements_count(map_array); 293 294 if (map->size == 0) { 295 return NXT_OK; 296 } 297 298 map->map = nxt_mp_alloc(mp, map->size * sizeof(nxt_clone_map_entry_t)); 299 if (nxt_slow_path(map->map == NULL)) { 300 return NXT_ERROR; 301 } 302 303 for (i = 0; i < map->size; i++) { 304 obj = nxt_conf_get_array_element(map_array, i); 305 306 ret = nxt_conf_map_object(mp, obj, nxt_clone_map_entry_conf, 307 nxt_nitems(nxt_clone_map_entry_conf), 308 map->map + i); 309 if (nxt_slow_path(ret != NXT_OK)) { 310 nxt_alert(task, "clone map entry map error"); 311 return NXT_ERROR; 312 } 313 } 314 315 return NXT_OK; 316 } 317 318 319 static nxt_int_t 320 nxt_isolation_vldt_creds(nxt_task_t *task, nxt_process_t *process) 321 { 322 nxt_int_t ret; 323 nxt_clone_t *clone; 324 nxt_credential_t *creds; 325 326 clone = &process->isolation.clone; 327 creds = process->user_cred; 328 329 if (clone->uidmap.size == 0 && clone->gidmap.size == 0) { 330 return NXT_OK; 331 } 332 333 if (!nxt_is_clone_flag_set(clone->flags, NEWUSER)) { 334 if (nxt_slow_path(clone->uidmap.size > 0)) { 335 nxt_log(task, NXT_LOG_ERR, "\"uidmap\" is set but " 336 "\"isolation.namespaces.credential\" is false or unset"); 337 338 return NXT_ERROR; 339 } 340 341 if (nxt_slow_path(clone->gidmap.size > 0)) { 342 nxt_log(task, NXT_LOG_ERR, "\"gidmap\" is set but " 343 "\"isolation.namespaces.credential\" is false or unset"); 344 345 return NXT_ERROR; 346 } 347 348 return NXT_OK; 349 } 350 351 ret = nxt_clone_vldt_credential_uidmap(task, &clone->uidmap, creds); 352 if (nxt_slow_path(ret != NXT_OK)) { 353 return NXT_ERROR; 354 } 355 356 return nxt_clone_vldt_credential_gidmap(task, &clone->gidmap, creds); 357 } 358 359 #endif 360 361 362 #if (NXT_HAVE_CLONE) 363 364 static nxt_int_t 365 nxt_isolation_clone_flags(nxt_task_t *task, nxt_conf_value_t *namespaces, 366 nxt_clone_t *clone) 367 { 368 uint32_t index; 369 nxt_str_t name; 370 nxt_int_t flag; 371 nxt_conf_value_t *value; 372 373 index = 0; 374 375 for ( ;; ) { 376 value = nxt_conf_next_object_member(namespaces, &name, &index); 377 378 if (value == NULL) { 379 break; 380 } 381 382 flag = 0; 383 384 #if (NXT_HAVE_CLONE_NEWUSER) 385 if (nxt_str_eq(&name, "credential", 10)) { 386 flag = CLONE_NEWUSER; 387 } 388 #endif 389 390 #if (NXT_HAVE_CLONE_NEWPID) 391 if (nxt_str_eq(&name, "pid", 3)) { 392 flag = CLONE_NEWPID; 393 } 394 #endif 395 396 #if (NXT_HAVE_CLONE_NEWNET) 397 if (nxt_str_eq(&name, "network", 7)) { 398 flag = CLONE_NEWNET; 399 } 400 #endif 401 402 #if (NXT_HAVE_CLONE_NEWUTS) 403 if (nxt_str_eq(&name, "uname", 5)) { 404 flag = CLONE_NEWUTS; 405 } 406 #endif 407 408 #if (NXT_HAVE_CLONE_NEWNS) 409 if (nxt_str_eq(&name, "mount", 5)) { 410 flag = CLONE_NEWNS; 411 } 412 #endif 413 414 #if (NXT_HAVE_CLONE_NEWCGROUP) 415 if (nxt_str_eq(&name, "cgroup", 6)) { 416 flag = CLONE_NEWCGROUP; 417 } 418 #endif 419 420 if (!flag) { 421 nxt_alert(task, "unknown namespace flag: \"%V\"", &name); 422 return NXT_ERROR; 423 } 424 425 if (nxt_conf_get_boolean(value)) { 426 clone->flags |= flag; 427 } 428 } 429 430 return NXT_OK; 431 } 432 433 #endif 434 435 436 #if (NXT_HAVE_ISOLATION_ROOTFS) 437 438 static nxt_int_t 439 nxt_isolation_set_rootfs(nxt_task_t *task, nxt_conf_value_t *isolation, 440 nxt_process_t *process) 441 { 442 nxt_str_t str; 443 nxt_conf_value_t *obj; 444 445 static nxt_str_t rootfs_name = nxt_string("rootfs"); 446 447 obj = nxt_conf_get_object_member(isolation, &rootfs_name, NULL); 448 if (obj != NULL) { 449 nxt_conf_get_string(obj, &str); 450 451 if (nxt_slow_path(str.length <= 1 || str.start[0] != '/')) { 452 nxt_log(task, NXT_LOG_ERR, "rootfs requires an absolute path other " 453 "than \"/\" but given \"%V\"", &str); 454 455 return NXT_ERROR; 456 } 457 458 if (str.start[str.length - 1] == '/') { 459 str.length--; 460 } 461 462 process->isolation.rootfs = nxt_mp_alloc(process->mem_pool, 463 str.length + 1); 464 465 if (nxt_slow_path(process->isolation.rootfs == NULL)) { 466 return NXT_ERROR; 467 } 468 469 nxt_memcpy(process->isolation.rootfs, str.start, str.length); 470 471 process->isolation.rootfs[str.length] = '\0'; 472 } 473 474 return NXT_OK; 475 } 476 477 478 static nxt_int_t 479 nxt_isolation_set_automount(nxt_task_t *task, nxt_conf_value_t *isolation, 480 nxt_process_t *process) 481 { 482 nxt_conf_value_t *conf, *value; 483 nxt_process_automount_t *automount; 484 485 static nxt_str_t automount_name = nxt_string("automount"); 486 static nxt_str_t langdeps_name = nxt_string("language_deps"); 487 static nxt_str_t tmp_name = nxt_string("tmpfs"); 488 489 automount = &process->isolation.automount; 490 491 automount->language_deps = 1; 492 automount->tmpfs = 1; 493 494 conf = nxt_conf_get_object_member(isolation, &automount_name, NULL); 495 if (conf != NULL) { 496 value = nxt_conf_get_object_member(conf, &langdeps_name, NULL); 497 if (value != NULL) { 498 automount->language_deps = nxt_conf_get_boolean(value); 499 } 500 501 value = nxt_conf_get_object_member(conf, &tmp_name, NULL); 502 if (value != NULL) { 503 automount->tmpfs = nxt_conf_get_boolean(value); 504 } 505 } 506 507 return NXT_OK; 508 } 509 510 511 static nxt_int_t 512 nxt_isolation_set_mounts(nxt_task_t *task, nxt_process_t *process, 513 nxt_str_t *app_type) 514 { 515 nxt_int_t ret, cap_chroot; 516 nxt_runtime_t *rt; 517 nxt_app_lang_module_t *lang; 518 519 rt = task->thread->runtime; 520 cap_chroot = rt->capabilities.chroot; 521 lang = nxt_app_lang_module(rt, app_type); 522 523 nxt_assert(lang != NULL); 524 525 #if (NXT_HAVE_CLONE_NEWUSER) 526 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWUSER)) { 527 cap_chroot = 1; 528 } 529 #endif 530 531 if (!cap_chroot) { 532 nxt_log(task, NXT_LOG_ERR, "The \"rootfs\" field requires privileges"); 533 return NXT_ERROR; 534 } 535 536 ret = nxt_isolation_set_lang_mounts(task, process, lang->mounts); 537 if (nxt_slow_path(ret != NXT_OK)) { 538 return NXT_ERROR; 539 } 540 541 process->isolation.cleanup = nxt_isolation_unmount_all; 542 543 return NXT_OK; 544 } 545 546 547 static nxt_int_t 548 nxt_isolation_set_lang_mounts(nxt_task_t *task, nxt_process_t *process, 549 nxt_array_t *lang_mounts) 550 { 551 u_char *p; 552 size_t i, n, rootfs_len, len; 553 nxt_mp_t *mp; 554 nxt_array_t *mounts; 555 const u_char *rootfs; 556 nxt_fs_mount_t *mnt, *lang_mnt; 557 558 mp = process->mem_pool; 559 560 /* copy to init mem pool */ 561 mounts = nxt_array_copy(mp, NULL, lang_mounts); 562 if (mounts == NULL) { 563 return NXT_ERROR; 564 } 565 566 n = mounts->nelts; 567 mnt = mounts->elts; 568 lang_mnt = lang_mounts->elts; 569 570 rootfs = process->isolation.rootfs; 571 rootfs_len = nxt_strlen(rootfs); 572 573 for (i = 0; i < n; i++) { 574 len = nxt_strlen(lang_mnt[i].dst); 575 576 mnt[i].dst = nxt_mp_alloc(mp, rootfs_len + len + 1); 577 if (nxt_slow_path(mnt[i].dst == NULL)) { 578 return NXT_ERROR; 579 } 580 581 p = nxt_cpymem(mnt[i].dst, rootfs, rootfs_len); 582 p = nxt_cpymem(p, lang_mnt[i].dst, len); 583 *p = '\0'; 584 } 585 586 if (process->isolation.automount.tmpfs) { 587 mnt = nxt_array_add(mounts); 588 if (nxt_slow_path(mnt == NULL)) { 589 return NXT_ERROR; 590 } 591 592 mnt->src = (u_char *) "tmpfs"; 593 mnt->name = (u_char *) "tmpfs"; 594 mnt->type = NXT_FS_TMP; 595 mnt->flags = (NXT_FS_FLAGS_NOSUID 596 | NXT_FS_FLAGS_NODEV 597 | NXT_FS_FLAGS_NOEXEC); 598 mnt->data = (u_char *) "size=1m,mode=777"; 599 mnt->builtin = 1; 600 mnt->deps = 0; 601 602 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/tmp") + 1); 603 if (nxt_slow_path(mnt->dst == NULL)) { 604 return NXT_ERROR; 605 } 606 607 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 608 p = nxt_cpymem(p, "/tmp", 4); 609 *p = '\0'; 610 } 611 612 mnt = nxt_array_add(mounts); 613 if (nxt_slow_path(mnt == NULL)) { 614 return NXT_ERROR; 615 } 616 617 mnt->name = (u_char *) "proc"; 618 mnt->type = NXT_FS_PROC; 619 mnt->src = (u_char *) "none"; 620 mnt->dst = nxt_mp_nget(mp, rootfs_len + nxt_length("/proc") + 1); 621 if (nxt_slow_path(mnt->dst == NULL)) { 622 return NXT_ERROR; 623 } 624 625 p = nxt_cpymem(mnt->dst, rootfs, rootfs_len); 626 p = nxt_cpymem(p, "/proc", 5); 627 *p = '\0'; 628 629 mnt->data = (u_char *) ""; 630 mnt->flags = NXT_FS_FLAGS_NOEXEC | NXT_FS_FLAGS_NOSUID; 631 mnt->builtin = 1; 632 mnt->deps = 0; 633 634 qsort(mounts->elts, mounts->nelts, sizeof(nxt_fs_mount_t), 635 nxt_isolation_mount_compare); 636 637 process->isolation.mounts = mounts; 638 639 return NXT_OK; 640 } 641 642 643 static int nxt_cdecl 644 nxt_isolation_mount_compare(const void *v1, const void *v2) 645 { 646 const nxt_fs_mount_t *mnt1, *mnt2; 647 648 mnt1 = v1; 649 mnt2 = v2; 650 651 return nxt_strlen(mnt1->src) > nxt_strlen(mnt2->src); 652 } 653 654 655 void 656 nxt_isolation_unmount_all(nxt_task_t *task, nxt_process_t *process) 657 { 658 size_t n; 659 nxt_array_t *mounts; 660 nxt_runtime_t *rt; 661 nxt_fs_mount_t *mnt; 662 nxt_process_automount_t *automount; 663 664 rt = task->thread->runtime; 665 666 if (!rt->capabilities.setid) { 667 return; 668 } 669 670 #if (NXT_HAVE_CLONE_NEWNS) 671 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) { 672 return; 673 } 674 #endif 675 676 nxt_debug(task, "unmount all (%s)", process->name); 677 678 automount = &process->isolation.automount; 679 mounts = process->isolation.mounts; 680 n = mounts->nelts; 681 mnt = mounts->elts; 682 683 while (n > 0) { 684 n--; 685 686 if (mnt[n].deps && !automount->language_deps) { 687 continue; 688 } 689 690 nxt_fs_unmount(mnt[n].dst); 691 } 692 } 693 694 695 nxt_int_t 696 nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) 697 { 698 size_t i, n; 699 nxt_int_t ret; 700 struct stat st; 701 nxt_array_t *mounts; 702 const u_char *dst; 703 nxt_fs_mount_t *mnt; 704 nxt_process_automount_t *automount; 705 706 automount = &process->isolation.automount; 707 mounts = process->isolation.mounts; 708 709 n = mounts->nelts; 710 mnt = mounts->elts; 711 712 for (i = 0; i < n; i++) { 713 dst = mnt[i].dst; 714 715 if (mnt[i].deps && !automount->language_deps) { 716 continue; 717 } 718 719 if (nxt_slow_path(mnt[i].type == NXT_FS_BIND 720 && stat((const char *) mnt[i].src, &st) != 0)) 721 { 722 nxt_log(task, NXT_LOG_WARN, "host path not found: %s", mnt[i].src); 723 continue; 724 } 725 726 ret = nxt_fs_mkdir_all(dst, S_IRWXU | S_IRWXG | S_IRWXO); 727 if (nxt_slow_path(ret != NXT_OK)) { 728 nxt_alert(task, "mkdir(%s) %E", dst, nxt_errno); 729 goto undo; 730 } 731 732 ret = nxt_fs_mount(task, &mnt[i]); 733 if (nxt_slow_path(ret != NXT_OK)) { 734 goto undo; 735 } 736 } 737 738 return NXT_OK; 739 740 undo: 741 742 n = i + 1; 743 744 for (i = 0; i < n; i++) { 745 nxt_fs_unmount(mnt[i].dst); 746 } 747 748 return NXT_ERROR; 749 } 750 751 752 #if (NXT_HAVE_PIVOT_ROOT) && (NXT_HAVE_CLONE_NEWNS) 753 754 nxt_int_t 755 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 756 { 757 char *rootfs; 758 nxt_int_t ret; 759 760 rootfs = (char *) process->isolation.rootfs; 761 762 nxt_debug(task, "change root: %s", rootfs); 763 764 if (nxt_is_clone_flag_set(process->isolation.clone.flags, NEWNS)) { 765 ret = nxt_isolation_pivot_root(task, rootfs); 766 767 } else { 768 ret = nxt_isolation_chroot(task, rootfs); 769 } 770 771 if (nxt_fast_path(ret == NXT_OK)) { 772 if (nxt_slow_path(chdir("/") < 0)) { 773 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 774 return NXT_ERROR; 775 } 776 } 777 778 return ret; 779 } 780 781 782 /* 783 * pivot_root(2) can only be safely used with containers, otherwise it can 784 * umount(2) the global root filesystem and screw up the machine. 785 */ 786 787 static nxt_int_t 788 nxt_isolation_pivot_root(nxt_task_t *task, const char *path) 789 { 790 /* 791 * This implementation makes use of a kernel trick that works for ages 792 * and now documented in Linux kernel 5. 793 * https://lore.kernel.org/linux-man/87r24piwhm.fsf@x220.int.ebiederm.org/T/ 794 */ 795 796 if (nxt_slow_path(mount("", "/", "", MS_SLAVE|MS_REC, "") != 0)) { 797 nxt_alert(task, "mount(\"/\", MS_SLAVE|MS_REC) failed: %E", nxt_errno); 798 return NXT_ERROR; 799 } 800 801 if (nxt_slow_path(nxt_isolation_make_private_mount(task, path) != NXT_OK)) { 802 return NXT_ERROR; 803 } 804 805 if (nxt_slow_path(mount(path, path, "bind", MS_BIND|MS_REC, "") != 0)) { 806 nxt_alert(task, "error bind mounting rootfs %E", nxt_errno); 807 return NXT_ERROR; 808 } 809 810 if (nxt_slow_path(chdir(path) != 0)) { 811 nxt_alert(task, "failed to chdir(%s) %E", path, nxt_errno); 812 return NXT_ERROR; 813 } 814 815 if (nxt_slow_path(nxt_pivot_root(".", ".") != 0)) { 816 nxt_alert(task, "failed to pivot_root %E", nxt_errno); 817 return NXT_ERROR; 818 } 819 820 /* 821 * Demote the oldroot mount to avoid unmounts getting propagated to 822 * the host. 823 */ 824 if (nxt_slow_path(mount("", ".", "", MS_SLAVE | MS_REC, NULL) != 0)) { 825 nxt_alert(task, "failed to bind mount rootfs %E", nxt_errno); 826 return NXT_ERROR; 827 } 828 829 if (nxt_slow_path(umount2(".", MNT_DETACH) != 0)) { 830 nxt_alert(task, "failed to umount old root directory %E", nxt_errno); 831 return NXT_ERROR; 832 } 833 834 return NXT_OK; 835 } 836 837 838 static nxt_int_t 839 nxt_isolation_make_private_mount(nxt_task_t *task, const char *rootfs) 840 { 841 char *parent_mnt; 842 FILE *procfile; 843 u_char **mounts; 844 size_t len; 845 uint8_t *shared; 846 nxt_int_t ret, index, nmounts; 847 struct mntent *ent; 848 849 static const char *mount_path = "/proc/self/mounts"; 850 851 ret = NXT_ERROR; 852 ent = NULL; 853 shared = NULL; 854 procfile = NULL; 855 parent_mnt = NULL; 856 857 nmounts = 256; 858 859 mounts = nxt_malloc(nmounts * sizeof(uintptr_t)); 860 if (nxt_slow_path(mounts == NULL)) { 861 goto fail; 862 } 863 864 shared = nxt_malloc(nmounts); 865 if (nxt_slow_path(shared == NULL)) { 866 goto fail; 867 } 868 869 procfile = setmntent(mount_path, "r"); 870 if (nxt_slow_path(procfile == NULL)) { 871 nxt_alert(task, "failed to open %s %E", mount_path, nxt_errno); 872 873 goto fail; 874 } 875 876 index = 0; 877 878 again: 879 880 for ( ; index < nmounts; index++) { 881 ent = getmntent(procfile); 882 if (ent == NULL) { 883 nmounts = index; 884 break; 885 } 886 887 mounts[index] = (u_char *) strdup(ent->mnt_dir); 888 shared[index] = hasmntopt(ent, "shared") != NULL; 889 } 890 891 if (ent != NULL) { 892 /* there are still entries to be read */ 893 894 nmounts *= 2; 895 mounts = nxt_realloc(mounts, nmounts); 896 if (nxt_slow_path(mounts == NULL)) { 897 goto fail; 898 } 899 900 shared = nxt_realloc(shared, nmounts); 901 if (nxt_slow_path(shared == NULL)) { 902 goto fail; 903 } 904 905 goto again; 906 } 907 908 for (index = 0; index < nmounts; index++) { 909 if (nxt_strcmp(mounts[index], rootfs) == 0) { 910 parent_mnt = (char *) rootfs; 911 break; 912 } 913 } 914 915 if (parent_mnt == NULL) { 916 len = nxt_strlen(rootfs); 917 918 parent_mnt = nxt_malloc(len + 1); 919 if (parent_mnt == NULL) { 920 goto fail; 921 } 922 923 nxt_memcpy(parent_mnt, rootfs, len); 924 parent_mnt[len] = '\0'; 925 926 if (parent_mnt[len - 1] == '/') { 927 parent_mnt[len - 1] = '\0'; 928 len--; 929 } 930 931 for ( ;; ) { 932 for (index = 0; index < nmounts; index++) { 933 if (nxt_strcmp(mounts[index], parent_mnt) == 0) { 934 goto found; 935 } 936 } 937 938 if (len == 1 && parent_mnt[0] == '/') { 939 nxt_alert(task, "parent mount not found"); 940 goto fail; 941 } 942 943 /* parent dir */ 944 while (parent_mnt[len - 1] != '/' && len > 0) { 945 len--; 946 } 947 948 if (nxt_slow_path(len == 0)) { 949 nxt_alert(task, "parent mount not found"); 950 goto fail; 951 } 952 953 if (len == 1) { 954 parent_mnt[len] = '\0'; /* / */ 955 } else { 956 parent_mnt[len - 1] = '\0'; /* /<path> */ 957 } 958 } 959 } 960 961 found: 962 963 if (shared[index]) { 964 if (nxt_slow_path(mount("", parent_mnt, "", MS_PRIVATE, "") != 0)) { 965 nxt_alert(task, "mount(\"\", \"%s\", MS_PRIVATE) %E", parent_mnt, 966 nxt_errno); 967 968 goto fail; 969 } 970 } 971 972 ret = NXT_OK; 973 974 fail: 975 976 if (procfile != NULL) { 977 endmntent(procfile); 978 } 979 980 if (mounts != NULL) { 981 for (index = 0; index < nmounts; index++) { 982 nxt_free(mounts[index]); 983 } 984 985 nxt_free(mounts); 986 } 987 988 if (shared != NULL) { 989 nxt_free(shared); 990 } 991 992 if (parent_mnt != NULL && parent_mnt != rootfs) { 993 nxt_free(parent_mnt); 994 } 995 996 return ret; 997 } 998 999 1000 nxt_inline int 1001 nxt_pivot_root(const char *new_root, const char *old_root) 1002 { 1003 return syscall(__NR_pivot_root, new_root, old_root); 1004 } 1005 1006 1007 #else /* !(NXT_HAVE_PIVOT_ROOT) || !(NXT_HAVE_CLONE_NEWNS) */ 1008 1009 1010 nxt_int_t 1011 nxt_isolation_change_root(nxt_task_t *task, nxt_process_t *process) 1012 { 1013 char *rootfs; 1014 1015 rootfs = (char *) process->isolation.rootfs; 1016 1017 nxt_debug(task, "change root: %s", rootfs); 1018 1019 if (nxt_fast_path(nxt_isolation_chroot(task, rootfs) == NXT_OK)) { 1020 if (nxt_slow_path(chdir("/") < 0)) { 1021 nxt_alert(task, "chdir(\"/\") %E", nxt_errno); 1022 return NXT_ERROR; 1023 } 1024 1025 return NXT_OK; 1026 } 1027 1028 return NXT_ERROR; 1029 } 1030 1031 #endif 1032 1033 1034 static nxt_int_t 1035 nxt_isolation_chroot(nxt_task_t *task, const char *path) 1036 { 1037 if (nxt_slow_path(chroot(path) < 0)) { 1038 nxt_alert(task, "chroot(%s) %E", path, nxt_errno); 1039 return NXT_ERROR; 1040 } 1041 1042 return NXT_OK; 1043 } 1044 1045 #endif /* NXT_HAVE_ISOLATION_ROOTFS */ 1046 1047 1048 #if (NXT_HAVE_PR_SET_NO_NEW_PRIVS) 1049 1050 static nxt_int_t 1051 nxt_isolation_set_new_privs(nxt_task_t *task, nxt_conf_value_t *isolation, 1052 nxt_process_t *process) 1053 { 1054 nxt_conf_value_t *obj; 1055 1056 static nxt_str_t new_privs_name = nxt_string("new_privs"); 1057 1058 obj = nxt_conf_get_object_member(isolation, &new_privs_name, NULL); 1059 if (obj != NULL) { 1060 process->isolation.new_privs = nxt_conf_get_boolean(obj); 1061 } 1062 1063 return NXT_OK; 1064 } 1065 1066 #endif 1067