From af2c447705ab81c9beacbaab4af0f7912640372b Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Tue, 19 May 2020 17:52:51 +0200 Subject: [PATCH] cgroup2: try joining cgroup for initial process if we cannot join the initial container cgroup, then attempt to join the initial container process cgroup. Signed-off-by: Giuseppe Scrivano --- src/libcrun/cgroup.c | 166 +++++++++++++++++++++++++++++++------------ src/libcrun/cgroup.h | 2 +- src/libcrun/linux.c | 2 +- 3 files changed, 123 insertions(+), 47 deletions(-) diff --git a/src/libcrun/cgroup.c b/src/libcrun/cgroup.c index d664b967f1..00d15c1f0d 100644 --- a/src/libcrun/cgroup.c +++ b/src/libcrun/cgroup.c @@ -348,13 +348,13 @@ move_process_to_cgroup (pid_t pid, const char *subsystem, const char *path, libc } static int -enter_cgroup_subsystem (pid_t pid, const char *subsystem, const char *path, int ensure_missing, libcrun_error_t *err) +enter_cgroup_subsystem (pid_t pid, const char *subsystem, const char *path, bool create_if_missing, libcrun_error_t *err) { cleanup_free char *cgroup_path = NULL; int ret; xasprintf (&cgroup_path, "/sys/fs/cgroup/%s%s", subsystem, path ? path : ""); - if (ensure_missing) + if (create_if_missing) { ret = crun_ensure_directory (cgroup_path, 0755, false, err); if (UNLIKELY (ret < 0)) @@ -482,54 +482,45 @@ copy_owner (const char *from, const char *to, libcrun_error_t *err) } static int -enter_cgroup (int cgroup_mode, pid_t pid, const char *path, bool ensure_missing, libcrun_error_t *err) +read_unified_cgroup_pid (pid_t pid, char **path, libcrun_error_t *err) { - char pid_str[16]; int ret; - size_t i; - int entered_any = 0; - int rootless; - const cgroups_subsystem_t *subsystems; + char cgroup_path[32]; + char *from, *to; + cleanup_free char *content = NULL; - sprintf (pid_str, "%d", pid); + sprintf (cgroup_path, "/proc/%d/cgroup", pid); - if (cgroup_mode == CGROUP_MODE_UNIFIED) - { - cleanup_free char *cgroup_path_procs = NULL; - cleanup_free char *cgroup_path = NULL; + ret = read_all_file (cgroup_path, &content, NULL, err); + if (UNLIKELY (ret < 0)) + return ret; - xasprintf (&cgroup_path, "/sys/fs/cgroup/%s", path); - if (ensure_missing) - { - ret = crun_ensure_directory (cgroup_path, 0755, false, err); - if (UNLIKELY (ret < 0)) - return ret; - } + from = strstr (content, "0::"); + if (UNLIKELY (from == NULL)) + return crun_make_error (err, -1, "cannot find cgroup2 for the process %d", pid); - xasprintf (&cgroup_path_procs, "/sys/fs/cgroup/%s/cgroup.procs", path); - ret = write_file (cgroup_path_procs, pid_str, strlen (pid_str), err); - if (UNLIKELY (ret < 0)) - { - if (!ensure_missing && (*err)->status == EBUSY) - { - /* There are subdirectories so it is not possible to join the initial - cgroup. Create a subdirectory and use that. - It can still fail if the container creates a subdirectory under - /sys/fs/cgroup/../crun-exec/ */ - cleanup_free char *cgroup_crun_exec_path = NULL; + from += 3; + to = strchr (from, '\n'); + to = strchr (from, '\n'); + if (UNLIKELY (to == NULL)) + return crun_make_error (err, -1, "cannot parse `%s`", cgroup_path); + *to = '\0'; - xasprintf (&cgroup_crun_exec_path, "%s/crun-exec", path); + *path = xstrdup (from); + return 0; +} - ret = enter_cgroup (cgroup_mode, pid, cgroup_crun_exec_path, true, err); - if (UNLIKELY (ret < 0)) - return ret; +static int +enter_cgroup_v1 (int cgroup_mode, pid_t pid, const char *path, bool create_if_missing, libcrun_error_t *err) +{ + char pid_str[16]; + int ret; + size_t i; + int entered_any = 0; + int rootless; + const cgroups_subsystem_t *subsystems; - return copy_owner (cgroup_path_procs, cgroup_crun_exec_path, err); - } - return ret; - } - return 0; - } + sprintf (pid_str, "%d", pid); subsystems = libcrun_get_cgroups_subsystems (err); if (UNLIKELY (subsystems == NULL)) @@ -554,7 +545,7 @@ enter_cgroup (int cgroup_mode, pid_t pid, const char *path, bool ensure_missing, continue; entered_any = 1; - ret = enter_cgroup_subsystem (pid, subsystems[i], path, ensure_missing, err); + ret = enter_cgroup_subsystem (pid, subsystems[i], path, create_if_missing, err); if (UNLIKELY (ret < 0)) { int errcode = crun_error_get_errno (err); @@ -570,6 +561,91 @@ enter_cgroup (int cgroup_mode, pid_t pid, const char *path, bool ensure_missing, return entered_any ? 0 : -1; } +static int +enter_cgroup_v2 (pid_t pid, pid_t init_pid, const char *path, bool create_if_missing, libcrun_error_t *err) +{ + cleanup_free char *cgroup_path_procs = NULL; + cleanup_free char *cgroup_path = NULL; + char pid_str[16]; + int repeat; + int ret; + + sprintf (pid_str, "%d", pid); + + xasprintf (&cgroup_path, "/sys/fs/cgroup/%s", path); + if (create_if_missing) + { + ret = crun_ensure_directory (cgroup_path, 0755, false, err); + if (UNLIKELY (ret < 0)) + return ret; + } + + xasprintf (&cgroup_path_procs, "/sys/fs/cgroup/%s/cgroup.procs", path); + ret = write_file (cgroup_path_procs, pid_str, strlen (pid_str), err); + if (LIKELY (ret == 0)) + return ret; + + /* If the cgroup is not being created, try to handle EBUSY. */ + if (create_if_missing || crun_error_get_errno (err) != EBUSY) + return ret; + + crun_error_release (err); + + /* There are subdirectories so it is not possible to join the initial + cgroup. Create a subdirectory and use that. + It can still fail if the container creates a subdirectory under + /sys/fs/cgroup/../crun-exec/ */ + for (repeat = 0;; repeat++) + { + cleanup_free char *cgroup_crun_exec_path = NULL; + cleanup_free char *cgroup_sub_path_procs = NULL; + + /* There is an init pid, try to join its cgroup. */ + if (init_pid > 0) + { + ret = read_unified_cgroup_pid (init_pid, &cgroup_crun_exec_path, err); + if (UNLIKELY (ret < 0)) + return ret; + + /* Make sure the cgroup is below the initial cgroup specified for the container. */ + if (strncmp (path, cgroup_crun_exec_path, strlen (path))) + { + free (cgroup_crun_exec_path); + cgroup_crun_exec_path = NULL; + } + } + + /* There is no init_pid to lookup, try a static path. */ + if (cgroup_crun_exec_path == NULL) + xasprintf (&cgroup_crun_exec_path, "%s/crun-exec", path); + + xasprintf (&cgroup_sub_path_procs, "/sys/fs/cgroup/%s/cgroup.procs", cgroup_crun_exec_path); + + ret = write_file (cgroup_sub_path_procs, pid_str, strlen (pid_str), err); + if (UNLIKELY (ret < 0)) + { + /* The init process might have moved to a different cgroup, try again. */ + if (crun_error_get_errno (err) == EBUSY && init_pid && repeat < 20) + { + crun_error_release (err); + continue; + } + return ret; + } + return copy_owner (cgroup_path_procs, cgroup_crun_exec_path, err); + } + return ret; +} + +static int +enter_cgroup (int cgroup_mode, pid_t pid, pid_t init_pid, const char *path, bool create_if_missing, libcrun_error_t *err) +{ + if (cgroup_mode == CGROUP_MODE_UNIFIED) + return enter_cgroup_v2 (pid, init_pid, path, create_if_missing, err); + + return enter_cgroup_v1 (cgroup_mode, pid, path, create_if_missing, err); +} + int libcrun_cgroups_create_symlinks (int dirfd, libcrun_error_t *err) { @@ -591,7 +667,7 @@ libcrun_cgroups_create_symlinks (int dirfd, libcrun_error_t *err) } int -libcrun_move_process_to_cgroup (pid_t pid, char *path, libcrun_error_t *err) +libcrun_move_process_to_cgroup (pid_t pid, pid_t init_pid, char *path, libcrun_error_t *err) { int cgroup_mode = libcrun_get_cgroup_mode (err); if (UNLIKELY (cgroup_mode < 0)) @@ -600,7 +676,7 @@ libcrun_move_process_to_cgroup (pid_t pid, char *path, libcrun_error_t *err) if (path == NULL || *path == '\0') return 0; - return enter_cgroup (cgroup_mode, pid, path, false, err); + return enter_cgroup (cgroup_mode, pid, init_pid, path, false, err); } #ifdef HAVE_SYSTEMD @@ -1221,7 +1297,7 @@ libcrun_cgroup_enter_cgroupfs (struct libcrun_cgroup_args *args, libcrun_error_t return ret; } - return enter_cgroup (cgroup_mode, pid, *path, true, err); + return enter_cgroup (cgroup_mode, pid, 0, *path, true, err); } static int diff --git a/src/libcrun/cgroup.h b/src/libcrun/cgroup.h index ebc32d54d9..cf298cb92e 100644 --- a/src/libcrun/cgroup.h +++ b/src/libcrun/cgroup.h @@ -56,7 +56,7 @@ int libcrun_cgroup_enter (struct libcrun_cgroup_args *args, libcrun_error_t *err int libcrun_cgroup_killall_signal (char *path, int signal, libcrun_error_t *err); int libcrun_cgroup_killall (char *path, libcrun_error_t *err); int libcrun_cgroup_destroy (const char *id, char *path, int manager, libcrun_error_t *err); -int libcrun_move_process_to_cgroup (pid_t pid, char *path, libcrun_error_t *err); +int libcrun_move_process_to_cgroup (pid_t pid, pid_t init_pid, char *path, libcrun_error_t *err); int libcrun_update_cgroup_resources (int cgroup_mode, runtime_spec_schema_config_linux_resources *resources, char *path, libcrun_error_t *err); int libcrun_cgroups_create_symlinks (int dirfd, libcrun_error_t *err); diff --git a/src/libcrun/linux.c b/src/libcrun/linux.c index dcf764f46a..cc25f91d6e 100644 --- a/src/libcrun/linux.c +++ b/src/libcrun/linux.c @@ -2782,7 +2782,7 @@ join_process_parent_helper (pid_t child_pid, if (UNLIKELY (ret < 0)) return crun_make_error (err, errno, "waitpid for exec child pid"); - ret = libcrun_move_process_to_cgroup (pid, status->cgroup_path, err); + ret = libcrun_move_process_to_cgroup (pid, status->pid, status->cgroup_path, err); if (UNLIKELY (ret < 0)) return ret;