From dd696235a4b0bb668ad99e3cfdf08e53500bc851 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 17 Aug 2021 16:54:04 -0700 Subject: [PATCH 1/3] runc exec: reject paused container unless --ignore-paused Currently, if a container is paused (i.e. its cgroup is frozen), runc exec just hangs, and it is not obvious why. Refuse to exec in a paused container. Add a test case. In case runc exec in a paused container is a legit use case, add --ignore-paused option to override the check. Document it, add a test case. Signed-off-by: Kir Kolyshkin --- contrib/completions/bash/runc | 1 + exec.go | 9 ++++++- man/runc-exec.8.md | 5 ++++ tests/integration/cgroups.bats | 44 ++++++++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+), 1 deletion(-) diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index 403637df875..a4cd8993745 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -173,6 +173,7 @@ _runc_exec() { --apparmor --cap, -c --preserve-fds + --ignore-paused " local all_options="$options_with_args $boolean_options" diff --git a/exec.go b/exec.go index 0d36705517c..18c6bffdeb7 100644 --- a/exec.go +++ b/exec.go @@ -91,6 +91,10 @@ following will output a list of processes running in the container: Name: "cgroup", Usage: "run the process in an (existing) sub-cgroup(s). Format is [:].", }, + cli.BoolFlag{ + Name: "ignore-paused", + Usage: "allow exec in a paused container", + }, }, Action: func(context *cli.Context) error { if err := checkArgs(context, 1, minArgs); err != nil { @@ -145,7 +149,10 @@ func execProcess(context *cli.Context) (int, error) { return -1, err } if status == libcontainer.Stopped { - return -1, errors.New("cannot exec a container that has stopped") + return -1, errors.New("cannot exec in a stopped container") + } + if status == libcontainer.Paused && !context.Bool("ignore-paused") { + return -1, errors.New("cannot exec in a paused container (use --ignore-paused to override)") } path := context.String("process") if path == "" && len(context.Args()) == 1 { diff --git a/man/runc-exec.8.md b/man/runc-exec.8.md index 24e9ff3c994..c0ca4111033 100644 --- a/man/runc-exec.8.md +++ b/man/runc-exec.8.md @@ -59,6 +59,11 @@ multiple times. : Pass _N_ additional file descriptors to the container (**stdio** + **$LISTEN_FDS** + _N_ in total). Default is **0**. +**--ignore-paused** +: Allow exec in a paused container. By default, if a container is paused, +**runc exec** errors out; this option can be used to override it. +A paused container needs to be resumed for the exec to complete. + **--cgroup** _path_ | _controller_[,_controller_...]:_path_ : Execute a process in a sub-cgroup. If the specified cgroup does not exist, an error is returned. Default is empty path, which means to use container's top diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 882e5929673..ded7b126030 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -303,3 +303,47 @@ function setup() { # check that the cgroups v2 path is the same for both processes [[ "$run_cgroup" == "$exec_cgroup" ]] } + +@test "runc exec should refuse a paused container" { + if [[ "$ROOTLESS" -ne 0 ]]; then + requires rootless_cgroup + fi + requires cgroups_freezer + + set_cgroups_path + + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 0 ] + runc pause ct1 + [ "$status" -eq 0 ] + + # Exec should not timeout or succeed. + runc exec ct1 echo ok + [ "$status" -eq 255 ] + [[ "$output" == *"cannot exec in a paused container"* ]] +} + +@test "runc exec --ignore-paused" { + if [[ "$ROOTLESS" -ne 0 ]]; then + requires rootless_cgroup + fi + requires cgroups_freezer + + set_cgroups_path + + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 0 ] + runc pause ct1 + [ "$status" -eq 0 ] + + # Resume the container a bit later. + ( + sleep 2 + runc resume ct1 + ) & + + # Exec should not timeout or succeed. + runc exec --ignore-paused ct1 echo ok + [ "$status" -eq 0 ] + [ "$output" = "ok" ] +} From d08bc0c1b3bb2bd3370b80ecc2ace934ce7496c0 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 17 Aug 2021 16:56:12 -0700 Subject: [PATCH 2/3] runc run: warn on non-empty cgroup Currently runc allows multiple containers to share the same cgroup (for example, by having the same cgroupPath in config.json). While such shared configuration might be OK, there are some issues: - When each container has its own resource limits, the order of containers start determines whose limits will be effectively applied. - When one of containers is paused, all others are paused, too. - When a container is paused, any attempt to do runc create/run/exec end up with runc init stuck inside a frozen cgroup. - When a systemd cgroup manager is used, this becomes even worse -- such as, stop (or even failed start) of any container results in "stopTransientUnit" command being sent to systemd, and so (depending on unit properties) other containers can receive SIGTERM, be killed after a timeout etc. Any of the above may lead to various hard-to-debug situations in production (runc init stuck, cgroup removal error, wrong resource limits, init not reaping zombies etc.). One obvious solution is to refuse a non-empty cgroup when starting a new container. This would be a breaking change though, so let's make it in steps, with the first step is issue a warning and a deprecated notice about a non-empty cgroup. Later (in runc 1.2) we will replace this warning with an error. Signed-off-by: Kir Kolyshkin --- libcontainer/factory_linux.go | 29 +++++++++++++++++++++++++---- tests/integration/cgroups.bats | 21 +++++++++++++++++++++ 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 6c4a65c7394..2e19ed60163 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -160,14 +160,35 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err } else if !os.IsNotExist(err) { return nil, err } - if err := os.MkdirAll(containerRoot, 0o711); err != nil { + + cm, err := manager.New(config.Cgroups) + if err != nil { return nil, err } - if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { + + // Check that cgroup does not exist or empty (no processes). + // Note for cgroup v1 this check is not thorough, as there are multiple + // separate hierarchies, while both Exists() and GetAllPids() only use + // one for "devices" controller (assuming others are the same, which is + // probably true in almost all scenarios). Checking all the hierarchies + // would be too expensive. + if cm.Exists() { + pids, err := cm.GetAllPids() + // Reading PIDs can race with cgroups removal, so ignore ENOENT and ENODEV. + if err != nil && !errors.Is(err, os.ErrNotExist) && !errors.Is(err, unix.ENODEV) { + return nil, fmt.Errorf("unable to get cgroup PIDs: %w", err) + } + if len(pids) != 0 { + // TODO: return an error. + logrus.Warnf("container's cgroup is not empty: %d process(es) found", len(pids)) + logrus.Warn("DEPRECATED: running container in a non-empty cgroup won't be supported in runc 1.2; https://github.com/opencontainers/runc/issues/3132") + } + } + + if err := os.MkdirAll(containerRoot, 0o711); err != nil { return nil, err } - cm, err := manager.New(config.Cgroups) - if err != nil { + if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { return nil, err } c := &linuxContainer{ diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index ded7b126030..feddb0b4edd 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -347,3 +347,24 @@ function setup() { [ "$status" -eq 0 ] [ "$output" = "ok" ] } + +@test "runc run/create should warn about a non-empty cgroup" { + if [[ "$ROOTLESS" -ne 0 ]]; then + requires rootless_cgroup + fi + + set_cgroups_path + + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 0 ] + + # Run a second container sharing the cgroup with the first one. + runc --debug run -d --console-socket "$CONSOLE_SOCKET" ct2 + [ "$status" -eq 0 ] + [[ "$output" == *"container's cgroup is not empty"* ]] + + # Same but using runc create. + runc create --console-socket "$CONSOLE_SOCKET" ct3 + [ "$status" -eq 0 ] + [[ "$output" == *"container's cgroup is not empty"* ]] +} From 68c2b6a7d9ce1156c8e1227f41d59c66f1f76ba9 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 18 Aug 2021 20:36:37 -0700 Subject: [PATCH 3/3] runc run: refuse a frozen cgroup Sometimes a container cgroup already exists but is frozen. When this happens, runc init hangs, and it's not clear what is going on. Refuse to run in a frozen cgroup; add a test case. Signed-off-by: Kir Kolyshkin --- libcontainer/factory_linux.go | 10 +++++++++ tests/integration/cgroups.bats | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/libcontainer/factory_linux.go b/libcontainer/factory_linux.go index 2e19ed60163..162996e1616 100644 --- a/libcontainer/factory_linux.go +++ b/libcontainer/factory_linux.go @@ -185,6 +185,16 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err } } + // Check that cgroup is not frozen. Do not use Exists() here + // since in cgroup v1 it only checks "devices" controller. + st, err := cm.GetFreezerState() + if err != nil { + return nil, fmt.Errorf("unable to get cgroup freezer state: %w", err) + } + if st == configs.Frozen { + return nil, errors.New("container's cgroup unexpectedly frozen") + } + if err := os.MkdirAll(containerRoot, 0o711); err != nil { return nil, err } diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index feddb0b4edd..ff7cf6d0b70 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -368,3 +368,44 @@ function setup() { [ "$status" -eq 0 ] [[ "$output" == *"container's cgroup is not empty"* ]] } + +@test "runc run/create should refuse pre-existing frozen cgroup" { + requires cgroups_freezer + if [[ "$ROOTLESS" -ne 0 ]]; then + requires rootless_cgroup + fi + + set_cgroups_path + + case $CGROUP_UNIFIED in + no) + FREEZER_DIR="${CGROUP_FREEZER_BASE_PATH}/${REL_CGROUPS_PATH}" + FREEZER="${FREEZER_DIR}/freezer.state" + STATE="FROZEN" + ;; + yes) + FREEZER_DIR="${CGROUP_PATH}" + FREEZER="${FREEZER_DIR}/cgroup.freeze" + STATE="1" + ;; + esac + + # Create and freeze the cgroup. + mkdir -p "$FREEZER_DIR" + echo "$STATE" >"$FREEZER" + + # Start a container. + runc run -d --console-socket "$CONSOLE_SOCKET" ct1 + [ "$status" -eq 1 ] + # A warning should be printed. + [[ "$output" == *"container's cgroup unexpectedly frozen"* ]] + + # Same check for runc create. + runc create --console-socket "$CONSOLE_SOCKET" ct2 + [ "$status" -eq 1 ] + # A warning should be printed. + [[ "$output" == *"container's cgroup unexpectedly frozen"* ]] + + # Cleanup. + rmdir "$FREEZER_DIR" +}