Skip to content

Commit a6eb8fe

Browse files
committed
runc exec: implement --cgroup
In some setups, multiple cgroups are used inside a container, and sometimes there is a need to execute a process in a particular sub-cgroup (in case of cgroup v1, for a particular controller). This is what this commit implements. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent ae6063e commit a6eb8fe

File tree

7 files changed

+195
-3
lines changed

7 files changed

+195
-3
lines changed

exec.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ following will output a list of processes running in the container:
8787
Name: "preserve-fds",
8888
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
8989
},
90+
cli.StringSliceFlag{
91+
Name: "cgroup",
92+
Usage: "run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>.",
93+
},
9094
},
9195
Action: func(context *cli.Context) error {
9296
if err := checkArgs(context, 1, minArgs); err != nil {
@@ -105,6 +109,32 @@ following will output a list of processes running in the container:
105109
SkipArgReorder: true,
106110
}
107111

112+
// getSubCgroupPaths parses the values of the --cgroup flag into a map from
// controller name to sub-cgroup path.
//
// Each argument is either "<path>" (no controller prefix; stored under the
// "" key and only accepted when it is the sole argument) or
// "<controller>[,<controller>...]:<path>", in which case every listed
// controller is mapped to <path>.
//
// It returns nil, nil when args is empty. An error is returned when an
// argument contains more than one colon, when a prefix-less argument is
// combined with other arguments, when a controller name is empty, or when
// the same controller is specified more than once.
func getSubCgroupPaths(args []string) (map[string]string, error) {
	if len(args) == 0 {
		return nil, nil
	}
	paths := make(map[string]string, len(args))
	for _, c := range args {
		// Split into controller:path.
		cs := strings.SplitN(c, ":", 3)
		if len(cs) > 2 {
			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
		}
		if len(cs) == 1 { // no controller: prefix
			if len(args) != 1 {
				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
			}
			paths[""] = c
			continue
		}
		// There may be a few comma-separated controllers.
		for _, ctrl := range strings.Split(cs[0], ",") {
			if ctrl == "" {
				// The "" key is reserved for the prefix-less form above;
				// letting an empty controller name through would alias it.
				return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
			}
			if _, ok := paths[ctrl]; ok {
				// Do not let a later value silently override an earlier one.
				return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
			}
			paths[ctrl] = cs[1]
		}
	}
	return paths, nil
}
137+
108138
func execProcess(context *cli.Context) (int, error) {
109139
container, err := getContainer(context)
110140
if err != nil {
@@ -131,6 +161,11 @@ func execProcess(context *cli.Context) (int, error) {
131161
return -1, err
132162
}
133163

164+
cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
165+
if err != nil {
166+
return -1, err
167+
}
168+
134169
r := &runner{
135170
enableSubreaper: false,
136171
shouldDestroy: false,
@@ -141,6 +176,7 @@ func execProcess(context *cli.Context) (int, error) {
141176
action: CT_ACT_RUN,
142177
init: false,
143178
preserveFDs: context.Int("preserve-fds"),
179+
subCgroupPaths: cgPaths,
144180
}
145181
return r.run(p)
146182
}

libcontainer/container_linux.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"net"
1111
"os"
1212
"os/exec"
13+
"path"
1314
"path/filepath"
1415
"reflect"
1516
"strconv"
@@ -561,7 +562,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
561562
if err != nil {
562563
return nil, err
563564
}
564-
return &setnsProcess{
565+
proc := &setnsProcess{
565566
cmd: cmd,
566567
cgroupPaths: state.CgroupPaths,
567568
rootlessCgroups: c.config.RootlessCgroups,
@@ -573,7 +574,29 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
573574
process: p,
574575
bootstrapData: data,
575576
initProcessPid: state.InitProcessPid,
576-
}, nil
577+
}
578+
if len(p.SubCgroupPaths) > 0 {
579+
if add, ok := p.SubCgroupPaths[""]; ok {
580+
// cgroup v1: using the same path for all controllers.
581+
// cgroup v2: the only possible way.
582+
for k := range proc.cgroupPaths {
583+
proc.cgroupPaths[k] = path.Join(proc.cgroupPaths[k], add)
584+
}
585+
// cgroup v2: do not try to join init process's cgroup
586+
// as a fallback (see (*setnsProcess).start).
587+
proc.initProcessPid = 0
588+
} else {
589+
// Per-controller paths.
590+
for ctrl, add := range p.SubCgroupPaths {
591+
if val, ok := proc.cgroupPaths[ctrl]; ok {
592+
proc.cgroupPaths[ctrl] = path.Join(val, add)
593+
} else {
594+
return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
595+
}
596+
}
597+
}
598+
}
599+
return proc, nil
577600
}
578601

579602
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {

libcontainer/process.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ type Process struct {
8080
ops processOperations
8181

8282
LogLevel string
83+
84+
// SubCgroupPaths specifies sub-cgroups to run the process in.
85+
// Map keys are controller names, map values are paths (relative to
86+
// container's top-level cgroup).
87+
//
88+
// If empty, the default top-level container's cgroup is used.
89+
//
90+
// For cgroup v2, the only key allowed is "".
91+
SubCgroupPaths map[string]string
8392
}
8493

8594
// Wait waits for the process to exit.

libcontainer/process_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ func (p *setnsProcess) start() (retErr error) {
129129
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
130130
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
131131
// Try to join the cgroup of InitProcessPid.
132-
if cgroups.IsCgroup2UnifiedMode() {
132+
if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
133133
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
134134
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
135135
if initCgErr == nil {

man/runc-exec.8.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,17 @@ multiple times.
5959
: Pass _N_ additional file descriptors to the container (**stdio** +
6060
**$LISTEN_FDS** + _N_ in total). Default is **0**.
6161

62+
**--cgroup** _path_ | _controller_[,_controller_...]:_path_
63+
: Execute a process in a sub-cgroup. If the specified cgroup does not exist, an
64+
error is returned. Default is empty path, which means to use container's top
65+
level cgroup.
66+
: For cgroup v1 only, a particular _controller_ (or multiple comma-separated
67+
controllers) can be specified, and the option can be used multiple times to set
68+
different paths for different controllers.
69+
: Note for cgroup v2, in case the process can't join the top level cgroup,
70+
**runc exec** falls back to joining the cgroup of the container's init process.
71+
This fallback can be disabled by using **--cgroup /**.
72+
6273
# EXIT STATUS
6374

6475
Exits with a status of _command_ (unless **-d** is used), or **255** if

tests/integration/exec.bats

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,114 @@ function check_exec_debug() {
187187
[[ "${output}" == *"level=debug"* ]]
188188
check_exec_debug "$output"
189189
}
190+
191+
@test "runc exec --cgroup sub-cgroups [v1]" {
192+
requires root cgroups_v1
193+
194+
set_cgroups_path
195+
set_cgroup_mount_writable
196+
197+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
198+
testcontainer test_busybox running
199+
200+
# Check we can't join non-existing subcgroup.
201+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
202+
[ "$status" -ne 0 ]
203+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
204+
205+
# Check we can't join non-existing subcgroup (for a particular controller).
206+
runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup
207+
[ "$status" -ne 0 ]
208+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
209+
210+
# Check we can't specify non-existent controller.
211+
runc exec --cgroup whaaat:/ test_busybox true
212+
[ "$status" -ne 0 ]
213+
[[ "$output" == *"unknown controller "* ]]
214+
215+
# Check we can join top-level cgroup (implicit).
216+
runc exec test_busybox cat /proc/self/cgroup
217+
[ "$status" -eq 0 ]
218+
! grep -v ":$REL_CGROUPS_PATH\$" <<<"$output"
219+
220+
# Check we can join top-level cgroup (explicit).
221+
runc exec --cgroup / test_busybox cat /proc/self/cgroup
222+
[ "$status" -eq 0 ]
223+
! grep -v ":$REL_CGROUPS_PATH\$" <<<"$output"
224+
225+
# Create a few subcgroups.
226+
# Note that cpu,cpuacct may be mounted together or separate.
227+
runc exec test_busybox sh -euc "mkdir -p /sys/fs/cgroup/memory/submem /sys/fs/cgroup/cpu/subcpu /sys/fs/cgroup/cpuacct/subcpu"
228+
[ "$status" -eq 0 ]
229+
230+
# Check that explicit --cgroup works.
231+
runc exec --cgroup memory:submem --cgroup cpu,cpuacct:subcpu test_busybox cat /proc/self/cgroup
232+
[ "$status" -eq 0 ]
233+
[[ "$output" == *":memory:$REL_CGROUPS_PATH/submem"* ]]
234+
[[ "$output" == *":cpu"*":$REL_CGROUPS_PATH/subcpu"* ]]
235+
}
236+
237+
@test "runc exec --cgroup subcgroup [v2]" {
238+
requires root cgroups_v2
239+
240+
set_cgroups_path
241+
set_cgroup_mount_writable
242+
243+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
244+
testcontainer test_busybox running
245+
246+
# Check we can't join non-existing subcgroup.
247+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
248+
[ "$status" -ne 0 ]
249+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
250+
251+
# Check we can join top-level cgroup (implicit).
252+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
253+
[ "$status" -eq 0 ]
254+
255+
# Check we can join top-level cgroup (explicit).
256+
runc exec --cgroup / test_busybox grep '^0::/$' /proc/self/cgroup
257+
[ "$status" -eq 0 ]
258+
259+
# Now move "init" to a subcgroup, and check it was moved.
260+
runc exec test_busybox sh -euc "mkdir /sys/fs/cgroup/foobar \
261+
&& echo 1 > /sys/fs/cgroup/foobar/cgroup.procs \
262+
&& grep -w foobar /proc/1/cgroup"
263+
[ "$status" -eq 0 ]
264+
265+
# The following part is taken from
266+
# @test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds"
267+
268+
# The init process is now in "/foobar", but an exec process can still
269+
# join "/" because we haven't enabled any domain controller yet.
270+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
271+
[ "$status" -eq 0 ]
272+
273+
# Turn on a domain controller (memory).
274+
runc exec test_busybox sh -euc 'echo $$ > /sys/fs/cgroup/foobar/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
275+
[ "$status" -eq 0 ]
276+
277+
# An exec process can no longer join "/" after turning on a domain
278+
# controller. Check that cgroup v2 fallback to init cgroup works.
279+
runc exec test_busybox sh -euc "cat /proc/self/cgroup && grep '^0::/foobar$' /proc/self/cgroup"
280+
[ "$status" -eq 0 ]
281+
282+
# Check that --cgroup / disables the init cgroup fallback.
283+
runc exec --cgroup / test_busybox true
284+
[ "$status" -ne 0 ]
285+
[[ "$output" == *" adding pid "*" to cgroups"*"/cgroup.procs: device or resource busy"* ]]
286+
287+
# Check that explicit --cgroup foobar works.
288+
runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup
289+
[ "$status" -eq 0 ]
290+
291+
# Check all processes are in foobar (this check is redundant).
292+
runc exec --cgroup foobar test_busybox sh -euc '! grep -vwH foobar /proc/*/cgroup'
293+
[ "$status" -eq 0 ]
294+
295+
# Add a second subcgroup, check we're in it.
296+
runc exec --cgroup foobar test_busybox mkdir /sys/fs/cgroup/second
297+
[ "$status" -eq 0 ]
298+
runc exec --cgroup second test_busybox grep -w second /proc/self/cgroup
299+
[ "$status" -eq 0 ]
300+
}

utils_linux.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ type runner struct {
255255
action CtAct
256256
notifySocket *notifySocket
257257
criuOpts *libcontainer.CriuOpts
258+
subCgroupPaths map[string]string
258259
}
259260

260261
func (r *runner) run(config *specs.Process) (int, error) {
@@ -274,6 +275,7 @@ func (r *runner) run(config *specs.Process) (int, error) {
274275
process.LogLevel = strconv.Itoa(int(logrus.GetLevel()))
275276
// Populate the fields that come from runner.
276277
process.Init = r.init
278+
process.SubCgroupPaths = r.subCgroupPaths
277279
if len(r.listenFDs) > 0 {
278280
process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1")
279281
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)

0 commit comments

Comments
 (0)