Skip to content

Commit 229b86a

Browse files
committed
runc exec: implement --cgroup
In some setups, multiple cgroups are used inside a container, and sometimes there is a need to execute a process in a particular sub-cgroup (in case of cgroup v1, for a particular controller). This is what this commit implements. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent d0f2766 commit 229b86a

File tree

7 files changed

+195
-3
lines changed

7 files changed

+195
-3
lines changed

exec.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ following will output a list of processes running in the container:
8989
Name: "preserve-fds",
9090
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
9191
},
92+
cli.StringSliceFlag{
93+
Name: "cgroup",
94+
Usage: "run the process in an (existing) sub-cgroup(s). Format is [<container>:]<cgroup>.",
95+
},
9296
},
9397
Action: func(context *cli.Context) error {
9498
if err := checkArgs(context, 1, minArgs); err != nil {
@@ -107,6 +111,32 @@ following will output a list of processes running in the container:
107111
SkipArgReorder: true,
108112
}
109113

114+
// getSubCgroupPaths parses the values of the --cgroup flag into a map from
// cgroup controller name to sub-cgroup path.
//
// Each argument has one of two forms:
//
//   - "<path>": no controller prefix. The path is stored under the "" key.
//     This form is only accepted when it is the sole argument.
//   - "<controller>[,<controller>...]:<path>": the path is stored under
//     every listed controller name.
//
// It returns (nil, nil) when args is empty. An error is returned when an
// argument contains more than one colon, lacks the controller prefix while
// other arguments are present, names an empty controller, or names a
// controller that has already been assigned a path.
func getSubCgroupPaths(args []string) (map[string]string, error) {
	if len(args) == 0 {
		return nil, nil
	}
	paths := make(map[string]string, len(args))
	for _, c := range args {
		// Split into controller:path. Asking for up to three fields lets
		// us detect (and reject) a second colon.
		cs := strings.SplitN(c, ":", 3)
		switch len(cs) {
		case 1:
			// No "controller:" prefix; only valid as the single argument.
			if len(args) != 1 {
				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
			}
			paths[""] = c
		case 2:
			// There may be a few comma-separated controllers.
			for _, ctrl := range strings.Split(cs[0], ",") {
				if ctrl == "" {
					// An empty name would land on the "" key, which is
					// reserved for the prefix-less form above.
					return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
				}
				if _, dup := paths[ctrl]; dup {
					// Refuse to silently overwrite an earlier path.
					return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
				}
				paths[ctrl] = cs[1]
			}
		default:
			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
		}
	}
	return paths, nil
}
139+
110140
func execProcess(context *cli.Context) (int, error) {
111141
container, err := getContainer(context)
112142
if err != nil {
@@ -138,6 +168,11 @@ func execProcess(context *cli.Context) (int, error) {
138168
logLevel = "debug"
139169
}
140170

171+
cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
172+
if err != nil {
173+
return -1, err
174+
}
175+
141176
r := &runner{
142177
enableSubreaper: false,
143178
shouldDestroy: false,
@@ -149,6 +184,7 @@ func execProcess(context *cli.Context) (int, error) {
149184
init: false,
150185
preserveFDs: context.Int("preserve-fds"),
151186
logLevel: logLevel,
187+
subCgroupPaths: cgPaths,
152188
}
153189
return r.run(p)
154190
}

libcontainer/container_linux.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net"
1313
"os"
1414
"os/exec"
15+
"path"
1516
"path/filepath"
1617
"reflect"
1718
"strconv"
@@ -555,7 +556,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
555556
if err != nil {
556557
return nil, err
557558
}
558-
return &setnsProcess{
559+
proc := &setnsProcess{
559560
cmd: cmd,
560561
cgroupPaths: state.CgroupPaths,
561562
rootlessCgroups: c.config.RootlessCgroups,
@@ -567,7 +568,29 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
567568
process: p,
568569
bootstrapData: data,
569570
initProcessPid: state.InitProcessPid,
570-
}, nil
571+
}
572+
if len(p.SubCgroupPaths) > 0 {
573+
if add, ok := p.SubCgroupPaths[""]; ok {
574+
// cgroup v1: using the same path for all controllers.
575+
// cgroup v2: the only possible way.
576+
for k := range proc.cgroupPaths {
577+
proc.cgroupPaths[k] = path.Join(proc.cgroupPaths[k], add)
578+
}
579+
// cgroup v2: do not try to join init process's cgroup
580+
// as a fallback (see (*setnsProcess).start).
581+
proc.initProcessPid = 0
582+
} else {
583+
// Per-controller paths.
584+
for ctrl, add := range p.SubCgroupPaths {
585+
if val, ok := proc.cgroupPaths[ctrl]; ok {
586+
proc.cgroupPaths[ctrl] = path.Join(val, add)
587+
} else {
588+
return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
589+
}
590+
}
591+
}
592+
}
593+
return proc, nil
571594
}
572595

573596
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {

libcontainer/process.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ type Process struct {
8080
ops processOperations
8181

8282
LogLevel string
83+
84+
// SubCgroupPaths specifies sub-cgroups to run the process in.
85+
// Map keys are controller names, map values are paths (relative to
86+
// container's top-level cgroup).
87+
//
88+
// If empty, the default top-level container's cgroup is used.
89+
//
90+
// For cgroup v2, the only key allowed is "".
91+
SubCgroupPaths map[string]string
8392
}
8493

8594
// Wait waits for the process to exit.

libcontainer/process_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func (p *setnsProcess) start() (retErr error) {
130130
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
131131
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
132132
// Try to join the cgroup of InitProcessPid.
133-
if cgroups.IsCgroup2UnifiedMode() {
133+
if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
134134
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
135135
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
136136
if initCgErr == nil {

man/runc-exec.8.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,17 @@ multiple times.
5959
: Pass _N_ additional file descriptors to the container (**stdio** +
6060
**$LISTEN_FDS** + _N_ in total). Default is **0**.
6161

62+
**--cgroup** _path_ | _controller_[,_controller_...]:_path_
63+
: Execute a process in a sub-cgroup. If the specified cgroup does not exist, an
64+
error is returned. Default is empty path, which means to use container's top
65+
level cgroup.
66+
: For cgroup v1 only, a particular _controller_ (or multiple comma-separated
67+
controllers) can be specified, and the option can be used multiple times to set
68+
different paths for different controllers.
69+
: Note for cgroup v2, in case the process can't join the top level cgroup,
70+
**runc exec** falls back to joining the cgroup of the container's init process.
71+
This fallback can be disabled by using **--cgroup /**.
72+
6273
# EXIT STATUS
6374

6475
Exits with a status of _command_ (unless **-d** is used), or **255** if

tests/integration/exec.bats

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,114 @@ function check_exec_debug() {
187187
[[ "${output}" == *"level=debug"* ]]
188188
check_exec_debug "$output"
189189
}
190+
191+
@test "runc exec --cgroup sub-cgroups [v1]" {
192+
requires root cgroups_v1
193+
194+
set_cgroups_path
195+
set_cgroup_mount_writable
196+
197+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
198+
testcontainer test_busybox running
199+
200+
# Check we can't join non-existing subcgroup.
201+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
202+
[ "$status" -ne 0 ]
203+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
204+
205+
# Check we can't join non-existing subcgroup (for a particular controller).
206+
runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup
207+
[ "$status" -ne 0 ]
208+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
209+
210+
# Check we can't specify non-existent controller.
211+
runc exec --cgroup whaaat:/ test_busybox true
212+
[ "$status" -ne 0 ]
213+
[[ "$output" == *"unknown controller "* ]]
214+
215+
# Check we can join top-level cgroup (implicit).
216+
runc exec test_busybox cat /proc/self/cgroup
217+
[ "$status" -eq 0 ]
218+
! grep -v ":$REL_CGROUPS_PATH\$" <<<"$output"
219+
220+
# Check we can join top-level cgroup (explicit).
221+
runc exec --cgroup / test_busybox cat /proc/self/cgroup
222+
[ "$status" -eq 0 ]
223+
! grep -v ":$REL_CGROUPS_PATH\$" <<<"$output"
224+
225+
# Create a few subcgroups.
226+
# Note that cpu,cpuacct may be mounted together or separate.
227+
runc exec test_busybox sh -euc "mkdir -p /sys/fs/cgroup/memory/submem /sys/fs/cgroup/cpu/subcpu /sys/fs/cgroup/cpuacct/subcpu"
228+
[ "$status" -eq 0 ]
229+
230+
# Check that explicit --cgroup works.
231+
runc exec --cgroup memory:submem --cgroup cpu,cpuacct:subcpu test_busybox cat /proc/self/cgroup
232+
[ "$status" -eq 0 ]
233+
[[ "$output" == *":memory:$REL_CGROUPS_PATH/submem"* ]]
234+
[[ "$output" == *":cpu"*":$REL_CGROUPS_PATH/subcpu"* ]]
235+
}
236+
237+
@test "runc exec --cgroup subcgroup [v2]" {
238+
requires root cgroups_v2
239+
240+
set_cgroups_path
241+
set_cgroup_mount_writable
242+
243+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
244+
testcontainer test_busybox running
245+
246+
# Check we can't join non-existing subcgroup.
247+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
248+
[ "$status" -ne 0 ]
249+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
250+
251+
# Check we can join top-level cgroup (implicit).
252+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
253+
[ "$status" -eq 0 ]
254+
255+
# Check we can join top-level cgroup (explicit).
256+
runc exec --cgroup / test_busybox grep '^0::/$' /proc/self/cgroup
257+
[ "$status" -eq 0 ]
258+
259+
# Now move "init" to a subcgroup, and check it was moved.
260+
runc exec test_busybox sh -euc "mkdir /sys/fs/cgroup/foobar \
261+
&& echo 1 > /sys/fs/cgroup/foobar/cgroup.procs \
262+
&& grep -w foobar /proc/1/cgroup"
263+
[ "$status" -eq 0 ]
264+
265+
# The following part is taken from
266+
# @test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds"
267+
268+
# The init process is now in "/foobar", but an exec process can still
269+
# join "/" because we haven't enabled any domain controller yet.
270+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
271+
[ "$status" -eq 0 ]
272+
273+
# Turn on a domain controller (memory).
274+
runc exec test_busybox sh -euc 'echo $$ > /sys/fs/cgroup/foobar/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
275+
[ "$status" -eq 0 ]
276+
277+
# An exec process can no longer join "/" after turning on a domain
278+
# controller. Check that cgroup v2 fallback to init cgroup works.
279+
runc exec test_busybox sh -euc "cat /proc/self/cgroup && grep '^0::/foobar$' /proc/self/cgroup"
280+
[ "$status" -eq 0 ]
281+
282+
# Check that --cgroup / disables the init cgroup fallback.
283+
runc exec --cgroup / test_busybox true
284+
[ "$status" -ne 0 ]
285+
[[ "$output" == *" adding pid "*" to cgroups"*"/cgroup.procs: device or resource busy"* ]]
286+
287+
# Check that explicit --cgroup foobar works.
288+
runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup
289+
[ "$status" -eq 0 ]
290+
291+
# Check all processes are in foobar (this check is redundant).
292+
runc exec --cgroup foobar test_busybox sh -euc '! grep -vwH foobar /proc/*/cgroup'
293+
[ "$status" -eq 0 ]
294+
295+
# Add a second subcgroup, check we're in it.
296+
runc exec --cgroup foobar test_busybox mkdir /sys/fs/cgroup/second
297+
[ "$status" -eq 0 ]
298+
runc exec --cgroup second test_busybox grep -w second /proc/self/cgroup
299+
[ "$status" -eq 0 ]
300+
}

utils_linux.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ type runner struct {
258258
notifySocket *notifySocket
259259
criuOpts *libcontainer.CriuOpts
260260
logLevel string
261+
subCgroupPaths map[string]string
261262
}
262263

263264
func (r *runner) run(config *specs.Process) (int, error) {
@@ -277,6 +278,7 @@ func (r *runner) run(config *specs.Process) (int, error) {
277278
// Populate the fields that come from runner.
278279
process.Init = r.init
279280
process.LogLevel = r.logLevel
281+
process.SubCgroupPaths = r.subCgroupPaths
280282
if len(r.listenFDs) > 0 {
281283
process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1")
282284
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)

0 commit comments

Comments
 (0)