Skip to content

Commit 33b44c1

Browse files
committed
runc exec: implement --cgroup
In some setups, multiple cgroups are used inside a container, and sometimes there is a need to execute a process in a particular sub-cgroup (in case of cgroup v1, for a particular controller). This is what this commit implements. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent a52d8ae commit 33b44c1

File tree

7 files changed

+196
-3
lines changed

7 files changed

+196
-3
lines changed

exec.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@ following will output a list of processes running in the container:
8989
Name: "preserve-fds",
9090
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
9191
},
92+
cli.StringSliceFlag{
93+
Name: "cgroup",
94+
Usage: "run the process in an (existing) sub-cgroup(s). Format is [<controller>:]<cgroup>.",
95+
},
9296
},
9397
Action: func(context *cli.Context) error {
9498
if err := checkArgs(context, 1, minArgs); err != nil {
@@ -106,6 +110,32 @@ following will output a list of processes running in the container:
106110
SkipArgReorder: true,
107111
}
108112

113+
// getSubCgroupPaths parses the values of the --cgroup option into a map
// from cgroup controller name to sub-cgroup path.
//
// Each argument is either a bare "<path>" or
// "<controller>[,<controller>...]:<path>". A bare path is only accepted
// when it is the sole argument, and is stored under the empty key "".
// With no arguments, a nil map (and a nil error) is returned.
//
// An error is returned if an argument contains more than one colon, if a
// bare path is combined with other arguments, if a controller name is
// empty, or if the same controller is specified more than once.
func getSubCgroupPaths(args []string) (map[string]string, error) {
	if len(args) == 0 {
		return nil, nil
	}
	paths := make(map[string]string, len(args))
	for _, c := range args {
		// Split into controller:path. A limit of 3 lets us detect (and
		// reject) arguments with more than one colon.
		cs := strings.SplitN(c, ":", 3)
		switch len(cs) {
		case 1:
			// No "controller:" prefix: a single path, which must be
			// the only --cgroup argument.
			if len(args) != 1 {
				return nil, fmt.Errorf("invalid --cgroup argument: %s (missing <controller>: prefix)", c)
			}
			paths[""] = c
		case 2:
			// There may be a few comma-separated controllers.
			for _, ctrl := range strings.Split(cs[0], ",") {
				if ctrl == "" {
					// The empty key is reserved for the bare-path
					// form above; do not let ":path" or "a,,b:path"
					// produce it by accident.
					return nil, fmt.Errorf("invalid --cgroup argument: %s (empty <controller> prefix)", c)
				}
				if _, ok := paths[ctrl]; ok {
					// Silently letting the last value win would hide
					// a conflicting command line.
					return nil, fmt.Errorf("invalid --cgroup argument(s): controller %s specified multiple times", ctrl)
				}
				paths[ctrl] = cs[1]
			}
		default:
			return nil, fmt.Errorf("invalid --cgroup argument: %s", c)
		}
	}
	return paths, nil
}
138+
109139
func execProcess(context *cli.Context) (int, error) {
110140
container, err := getContainer(context)
111141
if err != nil {
@@ -137,6 +167,11 @@ func execProcess(context *cli.Context) (int, error) {
137167
logLevel = "debug"
138168
}
139169

170+
cgPaths, err := getSubCgroupPaths(context.StringSlice("cgroup"))
171+
if err != nil {
172+
return -1, err
173+
}
174+
140175
r := &runner{
141176
enableSubreaper: false,
142177
shouldDestroy: false,
@@ -148,6 +183,7 @@ func execProcess(context *cli.Context) (int, error) {
148183
init: false,
149184
preserveFDs: context.Int("preserve-fds"),
150185
logLevel: logLevel,
186+
subCgroupPaths: cgPaths,
151187
}
152188
return r.run(p)
153189
}

libcontainer/container_linux.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"net"
1313
"os"
1414
"os/exec"
15+
"path"
1516
"path/filepath"
1617
"reflect"
1718
"strconv"
@@ -555,7 +556,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
555556
if err != nil {
556557
return nil, err
557558
}
558-
return &setnsProcess{
559+
proc := &setnsProcess{
559560
cmd: cmd,
560561
cgroupPaths: state.CgroupPaths,
561562
rootlessCgroups: c.config.RootlessCgroups,
@@ -567,7 +568,29 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
567568
process: p,
568569
bootstrapData: data,
569570
initProcessPid: state.InitProcessPid,
570-
}, nil
571+
}
572+
if len(p.SubCgroupPaths) > 0 {
573+
if add, ok := p.SubCgroupPaths[""]; ok {
574+
// cgroup v1: using the same path for all controllers.
575+
// cgroup v2: the only possible way.
576+
for k := range proc.cgroupPaths {
577+
proc.cgroupPaths[k] = path.Join(proc.cgroupPaths[k], add)
578+
}
579+
// cgroup v2: do not try to join init process's cgroup
580+
// as a fallback (see (*setnsProcess).start).
581+
proc.initProcessPid = 0
582+
} else {
583+
// Per-controller paths.
584+
for ctrl, add := range p.SubCgroupPaths {
585+
if val, ok := proc.cgroupPaths[ctrl]; ok {
586+
proc.cgroupPaths[ctrl] = path.Join(val, add)
587+
} else {
588+
return nil, fmt.Errorf("unknown controller %s in SubCgroupPaths", ctrl)
589+
}
590+
}
591+
}
592+
}
593+
return proc, nil
571594
}
572595

573596
func (c *linuxContainer) newInitConfig(process *Process) *initConfig {

libcontainer/process.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,15 @@ type Process struct {
8080
ops processOperations
8181

8282
LogLevel string
83+
84+
// SubCgroupPaths specifies sub-cgroups to run the process in.
85+
// Map keys are controller names, map values are paths (relative to
86+
// container's top-level cgroup).
87+
//
88+
// If empty, the default top-level container's cgroup is used.
89+
//
90+
// For cgroup v2, the only key allowed is "".
91+
SubCgroupPaths map[string]string
8392
}
8493

8594
// Wait waits for the process to exit.

libcontainer/process_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ func (p *setnsProcess) start() (retErr error) {
134134
// On cgroup v2 + nesting + domain controllers, WriteCgroupProc may fail with EBUSY.
135135
// https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643
136136
// Try to join the cgroup of InitProcessPid.
137-
if cgroups.IsCgroup2UnifiedMode() {
137+
if cgroups.IsCgroup2UnifiedMode() && p.initProcessPid != 0 {
138138
initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid)
139139
initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile)
140140
if initCgErr == nil {

man/runc-exec.8.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,17 @@ multiple times.
5959
: Pass _N_ additional file descriptors to the container (**stdio** +
6060
**$LISTEN_FDS** + _N_ in total). Default is **0**.
6161

62+
**--cgroup** _path_ | _controller_[,_controller_...]:_path_
63+
: Execute a process in a sub-cgroup. If the specified cgroup does not exist, an
64+
error is returned. Default is empty path, which means to use container's top
65+
level cgroup.
66+
: For cgroup v1 only, a particular _controller_ (or multiple comma-separated
67+
controllers) can be specified, and the option can be used multiple times to set
68+
different _path_s for different _controllers_.
69+
: Note for cgroup v2, in case the process can't join the top level cgroup,
70+
**runc exec** fallback is to try joining the cgroup of container's init.
71+
This fallback can be disabled by using **--cgroup /**.
72+
6273
# EXAMPLES
6374
If the container can run **ps**(1) command, the following
6475
will output a list of processes running in the container:

tests/integration/exec.bats

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,3 +167,115 @@ function check_exec_debug() {
167167
[[ "${output}" == *"level=debug"* ]]
168168
check_exec_debug "$output"
169169
}
170+
171+
@test "runc exec --cgroup sub-cgroups [v1]" {
172+
requires root cgroups_v1 cgroupns
173+
174+
set_cgroups_path
175+
set_cgroup_mount_writable
176+
# Enable CGROUPNS (to simplify test case working with cgroup paths).
177+
update_config '.linux.namespaces += [{"type": "cgroup"}]'
178+
179+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
180+
testcontainer test_busybox running
181+
182+
# Check we can't join non-existing subcgroup.
183+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
184+
[ "$status" -ne 0 ]
185+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
186+
187+
# Check we can't join non-existing subcgroup (for a particular controller).
188+
runc exec --cgroup cpu:nonexistent test_busybox cat /proc/self/cgroup
189+
[ "$status" -ne 0 ]
190+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
191+
192+
# Check we can't specify non-existent controller.
193+
runc exec --cgroup whaaat:/ test_busybox true
194+
[ "$status" -ne 0 ]
195+
[[ "$output" == *"unknown controller "* ]]
196+
197+
# Check we can join top-level cgroup (implicit).
198+
runc exec test_busybox sh -euc "! grep -v ':/$' /proc/self/cgroup"
199+
[ "$status" -eq 0 ]
200+
[ "$output" = "" ]
201+
202+
# Check we can join top-level cgroup (explicit).
203+
runc exec --cgroup / test_busybox sh -euc "! grep -v ':/$' /proc/self/cgroup"
204+
[ "$status" -eq 0 ]
205+
[ "$output" = "" ]
206+
207+
# Create a few subcgroups.
208+
runc exec test_busybox sh -euc "mkdir -p /sys/fs/cgroup/memory/submem /sys/fs/cgroup/cpu/subcpu /sys/fs/cgroup/cpuacct/subcpu"
209+
[ "$status" -eq 0 ]
210+
211+
# Check that explicit --cgroup works.
212+
runc exec --cgroup memory:submem --cgroup cpu,cpuacct:subcpu test_busybox cat /proc/self/cgroup
213+
[ "$status" -eq 0 ]
214+
[[ "$output" == *":memory:/submem"* ]]
215+
[[ "$output" == *":cpu"*":/subcpu"* ]]
216+
}
217+
218+
@test "runc exec --cgroup subcgroup [v2]" {
219+
requires root cgroups_v2
220+
221+
set_cgroups_path
222+
set_cgroup_mount_writable
223+
224+
__runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
225+
testcontainer test_busybox running
226+
227+
# Check we can't join non-existing subcgroup.
228+
runc exec --cgroup nonexistent test_busybox cat /proc/self/cgroup
229+
[ "$status" -ne 0 ]
230+
[[ "$output" == *" adding pid "*"/nonexistent/cgroup.procs: no such file "* ]]
231+
232+
# Check we can join top-level cgroup (implicit).
233+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
234+
[ "$status" -eq 0 ]
235+
236+
# Check we can join top-level cgroup (explicit).
237+
runc exec --cgroup / test_busybox grep '^0::/$' /proc/self/cgroup
238+
[ "$status" -eq 0 ]
239+
240+
# Now move "init" to a subcgroup, and check it was moved.
241+
runc exec test_busybox sh -euc "mkdir /sys/fs/cgroup/foobar \
242+
&& echo 1 > /sys/fs/cgroup/foobar/cgroup.procs \
243+
&& grep -w foobar /proc/1/cgroup"
244+
[ "$status" -eq 0 ]
245+
246+
# The following part is taken from
247+
# @test "runc exec (cgroup v2 + init process in non-root cgroup) succeeds"
248+
249+
# The init process is now in "/foobar", but an exec process can still
250+
# join "/" because we haven't enabled any domain controller yet.
251+
runc exec test_busybox grep '^0::/$' /proc/self/cgroup
252+
[ "$status" -eq 0 ]
253+
254+
# Turn on a domain controller (memory).
255+
runc exec test_busybox sh -euc 'echo $$ > /sys/fs/cgroup/foobar/cgroup.procs; echo +memory > /sys/fs/cgroup/cgroup.subtree_control'
256+
[ "$status" -eq 0 ]
257+
258+
# An exec process can no longer join "/" after turning on a domain
259+
# controller. Check that cgroup v2 fallback to init cgroup works.
260+
runc exec test_busybox sh -euc "cat /proc/self/cgroup && grep '^0::/foobar$' /proc/self/cgroup"
261+
[ "$status" -eq 0 ]
262+
263+
# Check that --cgroup / disables the init cgroup fallback.
264+
runc exec --cgroup / test_busybox true
265+
[ "$status" -ne 0 ]
266+
[[ "$output" == *" adding pid "*" to cgroups"*"/cgroup.procs: device or resource busy"* ]]
267+
268+
# Check that explicit --cgroup foobar works.
269+
runc exec --cgroup foobar test_busybox grep '^0::/foobar$' /proc/self/cgroup
270+
[ "$status" -eq 0 ]
271+
272+
# Check all processes are in foobar (this check is redundant).
273+
runc exec --cgroup foobar test_busybox sh -euc '! grep -vwH foobar /proc/*/cgroup'
274+
[ "$status" -eq 0 ]
275+
276+
# Add a second subcgroup, check we're in it.
277+
runc exec --cgroup foobar test_busybox mkdir /sys/fs/cgroup/second
278+
[ "$status" -eq 0 ]
279+
runc exec --cgroup second test_busybox grep -w second /proc/self/cgroup
280+
[ "$status" -eq 0 ]
281+
}

utils_linux.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ type runner struct {
258258
notifySocket *notifySocket
259259
criuOpts *libcontainer.CriuOpts
260260
logLevel string
261+
subCgroupPaths map[string]string
261262
}
262263

263264
func (r *runner) run(config *specs.Process) (int, error) {
@@ -277,6 +278,7 @@ func (r *runner) run(config *specs.Process) (int, error) {
277278
// Populate the fields that come from runner.
278279
process.Init = r.init
279280
process.LogLevel = r.logLevel
281+
process.SubCgroupPaths = r.subCgroupPaths
280282
if len(r.listenFDs) > 0 {
281283
process.Env = append(process.Env, "LISTEN_FDS="+strconv.Itoa(len(r.listenFDs)), "LISTEN_PID=1")
282284
process.ExtraFiles = append(process.ExtraFiles, r.listenFDs...)

0 commit comments

Comments
 (0)