Skip to content

Commit bf15cc9

Browse files
committed
cgroup v2: support rootless systemd
Tested with both Podman (master) and Moby (master), on Ubuntu 19.10 . $ podman --cgroup-manager=systemd run -it --rm --runtime=runc \ --cgroupns=host --memory 42m --cpus 0.42 --pids-limit 42 alpine / # cat /proc/self/cgroup 0::/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope / # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/memory.max 44040192 / # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/cpu.max 42000 100000 / # cat /sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/pids.max 42 Signed-off-by: Akihiro Suda <[email protected]>
1 parent 492cfd8 commit bf15cc9

File tree

13 files changed

+276
-43
lines changed

13 files changed

+276
-43
lines changed

.travis.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ matrix:
3636
- sudo ssh default -t 'cd /vagrant && sudo make localintegration RUNC_USE_SYSTEMD=yes'
3737
# same setup but with fs2 driver instead of systemd
3838
- sudo ssh default -t 'cd /vagrant && sudo make localintegration'
39-
# rootless
39+
# cgroupv2+systemd (rootless)
40+
- sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration RUNC_USE_SYSTEMD=yes'
41+
# same setup but with fs2 driver (rootless) instead of systemd
4042
- sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration'
4143
allow_failures:
4244
- go: tip

Vagrantfile

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,25 @@ EOF
2828
# Add a user for rootless tests
2929
useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
3030
31+
# Allow root to execute `ssh rootless@localhost` in tests/rootless.sh
32+
ssh-keygen -t ecdsa -N "" -f /root/rootless.key
33+
mkdir -m 0700 -p /home/rootless/.ssh
34+
cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
35+
chown -R rootless.rootless /home/rootless
36+
3137
# Add busybox for libcontainer/integration tests
3238
. /vagrant/tests/integration/multi-arch.bash \
3339
&& mkdir /busybox \
3440
&& curl -fsSL $(get_busybox) | tar xfJC - /busybox
41+
42+
# Delegate cgroup v2 controllers to rootless user via --systemd-cgroup
43+
mkdir -p /etc/systemd/system/user@.service.d
44+
cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
45+
[Service]
46+
# default: Delegate=pids memory
47+
# NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04). cpuset is ignored on Fedora 31.
48+
Delegate=cpu cpuset io memory pids
49+
EOF
50+
systemctl daemon-reload
3551
SHELL
3652
end

libcontainer/cgroups/systemd/common.go

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,13 @@ func ExpandSlice(slice string) (string, error) {
7171

7272
// getDbusConnection lazy initializes systemd dbus connection
7373
// and returns it
74-
func getDbusConnection() (*systemdDbus.Conn, error) {
74+
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
7575
connOnce.Do(func() {
76-
connDbus, connErr = systemdDbus.New()
76+
if rootless {
77+
connDbus, connErr = NewUserSystemdDbus()
78+
} else {
79+
connDbus, connErr = systemdDbus.New()
80+
}
7781
})
7882
return connDbus, connErr
7983
}
@@ -103,12 +107,7 @@ func isUnitExists(err error) bool {
103107
return false
104108
}
105109

106-
func startUnit(unitName string, properties []systemdDbus.Property) error {
107-
dbusConnection, err := getDbusConnection()
108-
if err != nil {
109-
return err
110-
}
111-
110+
func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
112111
statusChan := make(chan string, 1)
113112
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
114113
select {
@@ -129,12 +128,7 @@ func startUnit(unitName string, properties []systemdDbus.Property) error {
129128
return nil
130129
}
131130

132-
func stopUnit(unitName string) error {
133-
dbusConnection, err := getDbusConnection()
134-
if err != nil {
135-
return err
136-
}
137-
131+
func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
138132
statusChan := make(chan string, 1)
139133
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
140134
select {
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
// +build linux
2+
3+
package systemd
4+
5+
import (
6+
"bufio"
7+
"bytes"
8+
"os"
9+
"os/exec"
10+
"path/filepath"
11+
"strconv"
12+
"strings"
13+
14+
systemdDbus "github.com/coreos/go-systemd/v22/dbus"
15+
dbus "github.com/godbus/dbus/v5"
16+
"github.com/opencontainers/runc/libcontainer/system"
17+
"github.com/pkg/errors"
18+
)
19+
20+
// NewUserSystemdDbus creates a connection for systemd user-instance.
21+
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
22+
addr, err := DetectUserDbusSessionBusAddress()
23+
if err != nil {
24+
return nil, err
25+
}
26+
uid, err := DetectUID()
27+
if err != nil {
28+
return nil, err
29+
}
30+
31+
return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
32+
conn, err := dbus.Dial(addr)
33+
if err != nil {
34+
return nil, errors.Wrapf(err, "error while dialing %q", addr)
35+
}
36+
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
37+
err = conn.Auth(methods)
38+
if err != nil {
39+
conn.Close()
40+
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
41+
}
42+
if err = conn.Hello(); err != nil {
43+
conn.Close()
44+
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
45+
}
46+
return conn, nil
47+
})
48+
}
49+
50+
// DetectUID detects UID from the OwnerUID field of `busctl --user status`
51+
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
52+
//
53+
// Otherwise returns os.Getuid() .
54+
func DetectUID() (int, error) {
55+
if !system.RunningInUserNS() {
56+
return os.Getuid(), nil
57+
}
58+
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
59+
if err != nil {
60+
return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
61+
}
62+
scanner := bufio.NewScanner(bytes.NewReader(b))
63+
for scanner.Scan() {
64+
s := strings.TrimSpace(scanner.Text())
65+
if strings.HasPrefix(s, "OwnerUID=") {
66+
uidStr := strings.TrimPrefix(s, "OwnerUID=")
67+
i, err := strconv.Atoi(uidStr)
68+
if err != nil {
69+
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
70+
}
71+
return i, nil
72+
}
73+
}
74+
if err := scanner.Err(); err != nil {
75+
return -1, err
76+
}
77+
return -1, errors.New("could not detect the OwnerUID")
78+
}
79+
80+
// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
81+
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
82+
// Otherwise parses the value from `systemctl --user show-environment` .
83+
func DetectUserDbusSessionBusAddress() (string, error) {
84+
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
85+
return env, nil
86+
}
87+
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
88+
busPath := filepath.Join(xdr, "bus")
89+
if _, err := os.Stat(busPath); err == nil {
90+
busAddress := "unix:path=" + busPath
91+
return busAddress, nil
92+
}
93+
}
94+
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
95+
if err != nil {
96+
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
97+
}
98+
scanner := bufio.NewScanner(bytes.NewReader(b))
99+
for scanner.Scan() {
100+
s := strings.TrimSpace(scanner.Text())
101+
if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") {
102+
return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil
103+
}
104+
}
105+
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
106+
}

libcontainer/cgroups/systemd/v1.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,11 @@ func (m *LegacyManager) Apply(pid int) error {
182182
properties = append(properties, resourcesProperties...)
183183
properties = append(properties, c.SystemdProps...)
184184

185-
if err := startUnit(unitName, properties); err != nil {
185+
dbusConnection, err := getDbusConnection(false)
186+
if err != nil {
187+
return err
188+
}
189+
if err := startUnit(dbusConnection, unitName, properties); err != nil {
186190
return err
187191
}
188192

@@ -213,8 +217,12 @@ func (m *LegacyManager) Destroy() error {
213217
m.mu.Lock()
214218
defer m.mu.Unlock()
215219

220+
dbusConnection, err := getDbusConnection(false)
221+
if err != nil {
222+
return err
223+
}
216224
unitName := getUnitName(m.Cgroups)
217-
if err := stopUnit(unitName); err != nil {
225+
if err := stopUnit(dbusConnection, unitName); err != nil {
218226
return err
219227
}
220228
m.Paths = make(map[string]string)
@@ -371,7 +379,7 @@ func (m *LegacyManager) Set(container *configs.Config) error {
371379
if err != nil {
372380
return err
373381
}
374-
dbusConnection, err := getDbusConnection()
382+
dbusConnection, err := getDbusConnection(false)
375383
if err != nil {
376384
return err
377385
}

libcontainer/cgroups/systemd/v2.go

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"math"
77
"os"
88
"path/filepath"
9+
"strconv"
910
"strings"
1011
"sync"
1112

@@ -14,6 +15,7 @@ import (
1415
"github.com/opencontainers/runc/libcontainer/cgroups"
1516
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
1617
"github.com/opencontainers/runc/libcontainer/configs"
18+
"github.com/pkg/errors"
1719
)
1820

1921
type unifiedManager struct {
@@ -89,14 +91,17 @@ func (m *unifiedManager) Apply(pid int) error {
8991
var (
9092
c = m.cgroups
9193
unitName = getUnitName(c)
92-
slice = "system.slice"
9394
properties []systemdDbus.Property
9495
)
9596

9697
if c.Paths != nil {
9798
return cgroups.WriteCgroupProc(m.path, pid)
9899
}
99100

101+
slice := "system.slice"
102+
if m.rootless {
103+
slice = "user.slice"
104+
}
100105
if c.Parent != "" {
101106
slice = c.Parent
102107
}
@@ -140,9 +145,13 @@ func (m *unifiedManager) Apply(pid int) error {
140145
properties = append(properties, resourcesProperties...)
141146
properties = append(properties, c.SystemdProps...)
142147

143-
if err := startUnit(unitName, properties); err != nil {
148+
dbusConnection, err := getDbusConnection(m.rootless)
149+
if err != nil {
144150
return err
145151
}
152+
if err := startUnit(dbusConnection, unitName, properties); err != nil {
153+
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
154+
}
146155

147156
_, err = m.GetUnifiedPath()
148157
if err != nil {
@@ -161,13 +170,17 @@ func (m *unifiedManager) Destroy() error {
161170
m.mu.Lock()
162171
defer m.mu.Unlock()
163172

173+
dbusConnection, err := getDbusConnection(m.rootless)
174+
if err != nil {
175+
return err
176+
}
164177
unitName := getUnitName(m.cgroups)
165-
if err := stopUnit(unitName); err != nil {
178+
if err := stopUnit(dbusConnection, unitName); err != nil {
166179
return err
167180
}
168181

169182
// XXX this is probably not needed, systemd should handle it
170-
err := os.Remove(m.path)
183+
err = os.Remove(m.path)
171184
if err != nil && !os.IsNotExist(err) {
172185
return err
173186
}
@@ -190,31 +203,66 @@ func (m *unifiedManager) GetPaths() map[string]string {
190203
return paths
191204
}
192205

206+
// getSliceFull value is used in GetUnifiedPath.
207+
// The value is incompatible with systemdDbus.PropSlice.
208+
func (m *unifiedManager) getSliceFull() (string, error) {
209+
c := m.cgroups
210+
slice := "system.slice"
211+
if m.rootless {
212+
slice = "user.slice"
213+
}
214+
if c.Parent != "" {
215+
var err error
216+
slice, err = ExpandSlice(c.Parent)
217+
if err != nil {
218+
return "", err
219+
}
220+
}
221+
222+
if m.rootless {
223+
dbusConnection, err := getDbusConnection(m.rootless)
224+
if err != nil {
225+
return "", err
226+
}
227+
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
228+
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
229+
if err != nil {
230+
return "", err
231+
}
232+
managerCG, err := strconv.Unquote(managerCGQuoted)
233+
if err != nil {
234+
return "", err
235+
}
236+
slice = filepath.Join(managerCG, slice)
237+
}
238+
239+
// an example of the final slice in rootless: "/user.slice/user-1001.slice/[email protected]/user.slice"
240+
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/[email protected]/" prefix NOT to be specified.
241+
return slice, nil
242+
}
243+
193244
func (m *unifiedManager) GetUnifiedPath() (string, error) {
194245
m.mu.Lock()
195246
defer m.mu.Unlock()
196247
if m.path != "" {
197248
return m.path, nil
198249
}
199250

200-
c := m.cgroups
201-
slice := "system.slice"
202-
if c.Parent != "" {
203-
slice = c.Parent
204-
}
205-
206-
slice, err := ExpandSlice(slice)
251+
sliceFull, err := m.getSliceFull()
207252
if err != nil {
208253
return "", err
209254
}
210255

211-
path := filepath.Join(slice, getUnitName(c))
256+
c := m.cgroups
257+
path := filepath.Join(sliceFull, getUnitName(c))
212258
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
213259
if err != nil {
214260
return "", err
215261
}
216262
m.path = path
217263

264+
// an example of the final path in rootless:
265+
// "/sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
218266
return m.path, nil
219267
}
220268

@@ -263,12 +311,12 @@ func (m *unifiedManager) Set(container *configs.Config) error {
263311
if err != nil {
264312
return err
265313
}
266-
dbusConnection, err := getDbusConnection()
314+
dbusConnection, err := getDbusConnection(m.rootless)
267315
if err != nil {
268316
return err
269317
}
270318
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
271-
return err
319+
return errors.Wrap(err, "error while setting unit properties")
272320
}
273321

274322
fsMgr, err := m.fsManager()

0 commit comments

Comments
 (0)