Skip to content

Commit 862beb6

Browse files
committed
runc update: implement memory.checkBeforeUpdate
This is aimed at solving the problem of cgroup v2 memory controller behavior which is not compatible with that of cgroup v1. In cgroup v1, if the new memory limit being set is lower than the current usage, setting the new limit fails. In cgroup v2, same operation succeeds, and the container is OOM killed. Introduce a new setting, memory.checkBeforeUpdate, and use it to mimic cgroup v1 behavior. Note that this is not 100% reliable because of TOCTOU, but this is the best we can do. Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent 2e8b7a1 commit 862beb6

File tree

11 files changed

+112
-12
lines changed

11 files changed

+112
-12
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/godbus/dbus/v5 v5.1.0
1313
github.com/moby/sys/mountinfo v0.6.2
1414
github.com/mrunalp/fileutils v0.5.0
15-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b
15+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78
1616
github.com/opencontainers/selinux v1.10.1
1717
github.com/seccomp/libseccomp-golang v0.10.0
1818
github.com/sirupsen/logrus v1.9.0

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ github.com/moby/sys/mountinfo v0.6.2 h1:BzJjoreD5BMFNmD9Rus6gdd1pLuecOFPt8wC+Vyg
3131
github.com/moby/sys/mountinfo v0.6.2/go.mod h1:IJb6JQeOklcdMU9F5xQ8ZALD+CUr5VlGpwtX+VE0rpI=
3232
github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4=
3333
github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ=
34-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b h1:udwtfS44rxYE/ViMLchHQBjfE60GZSB1arY7BFbyxLs=
35-
github.com/opencontainers/runtime-spec v1.0.3-0.20220718201635-a8106e99982b/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
34+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4=
35+
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
3636
github.com/opencontainers/selinux v1.10.1 h1:09LIPVRP3uuZGQvgR+SgMSNBd1Eb3vlRbGqQpoHsF8w=
3737
github.com/opencontainers/selinux v1.10.1/go.mod h1:2i0OySw99QjzBBQByd1Gr9gSjvuho1lHsJxIJ3gGbJI=
3838
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

libcontainer/cgroups/fs2/fs2.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,32 @@ func (m *manager) OOMKillCount() (uint64, error) {
269269

270270
return c, err
271271
}
272+
273+
func CheckMemoryUsage(dirPath string, r *configs.Resources) error {
274+
if !r.MemoryCheckBeforeUpdate {
275+
return nil
276+
}
277+
278+
if r.Memory <= 0 && r.MemorySwap <= 0 {
279+
return nil
280+
}
281+
282+
usage, err := fscommon.GetCgroupParamUint(dirPath, "memory.current")
283+
if err != nil {
284+
return nil
285+
}
286+
287+
if r.MemorySwap > 0 {
288+
if uint64(r.MemorySwap) <= usage {
289+
return fmt.Errorf("rejecting memory+swap limit %d > usage %d", r.MemorySwap, usage)
290+
}
291+
}
292+
293+
if r.Memory > 0 {
294+
if uint64(r.Memory) <= usage {
295+
return fmt.Errorf("rejecting memory limit %d > usage %d", r.Memory, usage)
296+
}
297+
}
298+
299+
return nil
300+
}

libcontainer/cgroups/fs2/memory.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ func setMemory(dirPath string, r *configs.Resources) error {
4040
if !isMemorySet(r) {
4141
return nil
4242
}
43+
44+
if err := CheckMemoryUsage(dirPath, r); err != nil {
45+
return err
46+
}
47+
4348
swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory)
4449
if err != nil {
4550
return err

libcontainer/cgroups/systemd/v2.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,11 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
174174
return props, nil
175175
}
176176

177-
func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
177+
func genV2ResourcesProperties(dirPath string, r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) {
178+
if err := fs2.CheckMemoryUsage(dirPath, r); err != nil {
179+
return nil, err
180+
}
181+
178182
var properties []systemdDbus.Property
179183

180184
// NOTE: This is of questionable correctness because we insert our own
@@ -437,7 +441,7 @@ func (m *unifiedManager) Set(r *configs.Resources) error {
437441
if r == nil {
438442
return nil
439443
}
440-
properties, err := genV2ResourcesProperties(r, m.dbus)
444+
properties, err := genV2ResourcesProperties(m.fsMgr.Path(""), r, m.dbus)
441445
if err != nil {
442446
return err
443447
}

libcontainer/configs/cgroup_linux.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,9 @@ type Resources struct {
155155
// during Set() to figure out whether the freeze is required. Those
156156
// methods may be relatively slow, thus this flag.
157157
SkipFreezeOnSet bool `json:"-"`
158+
159+
// MemoryCheckBeforeUpdate is a flag for cgroup v2 managers to check
160+
// if the new memory limits (Memory and MemorySwap) being set are lower
161+
// than the current memory usage, and reject if so.
162+
MemoryCheckBeforeUpdate bool `json:"memory_check_before_update"`
158163
}

libcontainer/specconv/spec_linux.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi
723723
if r.Memory.DisableOOMKiller != nil {
724724
c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller
725725
}
726+
if r.Memory.CheckBeforeUpdate != nil {
727+
c.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
728+
}
726729
}
727730
if r.CPU != nil {
728731
if r.CPU.Shares != nil {

tests/integration/update.bats

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -700,3 +700,43 @@ EOF
700700
runc resume test_update
701701
[ "$status" -eq 0 ]
702702
}
703+
704+
@test "update memory vs CheckBeforeUpdate" {
705+
requires cgroups_v2
706+
[ $EUID -ne 0 ] && requires rootless_cgroup
707+
708+
runc run -d --console-socket "$CONSOLE_SOCKET" test_update
709+
[ "$status" -eq 0 ]
710+
711+
# Setting memory to low value with checkBeforeUpdate=true should fail.
712+
runc update -r - test_update <<EOF
713+
{
714+
"memory": {
715+
"limit": 1024,
716+
"checkBeforeUpdate": true
717+
}
718+
}
719+
EOF
720+
[ "$status" -ne 0 ]
721+
[[ "$output" == *"rejecting memory limit"* ]]
722+
testcontainer test_update running
723+
724+
# Setting memory+swap to low value with checkBeforeUpdate=true should fail.
725+
runc update -r - test_update <<EOF
726+
{
727+
"memory": {
728+
"limit": 1024,
729+
"swap": 2048,
730+
"checkBeforeUpdate": true
731+
}
732+
}
733+
EOF
734+
[ "$status" -ne 0 ]
735+
[[ "$output" == *"rejecting memory+swap limit"* ]]
736+
testcontainer test_update running
737+
738+
# This should succeed but the container will be OOM killed.
739+
runc update test_update --memory 1024
740+
[ "$status" -eq 0 ]
741+
testcontainer test_update stopped
742+
}

update.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
func i64Ptr(i int64) *int64 { return &i }
2121
func u64Ptr(i uint64) *uint64 { return &i }
2222
func u16Ptr(i uint16) *uint16 { return &i }
23+
func boolPtr(b bool) *bool { return &b }
2324

2425
var updateCommand = cli.Command{
2526
Name: "update",
@@ -37,7 +38,8 @@ The accepted format is as follow (unchanged values can be omitted):
3738
"memory": {
3839
"limit": 0,
3940
"reservation": 0,
40-
"swap": 0
41+
"swap": 0,
42+
"checkBeforeUpdate": true
4143
},
4244
"cpu": {
4345
"shares": 0,
@@ -136,11 +138,12 @@ other options are ignored.
136138

137139
r := specs.LinuxResources{
138140
Memory: &specs.LinuxMemory{
139-
Limit: i64Ptr(0),
140-
Reservation: i64Ptr(0),
141-
Swap: i64Ptr(0),
142-
Kernel: i64Ptr(0),
143-
KernelTCP: i64Ptr(0),
141+
Limit: i64Ptr(0),
142+
Reservation: i64Ptr(0),
143+
Swap: i64Ptr(0),
144+
Kernel: i64Ptr(0),
145+
KernelTCP: i64Ptr(0),
146+
CheckBeforeUpdate: boolPtr(false),
144147
},
145148
CPU: &specs.LinuxCPU{
146149
Shares: u64Ptr(0),
@@ -293,6 +296,7 @@ other options are ignored.
293296
config.Cgroups.Resources.Memory = *r.Memory.Limit
294297
config.Cgroups.Resources.MemoryReservation = *r.Memory.Reservation
295298
config.Cgroups.Resources.MemorySwap = *r.Memory.Swap
299+
config.Cgroups.Resources.MemoryCheckBeforeUpdate = *r.Memory.CheckBeforeUpdate
296300
config.Cgroups.Resources.PidsLimit = r.Pids.Limit
297301
config.Cgroups.Resources.Unified = r.Unified
298302

vendor/github.com/opencontainers/runtime-spec/specs-go/config.go

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)