Skip to content

Commit 911366a

Browse files
committed
*: introduce pidfd-socket flag
Container managers such as containerd-shim can't use the cgroup.kill feature, or freeze all the processes in the cgroup, in order to terminate an exec init process. Calling kill(2) is also unsafe, since the pid can be recycled. It is therefore useful to provide the pidfd of the init process through a pidfd-socket, which works similarly to the console-socket. With the pidfd, a container manager like containerd-shim can send signals to the target process safely. And for the standard init process, the pidfd gives us polling support for the exit event instead of blocking on wait4. Signed-off-by: Wei Fu <[email protected]>
1 parent 27eb67a commit 911366a

File tree

14 files changed

+370
-5
lines changed

14 files changed

+370
-5
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ vendor/pkg
66
/contrib/cmd/seccompagent/seccompagent
77
/contrib/cmd/fs-idmap/fs-idmap
88
/contrib/cmd/memfd-bind/memfd-bind
9+
/contrib/cmd/pidfd-kill/pidfd-kill
910
man/man8
1011
release
1112
Vagrantfile

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,10 @@ runc-bin: runc-dmz
7171
$(GO_BUILD) -o runc .
7272

7373
.PHONY: all
74-
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind
74+
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill
7575

76-
.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind
77-
recvtty sd-helper seccompagent fs-idmap memfd-bind:
76+
.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill
77+
recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill:
7878
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@
7979

8080
.PHONY: static
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package main
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"net"
7+
"os"
8+
"os/signal"
9+
10+
"github.com/urfave/cli"
11+
"golang.org/x/sys/unix"
12+
13+
"github.com/opencontainers/runc/libcontainer/utils"
14+
)
15+
16+
const (
17+
usage = `Open Container Initiative contrib/cmd/pidfd-kill
18+
19+
pidfd-kill is an implementation of a consumer of runC's --pidfd-socket API.
20+
After received SIGTERM, pidfd-kill sends the given signal to init process by
21+
pidfd received from --pidfd-socket.
22+
23+
To use pidfd-kill, just specify a socket path at which you want to receive
24+
pidfd:
25+
26+
$ pidfd-kill [--signal KILL] socket.sock
27+
`
28+
)
29+
30+
func main() {
31+
app := cli.NewApp()
32+
app.Name = "pidfd-kill"
33+
app.Usage = usage
34+
35+
app.Flags = []cli.Flag{
36+
cli.StringFlag{
37+
Name: "signal",
38+
Value: "SIGKILL",
39+
Usage: "Signal to send to the init process",
40+
},
41+
cli.StringFlag{
42+
Name: "pid-file",
43+
Value: "",
44+
Usage: "Path to write the pidfd-kill process ID to",
45+
},
46+
}
47+
48+
app.Action = func(ctx *cli.Context) error {
49+
args := ctx.Args()
50+
if len(args) != 1 {
51+
return errors.New("required a single socket path")
52+
}
53+
54+
socketFile := ctx.Args()[0]
55+
56+
pidFile := ctx.String("pid-file")
57+
if pidFile != "" {
58+
pid := fmt.Sprintf("%d\n", os.Getpid())
59+
if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
60+
return err
61+
}
62+
defer os.Remove(pidFile)
63+
}
64+
65+
sigStr := ctx.String("signal")
66+
if sigStr == "" {
67+
sigStr = "SIGKILL"
68+
}
69+
sig := unix.SignalNum(sigStr)
70+
71+
pidfdFile, err := recvPidfd(socketFile)
72+
if err != nil {
73+
return err
74+
}
75+
defer pidfdFile.Close()
76+
77+
signalCh := make(chan os.Signal, 16)
78+
signal.Notify(signalCh, unix.SIGTERM)
79+
<-signalCh
80+
81+
return unix.PidfdSendSignal(int(pidfdFile.Fd()), sig, nil, 0)
82+
}
83+
if err := app.Run(os.Args); err != nil {
84+
fmt.Fprintln(os.Stderr, "fatal error:", err)
85+
os.Exit(1)
86+
}
87+
}
88+
89+
func recvPidfd(socketFile string) (*os.File, error) {
90+
ln, err := net.Listen("unix", socketFile)
91+
if err != nil {
92+
return nil, err
93+
}
94+
defer ln.Close()
95+
96+
conn, err := ln.Accept()
97+
if err != nil {
98+
return nil, err
99+
}
100+
defer conn.Close()
101+
102+
unixconn, ok := conn.(*net.UnixConn)
103+
if !ok {
104+
return nil, errors.New("failed to cast to unixconn")
105+
}
106+
107+
socket, err := unixconn.File()
108+
if err != nil {
109+
return nil, err
110+
}
111+
defer socket.Close()
112+
113+
return utils.RecvFile(socket)
114+
}

create.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
3434
Value: "",
3535
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3636
},
37+
cli.StringFlag{
38+
Name: "pidfd-socket",
39+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
40+
},
3741
cli.StringFlag{
3842
Name: "pid-file",
3943
Value: "",

exec.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ following will output a list of processes running in the container:
3333
Name: "console-socket",
3434
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3535
},
36+
cli.StringFlag{
37+
Name: "pidfd-socket",
38+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
39+
},
3640
cli.StringFlag{
3741
Name: "cwd",
3842
Usage: "current working directory in the container",
@@ -181,6 +185,7 @@ func execProcess(context *cli.Context) (int, error) {
181185
shouldDestroy: false,
182186
container: container,
183187
consoleSocket: context.String("console-socket"),
188+
pidfdSocket: context.String("pidfd-socket"),
184189
detach: context.Bool("detach"),
185190
pidFile: context.String("pid-file"),
186191
action: CT_ACT_RUN,

libcontainer/container_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
586586
cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
587587
}
588588

589+
if p.PidfdSocket != nil {
590+
cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
591+
cmd.Env = append(cmd.Env,
592+
"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
593+
)
594+
}
595+
589596
if safeExe != nil {
590597
// Due to a Go stdlib bug, we need to add safeExe to the set of
591598
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd

libcontainer/init_linux.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,16 @@ func startInitialization() (retErr error) {
179179
defer consoleSocket.Close()
180180
}
181181

182+
var pidfdSocket *os.File
183+
if envSockFd := os.Getenv("_LIBCONTAINER_PIDFD_SOCK"); envSockFd != "" {
184+
sockFd, err := strconv.Atoi(envSockFd)
185+
if err != nil {
186+
return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD_SOCK: %w", err)
187+
}
188+
pidfdSocket = os.NewFile(uintptr(sockFd), "pidfd-socket")
189+
defer pidfdSocket.Close()
190+
}
191+
182192
// Get mount files (O_PATH).
183193
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
184194
if err != nil {
@@ -222,10 +232,10 @@ func startInitialization() (retErr error) {
222232
}
223233

224234
// If init succeeds, it will not return, hence none of the defers will be called.
225-
return containerInit(it, &config, syncPipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
235+
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
226236
}
227237

228-
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
238+
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
229239
if err := populateProcessEnvironment(config.Env); err != nil {
230240
return err
231241
}
@@ -240,6 +250,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
240250
i := &linuxSetnsInit{
241251
pipe: pipe,
242252
consoleSocket: consoleSocket,
253+
pidfdSocket: pidfdSocket,
243254
config: config,
244255
logFd: logFd,
245256
dmzExe: dmzExe,
@@ -249,6 +260,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
249260
i := &linuxStandardInit{
250261
pipe: pipe,
251262
consoleSocket: consoleSocket,
263+
pidfdSocket: pidfdSocket,
252264
parentPid: unix.Getppid(),
253265
config: config,
254266
fifoFd: fifoFd,
@@ -690,3 +702,20 @@ func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
690702

691703
return nil
692704
}
705+
706+
// setupPidfd opens a process file descriptor of init process, and sends the
707+
// file descriptor back to the socket.
708+
func setupPidfd(socket *os.File, initType string) error {
709+
defer socket.Close()
710+
711+
pidFd, err := unix.PidfdOpen(os.Getpid(), 0)
712+
if err != nil {
713+
return fmt.Errorf("failed to pidfd_open: %w", err)
714+
}
715+
716+
if err := utils.SendRawFd(socket, initType, uintptr(pidFd)); err != nil {
717+
unix.Close(pidFd)
718+
return fmt.Errorf("failed to send pidfd on socket: %w", err)
719+
}
720+
return unix.Close(pidFd)
721+
}

libcontainer/process.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type Process struct {
7777
// ConsoleSocket provides the masterfd console.
7878
ConsoleSocket *os.File
7979

80+
// PidfdSocket is the socket over which the process sends its own pidfd (process file descriptor).
81+
PidfdSocket *os.File
82+
8083
// Init specifies whether the process is the first process in the container.
8184
Init bool
8285

libcontainer/setns_init_linux.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxSetnsInit struct {
2323
pipe *syncSocket
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
config *initConfig
2627
logFd int
2728
dmzExe *os.File
@@ -56,6 +57,11 @@ func (l *linuxSetnsInit) Init() error {
5657
return err
5758
}
5859
}
60+
if l.pidfdSocket != nil {
61+
if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
62+
return fmt.Errorf("failed to setup pidfd: %w", err)
63+
}
64+
}
5965
if l.config.NoNewPrivileges {
6066
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
6167
return err

libcontainer/standard_init_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxStandardInit struct {
2323
pipe *syncSocket
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
parentPid int
2627
fifoFd int
2728
logFd int
@@ -114,6 +115,12 @@ func (l *linuxStandardInit) Init() error {
114115
}
115116
}
116117

118+
if l.pidfdSocket != nil {
119+
if err := setupPidfd(l.pidfdSocket, "standard"); err != nil {
120+
return fmt.Errorf("failed to setup pidfd: %w", err)
121+
}
122+
}
123+
117124
// Finish the rootfs setup.
118125
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
119126
if err := finalizeRootfs(l.config.Config); err != nil {

0 commit comments

Comments
 (0)