Skip to content

Commit 1105572

Browse files
committed
*: introduce pidfd-socket flag
A container manager such as containerd-shim can't use the cgroup.kill feature or freeze all the processes in the cgroup to terminate the exec init process. It's unsafe to call kill(2) since the pid can be recycled. It's good to provide the pidfd of the init process through the pidfd-socket, which is similar to the console-socket. With the pidfd, a container manager such as containerd-shim can send signals to the target process safely. And for the standard init process, we can have polling support to get the exit event instead of blocking on wait4. Signed-off-by: Wei Fu <[email protected]>
1 parent 141835c commit 1105572

File tree

9 files changed

+99
-2
lines changed

9 files changed

+99
-2
lines changed

create.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
3434
Value: "",
3535
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3636
},
37+
cli.StringFlag{
38+
Name: "pidfd-socket",
39+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
40+
},
3741
cli.StringFlag{
3842
Name: "pid-file",
3943
Value: "",

exec.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ following will output a list of processes running in the container:
3333
Name: "console-socket",
3434
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3535
},
36+
cli.StringFlag{
37+
Name: "pidfd-socket",
38+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
39+
},
3640
cli.StringFlag{
3741
Name: "cwd",
3842
Usage: "current working directory in the container",
@@ -181,6 +185,7 @@ func execProcess(context *cli.Context) (int, error) {
181185
shouldDestroy: false,
182186
container: container,
183187
consoleSocket: context.String("console-socket"),
188+
pidfdSocket: context.String("pidfd-socket"),
184189
detach: context.Bool("detach"),
185190
pidFile: context.String("pid-file"),
186191
action: CT_ACT_RUN,

libcontainer/container_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
586586
cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
587587
}
588588

589+
if p.PidfdSocket != nil {
590+
cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
591+
cmd.Env = append(cmd.Env,
592+
"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
593+
)
594+
}
595+
589596
if safeExe != nil {
590597
// Due to a Go stdlib bug, we need to add safeExe to the set of
591598
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd

libcontainer/init_linux.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,16 @@ func startInitialization() (retErr error) {
179179
defer consoleSocket.Close()
180180
}
181181

182+
var pidfdSocket *os.File
183+
if envSockFd := os.Getenv("_LIBCONTAINER_PIDFD_SOCK"); envSockFd != "" {
184+
sockFd, err := strconv.Atoi(envSockFd)
185+
if err != nil {
186+
return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD_SOCK: %w", err)
187+
}
188+
pidfdSocket = os.NewFile(uintptr(sockFd), "pidfd-socket")
189+
defer pidfdSocket.Close()
190+
}
191+
182192
// Get mount files (O_PATH).
183193
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
184194
if err != nil {
@@ -222,10 +232,10 @@ func startInitialization() (retErr error) {
222232
}
223233

224234
// If init succeeds, it will not return, hence none of the defers will be called.
225-
return containerInit(it, &config, syncPipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
235+
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
226236
}
227237

228-
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
238+
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
229239
if err := populateProcessEnvironment(config.Env); err != nil {
230240
return err
231241
}
@@ -240,6 +250,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
240250
i := &linuxSetnsInit{
241251
pipe: pipe,
242252
consoleSocket: consoleSocket,
253+
pidfdSocket: pidfdSocket,
243254
config: config,
244255
logFd: logFd,
245256
dmzExe: dmzExe,
@@ -249,6 +260,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
249260
i := &linuxStandardInit{
250261
pipe: pipe,
251262
consoleSocket: consoleSocket,
263+
pidfdSocket: pidfdSocket,
252264
parentPid: unix.Getppid(),
253265
config: config,
254266
fifoFd: fifoFd,
@@ -676,3 +688,20 @@ func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
676688

677689
return nil
678690
}
691+
692+
// setupPidfd opens a process file descriptor of init process, and sends the
693+
// file descriptor back to the socket.
694+
func setupPidfd(socket *os.File, initType string) error {
695+
defer socket.Close()
696+
697+
pidFd, err := unix.PidfdOpen(os.Getpid(), 0)
698+
if err != nil {
699+
return fmt.Errorf("failed to pidfd_open: %w", err)
700+
}
701+
702+
if err := utils.SendRawFd(socket, initType, uintptr(pidFd)); err != nil {
703+
unix.Close(pidFd)
704+
return fmt.Errorf("failed to send pidfd on socket: %w", err)
705+
}
706+
return unix.Close(pidFd)
707+
}

libcontainer/process.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type Process struct {
7777
// ConsoleSocket provides the masterfd console.
7878
ConsoleSocket *os.File
7979

80+
// PidfdSocket is the socket over which the process's own file descriptor (pidfd) is sent.
81+
PidfdSocket *os.File
82+
8083
// Init specifies whether the process is the first process in the container.
8184
Init bool
8285

libcontainer/setns_init_linux.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxSetnsInit struct {
2323
pipe *syncSocket
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
config *initConfig
2627
logFd int
2728
dmzExe *os.File
@@ -56,6 +57,11 @@ func (l *linuxSetnsInit) Init() error {
5657
return err
5758
}
5859
}
60+
if l.pidfdSocket != nil {
61+
if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
62+
return fmt.Errorf("failed to setup pidfd: %w", err)
63+
}
64+
}
5965
if l.config.NoNewPrivileges {
6066
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
6167
return err

libcontainer/standard_init_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxStandardInit struct {
2323
pipe *syncSocket
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
parentPid int
2627
fifoFd int
2728
logFd int
@@ -114,6 +115,12 @@ func (l *linuxStandardInit) Init() error {
114115
}
115116
}
116117

118+
if l.pidfdSocket != nil {
119+
if err := setupPidfd(l.pidfdSocket, "standard"); err != nil {
120+
return fmt.Errorf("failed to setup pidfd: %w", err)
121+
}
122+
}
123+
117124
// Finish the rootfs setup.
118125
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
119126
if err := finalizeRootfs(l.config.Config); err != nil {

run.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
3535
Value: "",
3636
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3737
},
38+
cli.StringFlag{
39+
Name: "pidfd-socket",
40+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
41+
},
3842
cli.BoolFlag{
3943
Name: "detach, d",
4044
Usage: "detach from the container's process",

utils_linux.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ type runner struct {
194194
preserveFDs int
195195
pidFile string
196196
consoleSocket string
197+
pidfdSocket string
197198
container *libcontainer.Container
198199
action CtAct
199200
notifySocket *notifySocket
@@ -250,6 +251,14 @@ func (r *runner) run(config *specs.Process) (int, error) {
250251
}
251252
defer tty.Close()
252253

254+
if r.pidfdSocket != "" {
255+
connClose, err := setupPidfdSocket(process, r.pidfdSocket)
256+
if err != nil {
257+
return -1, err
258+
}
259+
defer connClose()
260+
}
261+
253262
switch r.action {
254263
case CT_ACT_CREATE:
255264
err = r.container.Start(process)
@@ -385,6 +394,7 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
385394
listenFDs: listenFDs,
386395
notifySocket: notifySocket,
387396
consoleSocket: context.String("console-socket"),
397+
pidfdSocket: context.String("pidfd-socket"),
388398
detach: context.Bool("detach"),
389399
pidFile: context.String("pid-file"),
390400
preserveFDs: context.Int("preserve-fds"),
@@ -394,3 +404,25 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
394404
}
395405
return r.run(spec.Process)
396406
}
407+
408+
func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) {
409+
conn, err := net.Dial("unix", sockpath)
410+
if err != nil {
411+
return nil, fmt.Errorf("failed to dail %s: %w", sockpath, err)
412+
}
413+
414+
uc, ok := conn.(*net.UnixConn)
415+
if !ok {
416+
return nil, errors.New("failed to cast to UnixConn")
417+
}
418+
419+
socket, err := uc.File()
420+
if err != nil {
421+
return nil, fmt.Errorf("failed to dup socket: %w", err)
422+
}
423+
424+
process.PidfdSocket = socket
425+
return func() {
426+
conn.Close()
427+
}, nil
428+
}

0 commit comments

Comments
 (0)