Skip to content

Commit 2328294

Browse files
committed
*: introduce pidfd-socket flag
A container manager such as containerd-shim can't use the cgroup.kill feature or freeze all the processes in the cgroup to terminate the exec init process. It's unsafe to call kill(2) since the pid can be recycled. Instead, it's good to provide the pidfd of the init process through the pidfd-socket, which works similarly to the console-socket. With the pidfd, a container manager such as containerd-shim can send signals to the target process safely. And for the standard init process, we get polling support for the exit event instead of blocking on wait4. Signed-off-by: Wei Fu <[email protected]>
1 parent ee45b9b commit 2328294

File tree

9 files changed

+99
-2
lines changed

9 files changed

+99
-2
lines changed

create.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
3434
Value: "",
3535
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3636
},
37+
cli.StringFlag{
38+
Name: "pidfd-socket",
39+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
40+
},
3741
cli.StringFlag{
3842
Name: "pid-file",
3943
Value: "",

exec.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ following will output a list of processes running in the container:
3333
Name: "console-socket",
3434
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3535
},
36+
cli.StringFlag{
37+
Name: "pidfd-socket",
38+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
39+
},
3640
cli.StringFlag{
3741
Name: "cwd",
3842
Usage: "current working directory in the container",
@@ -181,6 +185,7 @@ func execProcess(context *cli.Context) (int, error) {
181185
shouldDestroy: false,
182186
container: container,
183187
consoleSocket: context.String("console-socket"),
188+
pidfdSocket: context.String("pidfd-socket"),
184189
detach: context.Bool("detach"),
185190
pidFile: context.String("pid-file"),
186191
action: CT_ACT_RUN,

libcontainer/container_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
588588
cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
589589
}
590590

591+
if p.PidfdSocket != nil {
592+
cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
593+
cmd.Env = append(cmd.Env,
594+
"_LIBCONTAINER_PIDFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
595+
)
596+
}
597+
591598
if safeExe != nil {
592599
// Due to a Go stdlib bug, we need to add safeExe to the set of
593600
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd

libcontainer/init_linux.go

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,16 @@ func startInitialization() (retErr error) {
170170
defer consoleSocket.Close()
171171
}
172172

173+
var pidfdSocket *os.File
174+
if envConsole := os.Getenv("_LIBCONTAINER_PIDFD"); envConsole != "" {
175+
console, err := strconv.Atoi(envConsole)
176+
if err != nil {
177+
return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD: %w", err)
178+
}
179+
pidfdSocket = os.NewFile(uintptr(console), "pidfd-socket")
180+
defer pidfdSocket.Close()
181+
}
182+
173183
// Get mount files (O_PATH).
174184
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
175185
if err != nil {
@@ -208,10 +218,10 @@ func startInitialization() (retErr error) {
208218
}()
209219

210220
// If init succeeds, it will not return, hence none of the defers will be called.
211-
return containerInit(it, pipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
221+
return containerInit(it, pipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
212222
}
213223

214-
func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
224+
func containerInit(t initType, pipe, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
215225
var config *initConfig
216226
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
217227
return err
@@ -230,6 +240,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo
230240
i := &linuxSetnsInit{
231241
pipe: pipe,
232242
consoleSocket: consoleSocket,
243+
pidfdSocket: pidfdSocket,
233244
config: config,
234245
logFd: logFd,
235246
dmzExe: dmzExe,
@@ -239,6 +250,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo
239250
i := &linuxStandardInit{
240251
pipe: pipe,
241252
consoleSocket: consoleSocket,
253+
pidfdSocket: pidfdSocket,
242254
parentPid: unix.Getppid(),
243255
config: config,
244256
fifoFd: fifoFd,
@@ -666,3 +678,20 @@ func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
666678

667679
return nil
668680
}
681+
682+
// setupPidfd opens a process file descriptor of init process, and sends the
683+
// file descriptor back to the socket.
684+
func setupPidfd(socket *os.File, initType string) error {
685+
defer socket.Close()
686+
687+
pidFd, err := unix.PidfdOpen(os.Getpid(), 0)
688+
if err != nil {
689+
return fmt.Errorf("failed to pidfd_open: %w", err)
690+
}
691+
692+
if err := utils.SendRawFd(socket, initType, uintptr(pidFd)); err != nil {
693+
unix.Close(pidFd)
694+
return fmt.Errorf("failed to send pidfd on socket: %w", err)
695+
}
696+
return unix.Close(pidFd)
697+
}

libcontainer/process.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ type Process struct {
7777
// ConsoleSocket provides the masterfd console.
7878
ConsoleSocket *os.File
7979

80+
// PidfdSocket provides the process file descriptor (pidfd) of the process itself.
81+
PidfdSocket *os.File
82+
8083
// Init specifies whether the process is the first process in the container.
8184
Init bool
8285

libcontainer/setns_init_linux.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxSetnsInit struct {
2323
pipe *os.File
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
config *initConfig
2627
logFd int
2728
dmzExe *os.File
@@ -56,6 +57,11 @@ func (l *linuxSetnsInit) Init() error {
5657
return err
5758
}
5859
}
60+
if l.pidfdSocket != nil {
61+
if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
62+
return fmt.Errorf("failed to setup pidfd: %w", err)
63+
}
64+
}
5965
if l.config.NoNewPrivileges {
6066
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
6167
return err

libcontainer/standard_init_linux.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
type linuxStandardInit struct {
2323
pipe *os.File
2424
consoleSocket *os.File
25+
pidfdSocket *os.File
2526
parentPid int
2627
fifoFd int
2728
logFd int
@@ -114,6 +115,12 @@ func (l *linuxStandardInit) Init() error {
114115
}
115116
}
116117

118+
if l.pidfdSocket != nil {
119+
if err := setupPidfd(l.pidfdSocket, "standard"); err != nil {
120+
return fmt.Errorf("failed to setup pidfd: %w", err)
121+
}
122+
}
123+
117124
// Finish the rootfs setup.
118125
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
119126
if err := finalizeRootfs(l.config.Config); err != nil {

run.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
3535
Value: "",
3636
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
3737
},
38+
cli.StringFlag{
39+
Name: "pidfd-socket",
40+
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
41+
},
3842
cli.BoolFlag{
3943
Name: "detach, d",
4044
Usage: "detach from the container's process",

utils_linux.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ type runner struct {
194194
preserveFDs int
195195
pidFile string
196196
consoleSocket string
197+
pidfdSocket string
197198
container *libcontainer.Container
198199
action CtAct
199200
notifySocket *notifySocket
@@ -250,6 +251,14 @@ func (r *runner) run(config *specs.Process) (int, error) {
250251
}
251252
defer tty.Close()
252253

254+
if r.pidfdSocket != "" {
255+
connClose, err := setupPidfdSocket(process, r.pidfdSocket)
256+
if err != nil {
257+
return -1, err
258+
}
259+
defer connClose()
260+
}
261+
253262
switch r.action {
254263
case CT_ACT_CREATE:
255264
err = r.container.Start(process)
@@ -385,6 +394,7 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
385394
listenFDs: listenFDs,
386395
notifySocket: notifySocket,
387396
consoleSocket: context.String("console-socket"),
397+
pidfdSocket: context.String("pidfd-socket"),
388398
detach: context.Bool("detach"),
389399
pidFile: context.String("pid-file"),
390400
preserveFDs: context.Int("preserve-fds"),
@@ -394,3 +404,25 @@ func startContainer(context *cli.Context, action CtAct, criuOpts *libcontainer.C
394404
}
395405
return r.run(spec.Process)
396406
}
407+
408+
func setupPidfdSocket(process *libcontainer.Process, sockpath string) (_clean func(), _ error) {
409+
conn, err := net.Dial("unix", sockpath)
410+
if err != nil {
411+
return nil, fmt.Errorf("failed to dail %s: %w", sockpath, err)
412+
}
413+
414+
uc, ok := conn.(*net.UnixConn)
415+
if !ok {
416+
return nil, errors.New("failed to cast to UnixConn")
417+
}
418+
419+
socket, err := uc.File()
420+
if err != nil {
421+
return nil, fmt.Errorf("failed to dup socket: %w", err)
422+
}
423+
424+
process.PidfdSocket = socket
425+
return func() {
426+
conn.Close()
427+
}, nil
428+
}

0 commit comments

Comments
 (0)