diff --git a/config-linux.md b/config-linux.md index 2fe80bc2f..c634112c1 100644 --- a/config-linux.md +++ b/config-linux.md @@ -629,6 +629,21 @@ The following parameters can be specified to set up seccomp: * `SECCOMP_FILTER_FLAG_LOG` * `SECCOMP_FILTER_FLAG_SPEC_ALLOW` +* **`listenerPath`** *(string, OPTIONAL)* - specifies the path of the UNIX domain socket over which the runtime will send the [container process state](#containerprocessstate) data structure when the `SCMP_ACT_NOTIFY` action is used. + This socket MUST use `AF_UNIX` domain and `SOCK_STREAM` type. + The runtime MUST send exactly one [container process state](#containerprocessstate) per connection. + The connection MUST NOT be reused and it MUST be closed after sending the container process state. + If sending to this socket fails, the runtime MUST [generate an error](runtime.md#errors). + If the `SCMP_ACT_NOTIFY` action is not used, this value is ignored. + + The runtime sends the following file descriptors using `SCM_RIGHTS` and sets their names in the `fds` array of the [container process state](#containerprocessstate): + + * **`seccompFd`** (string, REQUIRED) is the seccomp file descriptor returned by the seccomp syscall. + +* **`listenerMetadata`** *(string, OPTIONAL)* - specifies opaque data to pass to the seccomp agent. + This string will be sent as the `metadata` field in the [container process state](#containerprocessstate). + This field MUST NOT be set if `listenerPath` is not set. + * **`syscalls`** *(array of objects, OPTIONAL)* - match a syscall in seccomp. While this property is OPTIONAL, some values of `defaultAction` are not useful without `syscalls` entries. For example, if `defaultAction` is `SCMP_ACT_KILL` and `syscalls` is empty or unset, the kernel will kill the container process on its first syscall. @@ -637,7 +652,7 @@ The following parameters can be specified to set up seccomp: * **`names`** *(array of strings, REQUIRED)* - the names of the syscalls. `names` MUST contain at least one entry. 
* **`action`** *(string, REQUIRED)* - the action for seccomp rules. - A valid list of constants as of libseccomp v2.4.0 is shown below. + A valid list of constants as of libseccomp v2.5.0 is shown below. * `SCMP_ACT_KILL` * `SCMP_ACT_KILL_PROCESS` @@ -647,6 +662,7 @@ The following parameters can be specified to set up seccomp: * `SCMP_ACT_TRACE` * `SCMP_ACT_ALLOW` * `SCMP_ACT_LOG` + * `SCMP_ACT_NOTIFY` * **`errnoRet`** *(uint, OPTIONAL)* - the errno return code to use. Some actions like `SCMP_ACT_ERRNO` and `SCMP_ACT_TRACE` allow to specify the errno code to return. @@ -691,6 +707,45 @@ The following parameters can be specified to set up seccomp: } ``` +### <a name="containerprocessstate" />The Container Process State + +The container process state is a data structure passed via a UNIX socket. +The container runtime MUST send the container process state over the UNIX socket as regular payload serialized in JSON and file descriptors MUST be sent using `SCM_RIGHTS`. +The container runtime MAY use several `sendmsg(2)` calls to send the aforementioned data. +If more than one `sendmsg(2)` is used, the file descriptors MUST be sent only in the first call. + +The container process state includes the following properties: + +* **`ociVersion`** (string, REQUIRED) is the version of the Open Container Initiative Runtime Specification with which the container process state complies. +* **`fds`** (array, OPTIONAL) is a string array containing the names of the file descriptors passed. + The index of the name in this array corresponds to the index of the file descriptor in the `SCM_RIGHTS` array. +* **`pid`** (int, REQUIRED) is the container process ID, as seen by the runtime. +* **`metadata`** (string, OPTIONAL) is opaque metadata. +* **`state`** ([state](runtime.md#state), REQUIRED) is the state of the container. 
+ +Example sending a single `seccompFd` file descriptor in the `SCM_RIGHTS` array: + +```json +{ + "ociVersion": "0.2.0", + "fds": [ + "seccompFd" + ], + "pid": 4422, + "metadata": "MKNOD=/dev/null,/dev/net/tun;BPF_MAP_TYPES=hash,array", + "state": { + "ociVersion": "0.2.0", + "id": "oci-container1", + "status": "creating", + "pid": 4422, + "bundle": "/containers/redis", + "annotations": { + "myKey": "myValue" + } + } +} +``` + ## Rootfs Mount Propagation **`rootfsPropagation`** (string, OPTIONAL) sets the rootfs's mount propagation. diff --git a/schema/config-linux.json b/schema/config-linux.json index 61468b9c7..b97fcb56c 100644 --- a/schema/config-linux.json +++ b/schema/config-linux.json @@ -212,6 +212,12 @@ "$ref": "defs-linux.json#/definitions/SeccompFlag" } }, + "listenerPath": { + "type": "string" + }, + "listenerMetadata": { + "type": "string" + }, "architectures": { "type": "array", "items": { diff --git a/schema/defs-linux.json b/schema/defs-linux.json index 61b6ec75c..a99bd7488 100644 --- a/schema/defs-linux.json +++ b/schema/defs-linux.json @@ -61,7 +61,8 @@ "SCMP_ACT_ERRNO", "SCMP_ACT_TRACE", "SCMP_ACT_ALLOW", - "SCMP_ACT_LOG" + "SCMP_ACT_LOG", + "SCMP_ACT_NOTIFY" ] }, "SeccompFlag": { @@ -69,7 +70,8 @@ "enum": [ "SECCOMP_FILTER_FLAG_TSYNC", "SECCOMP_FILTER_FLAG_LOG", - "SECCOMP_FILTER_FLAG_SPEC_ALLOW" + "SECCOMP_FILTER_FLAG_SPEC_ALLOW", + "SECCOMP_FILTER_FLAG_NEW_LISTENER" ] }, "SeccompOperators": { diff --git a/specs-go/config.go b/specs-go/config.go index 16eac6dd0..df1f43214 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -598,11 +598,13 @@ type VMImage struct { // LinuxSeccomp represents syscall restrictions type LinuxSeccomp struct { - DefaultAction LinuxSeccompAction `json:"defaultAction"` - DefaultErrnoRet *uint `json:"defaultErrnoRet,omitempty"` - Architectures []Arch `json:"architectures,omitempty"` - Flags []LinuxSeccompFlag `json:"flags,omitempty"` - Syscalls []LinuxSyscall `json:"syscalls,omitempty"` + DefaultAction 
LinuxSeccompAction `json:"defaultAction"` + DefaultErrnoRet *uint `json:"defaultErrnoRet,omitempty"` + Architectures []Arch `json:"architectures,omitempty"` + Flags []LinuxSeccompFlag `json:"flags,omitempty"` + ListenerPath string `json:"listenerPath,omitempty"` + ListenerMetadata string `json:"listenerMetadata,omitempty"` + Syscalls []LinuxSyscall `json:"syscalls,omitempty"` } // Arch used for additional architectures diff --git a/specs-go/state.go b/specs-go/state.go index e2e64c663..7c010d4fe 100644 --- a/specs-go/state.go +++ b/specs-go/state.go @@ -5,17 +5,17 @@ type ContainerState string const ( // StateCreating indicates that the container is being created - StateCreating ContainerState = "creating" + StateCreating ContainerState = "creating" // StateCreated indicates that the runtime has finished the create operation - StateCreated ContainerState = "created" + StateCreated ContainerState = "created" // StateRunning indicates that the container process has executed the // user-specified program but has not exited - StateRunning ContainerState = "running" + StateRunning ContainerState = "running" // StateStopped indicates that the container process has exited - StateStopped ContainerState = "stopped" + StateStopped ContainerState = "stopped" ) // State holds information about the runtime state of the container. @@ -33,3 +33,24 @@ type State struct { // Annotations are key values associated with the container. Annotations map[string]string `json:"annotations,omitempty"` } + +const ( + // SeccompFdName is the name of the seccomp notify file descriptor. + SeccompFdName string = "seccompFd" +) + +// ContainerProcessState holds information about the state of a container process. +type ContainerProcessState struct { + // Version is the version of the specification that is supported. + Version string `json:"ociVersion"` + // Fds is a string array containing the names of the file descriptors passed. 
+ // The index of the name in this array corresponds to the index of the file + // descriptor in the `SCM_RIGHTS` array. + Fds []string `json:"fds"` + // Pid is the process ID as seen by the runtime. + Pid int `json:"pid"` + // Metadata is opaque metadata. + Metadata string `json:"metadata,omitempty"` + // State is the state of the container. + State State `json:"state"` +}