From edd501a605fbbf53dba7bee8324842347ee239f8 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 7 Mar 2018 16:27:33 -0800 Subject: [PATCH 1/2] config-vm: Recycle the 'process' schema We already have two ways to specify a process to launch (for the container process and for hooks). This commit recycles the container process schema for launching the hypervisor. I've dropped the terminal configuration because callers are unlikely to need control over their hypervisor's standard streams, but otherwise this is the same structure. The JSON Schema cheats a bit by not forbidding the terminal properties. We could address that if we really wanted to (JSON Schema makes it hard to extend a previously-defined object), but I'm leaving it to downstream tools in this commit. Signed-off-by: W. Trevor King --- config-vm.md | 9 ++-- schema/config-schema.json | 107 +------------------------------------ schema/config-vm.json | 13 +---- schema/defs.json | 108 ++++++++++++++++++++++++++++++++++++++ specs-go/config.go | 10 +--- 5 files changed, 114 insertions(+), 133 deletions(-) diff --git a/config-vm.md b/config-vm.md index ff551d317..0c671a120 100644 --- a/config-vm.md +++ b/config-vm.md @@ -5,17 +5,14 @@ The virtual-machine container specification provides additional configuration fo ## Hypervisor Object -**`hypervisor`** (object, OPTIONAL) specifies details of the hypervisor that manages the container virtual machine. -* **`path`** (string, REQUIRED) path to the hypervisor binary that manages the container virtual machine. - This value MUST be an absolute path in the [runtime mount namespace](glossary.md#runtime-namespace). -* **`parameters`** (array of strings, OPTIONAL) specifies an array of parameters to pass to the hypervisor. +**`hypervisor`** (object, OPTIONAL) configures the hypervisor process. +It has the same schema as [`process`](config.md#process), but `terminal` and `consoleSize` MUST NOT be configured. 
### Example ```json "hypervisor": { - "path": "/path/to/vmm", - "parameters": ["opts1=foo", "opts2=bar"] + "args": ["/path/to/vmm", "opts1=foo", "opts2=bar"] } ``` diff --git a/schema/config-schema.json b/schema/config-schema.json index f90bd4b7d..ec94e5627 100644 --- a/schema/config-schema.json +++ b/schema/config-schema.json @@ -48,112 +48,7 @@ } }, "process": { - "type": "object", - "required": [ - "cwd", - "args" - ], - "properties": { - "args": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - }, - "consoleSize": { - "type": "object", - "required": [ - "height", - "width" - ], - "properties": { - "height": { - "$ref": "defs.json#/definitions/uint64" - }, - "width": { - "$ref": "defs.json#/definitions/uint64" - } - } - }, - "cwd": { - "type": "string" - }, - "env": { - "$ref": "defs.json#/definitions/Env" - }, - "terminal": { - "type": "boolean" - }, - "user": { - "type": "object", - "properties": { - "uid": { - "$ref": "defs.json#/definitions/UID" - }, - "gid": { - "$ref": "defs.json#/definitions/GID" - }, - "additionalGids": { - "$ref": "defs.json#/definitions/ArrayOfGIDs" - }, - "username": { - "type": "string" - } - } - }, - "capabilities": { - "type": "object", - "properties": { - "bounding": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - }, - "permitted": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - }, - "effective": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - }, - "inheritable": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - }, - "ambient": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - } - } - }, - "apparmorProfile": { - "type": "string" - }, - "oomScoreAdj": { - "type": "integer" - }, - "selinuxLabel": { - "type": "string" - }, - "noNewPrivileges": { - "type": "boolean" - }, - "rlimits": { - "type": "array", - "items": { - "type": "object", - "required": [ - "type", - "soft", - "hard" - ], - "properties": { - "hard": { - "$ref": "defs.json#/definitions/uint64" - }, - "soft": { - "$ref": 
"defs.json#/definitions/uint64" - }, - "type": { - "type": "string", - "pattern": "^RLIMIT_[A-Z]+$" - } - } - } - } - } + "$ref": "defs.json#/definitions/Process" }, "linux": { "$ref": "config-linux.json#/linux" diff --git a/schema/config-vm.json b/schema/config-vm.json index 6b1fb4baf..9baf73f13 100644 --- a/schema/config-vm.json +++ b/schema/config-vm.json @@ -8,18 +8,7 @@ "properties": { "hypervisor": { "description": "hypervisor config used by VM-based containers", - "type": "object", - "required": [ - "path" - ], - "properties": { - "path": { - "$ref": "defs.json#/definitions/FilePath" - }, - "parameters": { - "$ref": "defs.json#/definitions/ArrayOfStrings" - } - } + "$ref": "defs.json#/definitions/Process" }, "kernel": { "description": "kernel config used by VM-based containers", diff --git a/schema/defs.json b/schema/defs.json index c1533aede..fe0a0287a 100644 --- a/schema/defs.json +++ b/schema/defs.json @@ -78,6 +78,114 @@ "Env": { "$ref": "#/definitions/ArrayOfStrings" }, + "Process": { + "type": "object", + "required": [ + "cwd", + "args" + ], + "properties": { + "args": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "consoleSize": { + "type": "object", + "required": [ + "height", + "width" + ], + "properties": { + "height": { + "$ref": "defs.json#/definitions/uint64" + }, + "width": { + "$ref": "defs.json#/definitions/uint64" + } + } + }, + "cwd": { + "type": "string" + }, + "env": { + "$ref": "defs.json#/definitions/Env" + }, + "terminal": { + "type": "boolean" + }, + "user": { + "type": "object", + "properties": { + "uid": { + "$ref": "defs.json#/definitions/UID" + }, + "gid": { + "$ref": "defs.json#/definitions/GID" + }, + "additionalGids": { + "$ref": "defs.json#/definitions/ArrayOfGIDs" + }, + "username": { + "type": "string" + } + } + }, + "capabilities": { + "type": "object", + "properties": { + "bounding": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "permitted": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, 
+ "effective": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "inheritable": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + }, + "ambient": { + "$ref": "defs.json#/definitions/ArrayOfStrings" + } + } + }, + "apparmorProfile": { + "type": "string" + }, + "oomScoreAdj": { + "type": "integer" + }, + "selinuxLabel": { + "type": "string" + }, + "noNewPrivileges": { + "type": "boolean" + }, + "rlimits": { + "type": "array", + "items": { + "type": "object", + "required": [ + "type", + "soft", + "hard" + ], + "properties": { + "hard": { + "$ref": "defs.json#/definitions/uint64" + }, + "soft": { + "$ref": "defs.json#/definitions/uint64" + }, + "type": { + "type": "string", + "pattern": "^RLIMIT_[A-Z]+$" + } + } + } + } + } + }, "Hook": { "type": "object", "properties": { diff --git a/specs-go/config.go b/specs-go/config.go index c9e848db6..77f12b171 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -504,21 +504,13 @@ type WindowsHyperV struct { // VM contains information for virtual-machine-based containers. type VM struct { // Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers. - Hypervisor VMHypervisor `json:"hypervisor,omitempty"` + Hypervisor *Process `json:"hypervisor,omitempty"` // Kernel specifies kernel-related configuration for virtual-machine-based containers. Kernel VMKernel `json:"kernel"` // Image specifies guest image related configuration for virtual-machine-based containers. Image VMImage `json:"image,omitempty"` } -// VMHypervisor contains information about the hypervisor to use for a virtual machine. -type VMHypervisor struct { - // Path is the host path to the hypervisor used to manage the virtual machine. - Path string `json:"path"` - // Parameters specifies parameters to pass to the hypervisor. - Parameters string `json:"parameters,omitempty"` -} - // VMKernel contains information about the kernel to use for a virtual machine. 
type VMKernel struct { // Path is the host path to the kernel used to boot the virtual machine. From 96c89b275d924a93186cde998260c95f93ec2a76 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 7 Mar 2018 16:38:59 -0800 Subject: [PATCH 2/2] WIP: Copy/paste new Markdown and Go for hypervisor Michael asked for this [1], because it allows you to extend one schema without extending the other (the terminal properties are an example of this). I think the overlap is much greater than the differences, so I'd rather use the previous commit's "it is the same, except for ..." approach. But as far as configurations are concerned, the two approaches are identical, so I don't really care. I've left the JSON Schema change off this commit for now; I'll go back through and adjust that once we have a maintainer confirming the copy/paste approach. [1]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/2018/opencontainers.2018-03-07-22.00.log.html#l-53 Signed-off-by: W. Trevor King --- config-vm.md | 89 ++++++++++++++++++++++++++++++++++++++++++++-- specs-go/config.go | 27 +++++++++++++- 2 files changed, 112 insertions(+), 4 deletions(-) diff --git a/config-vm.md b/config-vm.md index 0c671a120..c8cab3ed0 100644 --- a/config-vm.md +++ b/config-vm.md @@ -6,7 +6,80 @@ The virtual-machine container specification provides additional configuration fo ## Hypervisor Object **`hypervisor`** (object, OPTIONAL) configures the hypervisor process. -It has the same schema as [`process`](config.md#process), but `terminal` and `consoleSize` MUST NOT be configured. +The schema is a subset of the [`process`](config.md#process) schema with the terminal properties `terminal` and `consoleSize` removed. + +* **`cwd`** (string, REQUIRED) is the working directory that will be set for the executable. + This value MUST be an absolute path. +* **`env`** (array of strings, OPTIONAL) with the same semantics as [IEEE Std 1003.1-2008's `environ`][ieee-1003.1-2008-xbd-c8.1_2]. 
+* **`args`** (array of strings, REQUIRED) with similar semantics to [IEEE Std 1003.1-2008 `execvp`'s *argv*][ieee-1003.1-2008-functions-exec_2]. + This specification extends the IEEE standard in that at least one entry is REQUIRED, and that entry is used with the same semantics as `execvp`'s *file*. + +### POSIX process + +For systems that support POSIX rlimits (for example Linux and Solaris), the `hypervisor` object supports the following process-specific properties: + +* **`rlimits`** (array of objects, OPTIONAL) allows setting resource limits for the process. + Each entry has the following structure: + + * **`type`** (string, REQUIRED) the platform resource being limited. + * Linux: valid values are defined in the [`getrlimit(2)`][getrlimit.2_2] man page, such as `RLIMIT_MSGQUEUE`. + * Solaris: valid values are defined in the [`getrlimit(3)`][getrlimit.3_2] man page, such as `RLIMIT_CORE`. + + The runtime MUST [generate an error](runtime.md#errors) for any values which cannot be mapped to a relevant kernel interface. + For each entry in `rlimits`, a [`getrlimit(3)`][getrlimit.3_2] on `type` MUST succeed. + For the following properties, `rlim` refers to the status returned by the `getrlimit(3)` call. + + * **`soft`** (uint64, REQUIRED) the value of the limit enforced for the corresponding resource. + `rlim.rlim_cur` MUST match the configured value. + * **`hard`** (uint64, REQUIRED) the ceiling for the soft limit that could be set by an unprivileged process. + `rlim.rlim_max` MUST match the configured value. + Only a privileged process (e.g. one with the `CAP_SYS_RESOURCE` capability) can raise a hard limit. + + If `rlimits` contains duplicated entries with same `type`, the runtime MUST [generate an error](runtime.md#errors). + +### Linux Process + +For Linux-based systems, the `hypervisor` object supports the following process-specific properties. + +* **`apparmorProfile`** (string, OPTIONAL) specifies the name of the AppArmor profile for the process. 
+ For more information about AppArmor, see [AppArmor documentation][apparmor_2]. +* **`capabilities`** (object, OPTIONAL) is an object containing arrays that specifies the sets of capabilities for the process. + Valid values are defined in the [capabilities(7)][capabilities.7_2] man page, such as `CAP_CHOWN`. + Any value which cannot be mapped to a relevant kernel interface MUST cause an error. + `capabilities` contains the following properties: + + * **`effective`** (array of strings, OPTIONAL) the `effective` field is an array of effective capabilities that are kept for the process. + * **`bounding`** (array of strings, OPTIONAL) the `bounding` field is an array of bounding capabilities that are kept for the process. + * **`inheritable`** (array of strings, OPTIONAL) the `inheritable` field is an array of inheritable capabilities that are kept for the process. + * **`permitted`** (array of strings, OPTIONAL) the `permitted` field is an array of permitted capabilities that are kept for the process. + * **`ambient`** (array of strings, OPTIONAL) the `ambient` field is an array of ambient capabilities that are kept for the process. +* **`noNewPrivileges`** (bool, OPTIONAL) setting `noNewPrivileges` to true prevents the process from gaining additional privileges. + As an example, the [`no_new_privs`][no-new-privs_2] article in the kernel documentation has information on how this is achieved using a `prctl` system call on Linux. +* **`oomScoreAdj`** *(int, OPTIONAL)* adjusts the oom-killer score in `[pid]/oom_score_adj` for the process's `[pid]` in a [proc pseudo-filesystem][proc_3]. + If `oomScoreAdj` is set, the runtime MUST set `oom_score_adj` to the given value. + If `oomScoreAdj` is not set, the runtime MUST NOT change the value of `oom_score_adj`. +* **`selinuxLabel`** (string, OPTIONAL) specifies the SELinux label for the process. + For more information about SELinux, see [SELinux documentation][selinux_2]. 
+ +### User + +The user for the process is a platform-specific structure that allows specific control over which user the process runs as. + +#### POSIX-platform User + +For POSIX platforms the `user` structure has the following fields: + +* **`uid`** (int, REQUIRED) specifies the user ID in the [container namespace](glossary.md#container-namespace). +* **`gid`** (int, REQUIRED) specifies the group ID in the [container namespace](glossary.md#container-namespace). +* **`additionalGids`** (array of ints, OPTIONAL) specifies additional group IDs in the [container namespace](glossary.md#container-namespace) to be added to the process. + +_Note: symbolic name for uid and gid, such as uname and gname respectively, are left to upper levels to derive (i.e. `/etc/passwd` parsing, NSS, etc)_ + +#### Windows User + +For Windows based systems the user structure has the following fields: + +* **`username`** (string, OPTIONAL) specifies the user name for the process. ### Example @@ -58,8 +131,18 @@ This image contains the root filesystem that the virtual machine **`kernel`** wi } ``` -[raw-image-format]: https://en.wikipedia.org/wiki/IMG_(file_format) +[apparmor_2]: https://wiki.ubuntu.com/AppArmor +[ieee-1003.1-2008-functions-exec_2]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html +[ieee-1003.1-2008-xbd-c8.1_2]: http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_01 +[no-new-privs_2]: https://www.kernel.org/doc/Documentation/prctl/no_new_privs.txt +[proc_3]: https://www.kernel.org/doc/Documentation/filesystems/proc.txt [qcow2-image-format]: https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/interop/qcow2.txt;hb=HEAD +[raw-image-format]: https://en.wikipedia.org/wiki/IMG_(file_format) +[selinux_2]: http://selinuxproject.org/page/Main_Page [vdi-image-format]: https://forensicswiki.org/wiki/Virtual_Disk_Image_(VDI) -[vmdk-image-format]: http://www.vmware.com/app/vmdk/?src=vmdk [vhd-image-format]: 
https://github.com/libyal/libvhdi/blob/master/documentation/Virtual%20Hard%20Disk%20(VHD)%20image%20format.asciidoc +[vmdk-image-format]: http://www.vmware.com/app/vmdk/?src=vmdk + +[capabilities.7_2]: http://man7.org/linux/man-pages/man7/capabilities.7.html +[getrlimit.2_2]: http://man7.org/linux/man-pages/man2/getrlimit.2.html +[getrlimit.3_2]: http://pubs.opengroup.org/onlinepubs/9699919799/functions/getrlimit.html diff --git a/specs-go/config.go b/specs-go/config.go index 77f12b171..99f4c6df8 100644 --- a/specs-go/config.go +++ b/specs-go/config.go @@ -58,6 +58,31 @@ type Process struct { SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` } +// HypervisorProcess is like Process, except for launching the hypervisor instead of for launching a container process. +type HypervisorProcess struct { + // User specifies user information for the process. + User User `json:"user"` + // Args specifies the binary and arguments for the application to execute. + Args []string `json:"args"` + // Env populates the process environment for the process. + Env []string `json:"env,omitempty"` + // Cwd is the current working directory for the process and must be + // relative to the container's root. + Cwd string `json:"cwd"` + // Capabilities are Linux capabilities that are kept for the process. + Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` + // Rlimits specifies rlimit options to apply to the process. + Rlimits []POSIXRlimit `json:"rlimits,omitempty" platform:"linux,solaris"` + // NoNewPrivileges controls whether additional privileges could be gained by processes in the container. + NoNewPrivileges bool `json:"noNewPrivileges,omitempty" platform:"linux"` + // ApparmorProfile specifies the apparmor profile for the container. + ApparmorProfile string `json:"apparmorProfile,omitempty" platform:"linux"` + // Specify an oom_score_adj for the container. 
+ OOMScoreAdj *int `json:"oomScoreAdj,omitempty" platform:"linux"` + // SelinuxLabel specifies the selinux context that the container process is run as. + SelinuxLabel string `json:"selinuxLabel,omitempty" platform:"linux"` +} + // LinuxCapabilities specifies the whitelist of capabilities that are kept for a process. // http://man7.org/linux/man-pages/man7/capabilities.7.html type LinuxCapabilities struct { @@ -504,7 +529,7 @@ type WindowsHyperV struct { // VM contains information for virtual-machine-based containers. type VM struct { // Hypervisor specifies hypervisor-related configuration for virtual-machine-based containers. - Hypervisor *Process `json:"hypervisor,omitempty"` + Hypervisor *HypervisorProcess `json:"hypervisor,omitempty"` // Kernel specifies kernel-related configuration for virtual-machine-based containers. Kernel VMKernel `json:"kernel"` // Image specifies guest image related configuration for virtual-machine-based containers.