From 2d41a2a63328ee8d84c3f97a7e5e9f174c74c475 Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Thu, 19 Jun 2014 22:17:29 +0200 Subject: [PATCH 1/9] Improvements for declarative containers. * Use unique service units for each container rather than using templates (containers@xxx) to work around reloading issues. * Fix triggering of switching to new container configuration when the container's config has changed. * Add new options: `macvlanInterface`, `macvlanAddress` and `macvlanPrefixLength` to enable a macvlan interface within the container (eth1). * Do not force a veth pair to be created when `privateNetwork` is true (Sometimes you just want to isolate the container). * veth pairs are now created only when `localAddress` or `hostAddress` are set. * Add a `wantedBy` option so that you can control when the container unit should be started (defaults to `multi-user.target`). --- nixos/modules/virtualisation/containers.nix | 193 ++++++++++++++------ 1 file changed, 139 insertions(+), 54 deletions(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index d0d04d9a1e5dc..d373d747a15f7 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -18,10 +18,10 @@ let containerInit = pkgs.writeScript "container-init" '' #! ${pkgs.stdenv.shell} -e - + # Initialise the container side of the veth pair. - if [ "$PRIVATE_NETWORK" = 1 ]; then - ip link set host0 name eth0 + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then + ip link set host0 name eth0 ip link set dev eth0 up if [ -n "$HOST_ADDRESS" ]; then ip route add $HOST_ADDRESS dev eth0 @@ -30,7 +30,16 @@ let if [ -n "$LOCAL_ADDRESS" ]; then ip addr add $LOCAL_ADDRESS dev eth0 fi - fi + fi + + # Initialise the container side of the macvlan interface. 
+ if [ -n "$MACVLANS" ]; then + ip link set mv-$MACVLANS name eth1 + ip link set dev eth1 up + if [ -n "$MACVLAN_ADDRESS" ]; then + ip addr add $MACVLAN_ADDRESS dev eth1 + fi + fi exec "$1" ''; @@ -79,13 +88,11 @@ in type = types.bool; default = false; description = '' - Whether to give the container its own private virtual - Ethernet interface. The interface is called - eth0, and is hooked up to the interface - ve-container-name - on the host. If this option is not set, then the - container shares the network interfaces of the host, - and can bind to any port on any interface. + Whether to give the container its own private network namespace. + This option is implied if setting , + or + If this option is not set, then the container shares the network + interfaces of the host, and can bind to any port on any interface. ''; }; @@ -94,7 +101,11 @@ in default = null; example = "10.231.136.1"; description = '' - The IPv4 address assigned to the host interface. + The IPv4 address assigned to the host-side of a veth pair. + Setting or + will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. ''; }; @@ -103,8 +114,55 @@ in default = null; example = "10.231.136.2"; description = '' - The IPv4 address assigned to eth0 - in the container. + The IPv4 address assigned to the container-side of a veth pair + (eth0 in the container). + Setting or + will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. + ''; + }; + + macvlanInterface = mkOption { + type = types.nullOr types.string; + default = null; + example = "enp1s1"; + description = '' + When this option is set an eth1 interface + will be available within the container that bridges to the host's + physical network using macvlan. 
+ Note: while macvlan interfaces allow your containers to be accessable + via the the same physical network as the specified host interface, you + may not be able to communicate between the host itself and container. + ''; + }; + + macvlanAddress = mkOption { + type = types.nullOr types.string; + default = null; + example = "10.231.136.2"; + description = '' + The IPv4 address assigned to eth1 + (the macvlan interface) in the container. + ''; + }; + + macvlanPrefixLength = mkOption { + type = types.nullOr types.int; + default = 32; + example = 16; + description = '' + The network prefix length for the macvlan interface. + ''; + }; + + wantedBy = mkOption { + type = types.listOf types.str; + default = [ "multi-user.target" ]; + description = '' + List of systemd units/targets that should cause this + container to start. Set to [] if + you do not want this container to start. ''; }; @@ -156,15 +214,21 @@ in config = mkIf (!config.boot.isContainer) { - systemd.services."container@" = - { description = "Container '%i'"; + systemd.services = mapAttrs (name: cfg: { - unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; + wantedBy = cfg.wantedBy; + wants = [ "network.target" ]; + after = [ "network.target" ]; + + description = "Container '${name}'"; + + unitConfig.RequiresMountsFor = [ "/var/lib/containers/${name}" ]; path = [ pkgs.iproute ]; - environment.INSTANCE = "%i"; - environment.root = "/var/lib/containers/%i"; + environment = { + root = "/var/lib/containers/${name}"; + }; preStart = '' @@ -184,10 +248,14 @@ in fi mkdir -p -m 0755 \ - "/nix/var/nix/profiles/per-container/$INSTANCE" \ - "/nix/var/nix/gcroots/per-container/$INSTANCE" + "/nix/var/nix/profiles/per-container/${name}" \ + "/nix/var/nix/gcroots/per-container/${name}" if [ "$PRIVATE_NETWORK" = 1 ]; then + extraFlags+=" --private-network" + fi + + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then extraFlags+=" --network-veth" fi @@ -197,23 +265,25 @@ in # If the host is 64-bit and the 
container is 32-bit, add a # --personality flag. - ${optionalString (config.nixpkgs.system == "x86_64-linux") '' - if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then + '' + optionalString (config.nixpkgs.system == "x86_64-linux") '' + if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/${name}/system}/system)" = i686-linux ]; then extraFlags+=" --personality=x86" fi - ''} + '' + '' exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ - -M "$INSTANCE" -D "$root" $extraFlags \ + -M "${name}" -D "$root" $extraFlags \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ - --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \ - --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \ + --bind="/nix/var/nix/profiles/per-container/${name}:/nix/var/nix/profiles" \ + --bind="/nix/var/nix/gcroots/per-container/${name}:/nix/var/nix/gcroots" \ --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \ --setenv HOST_ADDRESS="$HOST_ADDRESS" \ --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \ + --setenv MACVLANS="$MACVLANS" \ + --setenv MACVLAN_ADDRESS="$MACVLAN_ADDRESS" \ --setenv PATH="$PATH" \ ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; @@ -226,8 +296,8 @@ in read x < $root/var/lib/startup-done rm -f $root/var/lib/startup-done - if [ "$PRIVATE_NETWORK" = 1 ]; then - ifaceHost=ve-$INSTANCE + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then + ifaceHost=ve-${name} ip link set dev $ifaceHost up if [ -n "$HOST_ADDRESS" ]; then ip addr add $HOST_ADDRESS dev $ifaceHost @@ -240,46 +310,61 @@ in preStop = '' - machinectl poweroff "$INSTANCE" + machinectl poweroff "${name}" ''; restartIfChanged = false; - #reloadIfChanged = true; # FIXME + reloadIfChanged = true; + # If the network configuration has changed, then trigger a reboot of the + # container to setup the new interfaces, otherwise just rebuild 
the config + # within the container without restarting. serviceConfig.ExecReload = pkgs.writeScript "reload-container" - '' - #! ${pkgs.stdenv.shell} -e - SYSTEM_PATH=/nix/var/nix/profiles/system - echo $SYSTEM_PATH/bin/switch-to-configuration test | \ - ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - + ''#!${pkgs.stdenv.shell} + SYSTEM_PATH="''${SYSTEM_PATH:-/nix/var/nix/profiles/system}" + echo $SYSTEM_PATH/bin/switch-to-configuration test | \ + ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; - serviceConfig.SyslogIdentifier = "container %i"; + serviceConfig.SyslogIdentifier = "container ${name}"; + + serviceConfig.EnvironmentFile = "-/etc/containers/${name}.conf"; - serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf"; - }; + }) config.containers; - # Generate a configuration file in /etc/containers for each + # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container # configuration. 
- environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" - { text = - '' - SYSTEM_PATH=${cfg.path} - ${optionalString cfg.privateNetwork '' - PRIVATE_NETWORK=1 - ${optionalString (cfg.hostAddress != null) '' + environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { + text = + '' + SYSTEM_PATH=${cfg.path} + '' + + (optionalString ( cfg.privateNetwork + || cfg.localAddress!=null + || cfg.hostAddress!=null + || cfg.macvlanInterface!=null) + '' + PRIVATE_NETWORK=1 + '') + + (optionalString (cfg.hostAddress != null) + '' HOST_ADDRESS=${cfg.hostAddress} - ''} - ${optionalString (cfg.localAddress != null) '' + '') + + (optionalString (cfg.localAddress != null) + '' LOCAL_ADDRESS=${cfg.localAddress} - ''} - ''} - ''; + '') + + (optionalString (cfg.macvlanInterface != null) + '' + MACVLANS=${cfg.macvlanInterface} + '') + + (optionalString (cfg.macvlanAddress != null) + '' + MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} + ''); }) config.containers; - # FIXME: auto-start containers. - # Generate /etc/hosts entries for the containers. networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null) '' From 62f50e81e115174a4c513102edf70ff9218be1ac Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Thu, 19 Jun 2014 22:39:35 +0200 Subject: [PATCH 2/9] retab --- nixos/modules/virtualisation/containers.nix | 176 ++++++++++---------- 1 file changed, 85 insertions(+), 91 deletions(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index d373d747a15f7..92c0a05bd0046 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -16,12 +16,11 @@ let # The container's init script, a small wrapper around the regular # NixOS stage-2 init script. containerInit = pkgs.writeScript "container-init" - '' - #! ${pkgs.stdenv.shell} -e + ''#! 
${pkgs.stdenv.shell} -e # Initialise the container side of the veth pair. - if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then - ip link set host0 name eth0 + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then + ip link set host0 name eth0 ip link set dev eth0 up if [ -n "$HOST_ADDRESS" ]; then ip route add $HOST_ADDRESS dev eth0 @@ -30,16 +29,16 @@ let if [ -n "$LOCAL_ADDRESS" ]; then ip addr add $LOCAL_ADDRESS dev eth0 fi - fi - - # Initialise the container side of the macvlan interface. - if [ -n "$MACVLANS" ]; then - ip link set mv-$MACVLANS name eth1 - ip link set dev eth1 up - if [ -n "$MACVLAN_ADDRESS" ]; then - ip addr add $MACVLAN_ADDRESS dev eth1 - fi - fi + fi + + # Initialise the container side of the macvlan interface. + if [ -n "$MACVLANS" ]; then + ip link set mv-$MACVLANS name eth1 + ip link set dev eth1 up + if [ -n "$MACVLAN_ADDRESS" ]; then + ip addr add $MACVLAN_ADDRESS dev eth1 + fi + fi exec "$1" ''; @@ -89,10 +88,10 @@ in default = false; description = '' Whether to give the container its own private network namespace. - This option is implied if setting , - or + This option is implied if setting , + or If this option is not set, then the container shares the network - interfaces of the host, and can bind to any port on any interface. + interfaces of the host, and can bind to any port on any interface. ''; }; @@ -102,10 +101,10 @@ in example = "10.231.136.1"; description = '' The IPv4 address assigned to the host-side of a veth pair. - Setting or - will create a veth pair with one side in the container appearing as - eth0, and the other side in the host as - ve-container-name. + Setting or + will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. ''; }; @@ -115,11 +114,11 @@ in example = "10.231.136.2"; description = '' The IPv4 address assigned to the container-side of a veth pair - (eth0 in the container). 
- Setting or - will create a veth pair with one side in the container appearing as - eth0, and the other side in the host as - ve-container-name. + (eth0 in the container). + Setting or + will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. ''; }; @@ -129,14 +128,14 @@ in example = "enp1s1"; description = '' When this option is set an eth1 interface - will be available within the container that bridges to the host's - physical network using macvlan. - Note: while macvlan interfaces allow your containers to be accessable - via the the same physical network as the specified host interface, you - may not be able to communicate between the host itself and container. + will be available within the container that bridges to the host's + physical network using macvlan. + Note: while macvlan interfaces allow your containers to be accessable + via the the same physical network as the specified host interface, you + may not be able to communicate between the host itself and container. ''; }; - + macvlanAddress = mkOption { type = types.nullOr types.string; default = null; @@ -152,7 +151,7 @@ in default = 32; example = 16; description = '' - The network prefix length for the macvlan interface. + The network prefix length for the macvlan interface. ''; }; @@ -160,9 +159,9 @@ in type = types.listOf types.str; default = [ "multi-user.target" ]; description = '' - List of systemd units/targets that should cause this - container to start. Set to [] if - you do not want this container to start. + List of systemd units/targets that should cause this + container to start. Set to [] if + you do not want this container to start. 
''; }; @@ -216,21 +215,19 @@ in systemd.services = mapAttrs (name: cfg: { - wantedBy = cfg.wantedBy; - wants = [ "network.target" ]; - after = [ "network.target" ]; - - description = "Container '${name}'"; + wantedBy = cfg.wantedBy; + wants = [ "network.target" ]; + after = [ "network.target" ]; + + description = "Container '${name}'"; - unitConfig.RequiresMountsFor = [ "/var/lib/containers/${name}" ]; + unitConfig.RequiresMountsFor = [ "/var/lib/containers/${name}" ]; - path = [ pkgs.iproute ]; + path = [ pkgs.iproute ]; - environment = { - root = "/var/lib/containers/${name}"; - }; + environment.root = "/var/lib/containers/${name}"; - preStart = + preStart = '' mkdir -p -m 0755 $root/var/lib @@ -240,7 +237,7 @@ in mkfifo -m 0600 $root/var/lib/startup-done ''; - script = + script = '' mkdir -p -m 0755 "$root/etc" "$root/var/lib" if ! [ -e "$root/etc/os-release" ]; then @@ -269,7 +266,7 @@ in if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/${name}/system}/system)" = i686-linux ]; then extraFlags+=" --personality=x86" fi - '' + '' + '' + '' exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ @@ -288,7 +285,7 @@ in ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; - postStart = + postStart = '' # This blocks until the container-startup-done service # writes something to this pipe. FIXME: it also hangs @@ -308,62 +305,59 @@ in fi ''; - preStop = + preStop = '' machinectl poweroff "${name}" ''; - restartIfChanged = false; - reloadIfChanged = true; + restartIfChanged = false; + reloadIfChanged = true; - # If the network configuration has changed, then trigger a reboot of the - # container to setup the new interfaces, otherwise just rebuild the config - # within the container without restarting. 
- serviceConfig.ExecReload = pkgs.writeScript "reload-container" + # If the network configuration has changed, then trigger a reboot of the + # container to setup the new interfaces, otherwise just rebuild the config + # within the container without restarting. + serviceConfig.ExecReload = pkgs.writeScript "reload-container" ''#!${pkgs.stdenv.shell} - SYSTEM_PATH="''${SYSTEM_PATH:-/nix/var/nix/profiles/system}" - echo $SYSTEM_PATH/bin/switch-to-configuration test | \ - ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - + SYSTEM_PATH="''${SYSTEM_PATH:-/nix/var/nix/profiles/system}" + echo $SYSTEM_PATH/bin/switch-to-configuration test | \ + ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; - serviceConfig.SyslogIdentifier = "container ${name}"; + serviceConfig.SyslogIdentifier = "container ${name}"; - serviceConfig.EnvironmentFile = "-/etc/containers/${name}.conf"; + serviceConfig.EnvironmentFile = "-/etc/containers/${name}.conf"; }) config.containers; - # Generate a configuration file in /etc/containers for each + # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container # configuration. 
environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { - text = - '' - SYSTEM_PATH=${cfg.path} - '' - + (optionalString ( cfg.privateNetwork - || cfg.localAddress!=null - || cfg.hostAddress!=null - || cfg.macvlanInterface!=null) - '' - PRIVATE_NETWORK=1 - '') - + (optionalString (cfg.hostAddress != null) - '' - HOST_ADDRESS=${cfg.hostAddress} - '') - + (optionalString (cfg.localAddress != null) - '' - LOCAL_ADDRESS=${cfg.localAddress} - '') - + (optionalString (cfg.macvlanInterface != null) - '' - MACVLANS=${cfg.macvlanInterface} - '') - + (optionalString (cfg.macvlanAddress != null) - '' - MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} - ''); - }) config.containers; + text = ''SYSTEM_PATH=${cfg.path}'' + + (optionalString ( cfg.privateNetwork + || cfg.localAddress!=null + || cfg.hostAddress!=null + || cfg.macvlanInterface!=null) + '' + PRIVATE_NETWORK=1 + '') + + (optionalString (cfg.hostAddress != null) + '' + HOST_ADDRESS=${cfg.hostAddress} + '') + + (optionalString (cfg.localAddress != null) + '' + LOCAL_ADDRESS=${cfg.localAddress} + '') + + (optionalString (cfg.macvlanInterface != null) + '' + MACVLANS=${cfg.macvlanInterface} + '') + + (optionalString (cfg.macvlanAddress != null) + '' + MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} + ''); + }) config.containers; # Generate /etc/hosts entries for the containers. 
networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null) From 1d9a6de644370b24153eeebf2e8d87b9b294e2ce Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Thu, 19 Jun 2014 22:44:34 +0200 Subject: [PATCH 3/9] fix comment --- nixos/modules/virtualisation/containers.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 92c0a05bd0046..e652a397a9f46 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -313,7 +313,7 @@ in restartIfChanged = false; reloadIfChanged = true; - # If the network configuration has changed, then trigger a reboot of the + # TODO: If the network configuration has changed, then trigger a full reboot of the # container to setup the new interfaces, otherwise just rebuild the config # within the container without restarting. serviceConfig.ExecReload = pkgs.writeScript "reload-container" From c0c30371b6b327baaf00ad26c1686d5b7c791fa2 Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Thu, 19 Jun 2014 23:02:35 +0200 Subject: [PATCH 4/9] fix issue caused by retab --- nixos/modules/virtualisation/containers.nix | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index e652a397a9f46..10263e2749a0b 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -312,6 +312,7 @@ in restartIfChanged = false; reloadIfChanged = true; + restartTriggers = [ cfg.path ]; # TODO: If the network configuration has changed, then trigger a full reboot of the # container to setup the new interfaces, otherwise just rebuild the config @@ -333,7 +334,9 @@ in # container so that container@.target can get the container # configuration. 
environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { - text = ''SYSTEM_PATH=${cfg.path}'' + text = '' + SYSTEM_PATH=${cfg.path} + '' + (optionalString ( cfg.privateNetwork || cfg.localAddress!=null || cfg.hostAddress!=null From 6e36619b277f78ece1bb81b79b5651897e46a2bf Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Fri, 20 Jun 2014 01:51:16 +0200 Subject: [PATCH 5/9] Use unit templates for containers but only consider a unit as a 'template instance' during switch if it does not have a unit file. --- .../activation/switch-to-configuration.pl | 9 ++- nixos/modules/virtualisation/containers.nix | 66 +++++++++++-------- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl index 12012698efe35..9674a6896bedc 100644 --- a/nixos/modules/system/activation/switch-to-configuration.pl +++ b/nixos/modules/system/activation/switch-to-configuration.pl @@ -131,11 +131,16 @@ sub fingerprintUnit { while (my ($unit, $state) = each %{$activePrev}) { my $baseUnit = $unit; - # Recognise template instances. - $baseUnit = "$1\@.$2" if $unit =~ /^(.*)@[^\.]*\.(.*)$/; my $prevUnitFile = "/etc/systemd/system/$baseUnit"; my $newUnitFile = "$out/etc/systemd/system/$baseUnit"; + # Detect template instances. 
+ if (!-e $prevUnitFile && !-e $newUnitFile && $unit =~ /^(.*)@[^\.]*\.(.*)$/) { + $baseUnit = "$1\@.$2"; + $prevUnitFile = "/etc/systemd/system/$baseUnit"; + $newUnitFile = "$out/etc/systemd/system/$baseUnit"; + } + my $baseName = $baseUnit; $baseName =~ s/\.[a-z]*$//; diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 10263e2749a0b..87e0dec147c7a 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -211,21 +211,18 @@ in }; - config = mkIf (!config.boot.isContainer) { + config = mkIf (!config.boot.isContainer) (let + + unit = { - systemd.services = mapAttrs (name: cfg: { + description = "Container '%i'"; - wantedBy = cfg.wantedBy; - wants = [ "network.target" ]; - after = [ "network.target" ]; - - description = "Container '${name}'"; - - unitConfig.RequiresMountsFor = [ "/var/lib/containers/${name}" ]; + unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; path = [ pkgs.iproute ]; - environment.root = "/var/lib/containers/${name}"; + environment.INSTANCE = "%i"; + environment.root = "/var/lib/containers/%i"; preStart = '' @@ -245,8 +242,8 @@ in fi mkdir -p -m 0755 \ - "/nix/var/nix/profiles/per-container/${name}" \ - "/nix/var/nix/gcroots/per-container/${name}" + "/nix/var/nix/profiles/per-container/$INSTANCE" \ + "/nix/var/nix/gcroots/per-container/$INSTANCE" if [ "$PRIVATE_NETWORK" = 1 ]; then extraFlags+=" --private-network" @@ -263,19 +260,19 @@ in # If the host is 64-bit and the container is 32-bit, add a # --personality flag. 
'' + optionalString (config.nixpkgs.system == "x86_64-linux") '' - if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/${name}/system}/system)" = i686-linux ]; then + if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then extraFlags+=" --personality=x86" fi '' + '' exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ - -M "${name}" -D "$root" $extraFlags \ + -M "$INSTANCE" -D "$root" $extraFlags \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ - --bind="/nix/var/nix/profiles/per-container/${name}:/nix/var/nix/profiles" \ - --bind="/nix/var/nix/gcroots/per-container/${name}:/nix/var/nix/gcroots" \ + --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \ + --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \ --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \ --setenv HOST_ADDRESS="$HOST_ADDRESS" \ --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \ @@ -294,7 +291,7 @@ in rm -f $root/var/lib/startup-done if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then - ifaceHost=ve-${name} + ifaceHost=ve-$INSTANCE ip link set dev $ifaceHost up if [ -n "$HOST_ADDRESS" ]; then ip addr add $HOST_ADDRESS dev $ifaceHost @@ -307,12 +304,10 @@ in preStop = '' - machinectl poweroff "${name}" + machinectl poweroff "%i" ''; restartIfChanged = false; - reloadIfChanged = true; - restartTriggers = [ cfg.path ]; # TODO: If the network configuration has changed, then trigger a full reboot of the # container to setup the new interfaces, otherwise just rebuild the config @@ -324,19 +319,36 @@ in ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; - serviceConfig.SyslogIdentifier = "container ${name}"; + serviceConfig.SyslogIdentifier = "container %i"; - serviceConfig.EnvironmentFile = "-/etc/containers/${name}.conf"; + serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf"; - }) config.containers; + }; + + in { + + 
systemd.services = listToAttrs ( + # The generic container template used by imperative containers + [{ name = "container@"; value = unit; }] + # declarative containers + ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" ( + unit // { + wantedBy = cfg.wantedBy; + wants = [ "network.target" ]; + after = [ "network.target" ]; + restartTriggers = [ cfg.path ]; + reloadIfChanged = true; + } + )) config.containers)); # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container # configuration. environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { - text = '' - SYSTEM_PATH=${cfg.path} - '' + text = + '' + SYSTEM_PATH=${cfg.path} + '' + (optionalString ( cfg.privateNetwork || cfg.localAddress!=null || cfg.hostAddress!=null @@ -372,5 +384,5 @@ in environment.systemPackages = [ nixos-container ]; - }; + }); } From a5a49461d441055ad4b7d729e88eda1887aaa417 Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Fri, 20 Jun 2014 13:25:32 +0200 Subject: [PATCH 6/9] fix machinectl typo --- nixos/modules/virtualisation/containers.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 87e0dec147c7a..e3a5658bf1a54 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -304,7 +304,7 @@ in preStop = '' - machinectl poweroff "%i" + machinectl poweroff "$INSTANCE" ''; restartIfChanged = false; From 29c0000351c59ccbb52954d2865c521f06338dff Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Fri, 20 Jun 2014 14:09:13 +0200 Subject: [PATCH 7/9] Add new option 'linkJournal' to control the mode by which the container's journal is linked with the host. Setting this to 'guest' keeps the journal data in the container's filesystem while still being able to monitor the container via 'journalctl -M ' on the host.
While setting to 'host' can give you a single collection of journals in /var/log/journal on the host. --- nixos/modules/virtualisation/containers.nix | 74 ++++++++++++++------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index e3a5658bf1a54..b3cfa9eccf576 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -165,6 +165,29 @@ in ''; }; + linkJournal = mkOption { + type = types.enum [ "auto" "host" "guest" "no" ]; + default = "auto"; + description = '' + Control whether the container's journal shall be made + visible to the host to allow viewing the container's + journal files from the host (but not vice versa). + Takes one of "no", "host", "guest", "auto". If "no", + the journal is not linked. If "host", the journal files + are stored on the host file system (beneath /var/log/journal/machine-id) + and the subdirectory is bind-mounted into the container + at the same location. If "guest", the journal files are + stored on the guest file system (beneath /var/log/journal/machine-id) + and the subdirectory is symlinked into the host at the + same location. If "auto" (the default), and the subdirectory of + /var/log/journal/machine-id exists, it will be bind mounted + into the container. If the subdirectory does not exist, no + linking is performed. Effectively, booting a container once + with "guest" or "host" will link the journal persistently + if further on the default of "auto" is used. 
+ ''; + }; + }; config = mkMerge @@ -268,6 +291,7 @@ in exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ + --link-journal=''${LINK_JOURNAL:-auto} \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ @@ -349,29 +373,33 @@ in '' SYSTEM_PATH=${cfg.path} '' - + (optionalString ( cfg.privateNetwork - || cfg.localAddress!=null - || cfg.hostAddress!=null - || cfg.macvlanInterface!=null) - '' - PRIVATE_NETWORK=1 - '') - + (optionalString (cfg.hostAddress != null) - '' - HOST_ADDRESS=${cfg.hostAddress} - '') - + (optionalString (cfg.localAddress != null) - '' - LOCAL_ADDRESS=${cfg.localAddress} - '') - + (optionalString (cfg.macvlanInterface != null) - '' - MACVLANS=${cfg.macvlanInterface} - '') - + (optionalString (cfg.macvlanAddress != null) - '' - MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} - ''); + + (optionalString ( cfg.privateNetwork + || cfg.localAddress!=null + || cfg.hostAddress!=null + || cfg.macvlanInterface!=null) + '' + PRIVATE_NETWORK=1 + '') + + (optionalString (cfg.hostAddress != null) + '' + HOST_ADDRESS=${cfg.hostAddress} + '') + + (optionalString (cfg.localAddress != null) + '' + LOCAL_ADDRESS=${cfg.localAddress} + '') + + (optionalString (cfg.macvlanInterface != null) + '' + MACVLANS=${cfg.macvlanInterface} + '') + + (optionalString (cfg.macvlanAddress != null) + '' + MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} + '') + + (optionalString (cfg.linkJournal != null) + '' + LINK_JOURNAL=${cfg.linkJournal} + ''); }) config.containers; # Generate /etc/hosts entries for the containers. From 7f8d0a239fcc5e08b9be9d67673396d135c0a03a Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Fri, 20 Jun 2014 18:03:39 +0200 Subject: [PATCH 8/9] allow setting of additional container capabilities via 'grantCapabilities' option. 
This enables you to run more privileged containers, for example those that must access other /dev items --- nixos/modules/virtualisation/containers.nix | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index b3cfa9eccf576..5617ae06274a3 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -188,6 +188,15 @@ in ''; }; + grantCapabilities = mkOption { + type = types.listOf types.str; + default = []; + example = [ "CAP_MKNOD" ]; + description = '' + List of additional capabilities to grant the container. + ''; + }; + }; config = mkMerge @@ -280,6 +289,10 @@ in extraFlags+=" --network-macvlan=$iface" done + if [ -n "$GRANT_CAPS" ]; then + extraFlags+=" --capability=$GRANT_CAPS" + fi + # If the host is 64-bit and the container is 32-bit, add a # --personality flag. '' + optionalString (config.nixpkgs.system == "x86_64-linux") '' @@ -399,6 +412,10 @@ in + (optionalString (cfg.linkJournal != null) '' LINK_JOURNAL=${cfg.linkJournal} + '') + + (optionalString (length cfg.grantCapabilities > 0) + '' + GRANT_CAPS=${concatStringsSep "," cfg.grantCapabilities} ''); }) config.containers; From e78ad1e789c3997549a274f08f11d4a089a2278b Mon Sep 17 00:00:00 2001 From: Chris Farmiloe Date: Mon, 23 Jun 2014 00:35:31 +0200 Subject: [PATCH 9/9] `enable` option and fix for graceful shutdown When a container is enabled a separate service unit is created to manage auto-starting of the container via the `wantedBy` option. When disabled the container is still built but will not be started by systemd, it can then be started/stopped manually via `systemctl` or `nixos-container`.
Containers were not getting cleanly shutdown by systemd as machinectl did not wait for container to shutdown and the resulting RTMIN+4 signal would upset the service and trigger systemd to start killing off the processes (I believe this needs some work from upstream tbh). Now the container is shutdown via the command socket, and machinectl is used to wait resulting in a clean shutdown/exit. --- nixos/modules/virtualisation/containers.nix | 64 +++++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index 5617ae06274a3..47417d5c2442e 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -65,6 +65,20 @@ in { options = { + enable = mkOption { + type = types.bool; + default = false; + description = '' + Enable the container as a systemd service. A "disabled" + container will still be built and can be started via + the nixos-container command. An + "enabled" container will be managed by systemd and will + be automatically started on boot. See container option for altering when the container + should be started. + ''; + }; + config = mkOption { description = '' A specification of the desired configuration of this @@ -289,9 +303,9 @@ in extraFlags+=" --network-macvlan=$iface" done - if [ -n "$GRANT_CAPS" ]; then - extraFlags+=" --capability=$GRANT_CAPS" - fi + if [ -n "$GRANT_CAPS" ]; then + extraFlags+=" --capability=$GRANT_CAPS" + fi # If the host is 64-bit and the container is 32-bit, add a # --personality flag. 
@@ -301,10 +315,12 @@ in fi '' + '' - exec ${config.systemd.package}/bin/systemd-nspawn \ + + + ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ - --link-journal=''${LINK_JOURNAL:-auto} \ + --link-journal=''${LINK_JOURNAL:-auto} \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ @@ -339,13 +355,22 @@ in fi ''; - preStop = - '' - machinectl poweroff "$INSTANCE" - ''; + # Ask container to halt and wait until it has shutdown + preStop = + '' + echo 'systemctl poweroff' | \ + ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - + while [[ "$(machinectl -p State show "$INSTANCE" 2>/dev/null)" == "State=running" ]]; do + sleep 0.2 + done + ''; restartIfChanged = false; + # Be graceful about killing off container processes. + serviceConfig.KillMode = "mixed"; + serviceConfig.TimeoutStopSec = 60; + # TODO: If the network configuration has changed, then trigger a full reboot of the # container to setup the new interfaces, otherwise just rebuild the config # within the container without restarting. @@ -364,19 +389,22 @@ in in { - systemd.services = listToAttrs ( + systemd.services = listToAttrs (filter (x: x.value != null) ( # The generic container template used by imperative containers [{ name = "container@"; value = unit; }] # declarative containers ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" ( - unit // { - wantedBy = cfg.wantedBy; - wants = [ "network.target" ]; - after = [ "network.target" ]; - restartTriggers = [ cfg.path ]; - reloadIfChanged = true; - } - )) config.containers)); + if cfg.enable then + unit // { + wantedBy = cfg.wantedBy; + wants = [ "network.target" ]; + after = [ "network.target" ]; + restartTriggers = [ cfg.path ]; + reloadIfChanged = true; + } + else null + )) config.containers) + )); # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container