diff --git a/nixos/modules/system/activation/switch-to-configuration.pl b/nixos/modules/system/activation/switch-to-configuration.pl index 12012698efe35..9674a6896bedc 100644 --- a/nixos/modules/system/activation/switch-to-configuration.pl +++ b/nixos/modules/system/activation/switch-to-configuration.pl @@ -131,11 +131,16 @@ sub fingerprintUnit { while (my ($unit, $state) = each %{$activePrev}) { my $baseUnit = $unit; - # Recognise template instances. - $baseUnit = "$1\@.$2" if $unit =~ /^(.*)@[^\.]*\.(.*)$/; my $prevUnitFile = "/etc/systemd/system/$baseUnit"; my $newUnitFile = "$out/etc/systemd/system/$baseUnit"; + # Detect template instances. + if (!-e $prevUnitFile && !-e $newUnitFile && $unit =~ /^(.*)@[^\.]*\.(.*)$/) { + $baseUnit = "$1\@.$2"; + $prevUnitFile = "/etc/systemd/system/$baseUnit"; + $newUnitFile = "$out/etc/systemd/system/$baseUnit"; + } + my $baseName = $baseUnit; $baseName =~ s/\.[a-z]*$//; diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix index d0d04d9a1e5dc..47417d5c2442e 100644 --- a/nixos/modules/virtualisation/containers.nix +++ b/nixos/modules/virtualisation/containers.nix @@ -16,12 +16,11 @@ let # The container's init script, a small wrapper around the regular # NixOS stage-2 init script. containerInit = pkgs.writeScript "container-init" - '' - #! ${pkgs.stdenv.shell} -e - + ''#! ${pkgs.stdenv.shell} -e + # Initialise the container side of the veth pair. - if [ "$PRIVATE_NETWORK" = 1 ]; then - ip link set host0 name eth0 + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then + ip link set host0 name eth0 ip link set dev eth0 up if [ -n "$HOST_ADDRESS" ]; then ip route add $HOST_ADDRESS dev eth0 @@ -32,6 +31,15 @@ let fi fi + # Initialise the container side of the macvlan interface. 
+ if [ -n "$MACVLANS" ]; then + ip link set mv-$MACVLANS name eth1 + ip link set dev eth1 up + if [ -n "$MACVLAN_ADDRESS" ]; then + ip addr add $MACVLAN_ADDRESS dev eth1 + fi + fi + exec "$1" ''; @@ -57,6 +65,20 @@ in { options = { + enable = mkOption { + type = types.bool; + default = false; + description = '' + Enable the container as a systemd service. A "disabled" + container will still be built and can be started via + the nixos-container command. An + "enabled" container will be managed by systemd and will + be automatically started on boot. See container option for altering when the container + should be started. + ''; + }; + config = mkOption { description = '' A specification of the desired configuration of this @@ -79,13 +101,11 @@ in type = types.bool; default = false; description = '' - Whether to give the container its own private virtual - Ethernet interface. The interface is called - eth0, and is hooked up to the interface - ve-container-name - on the host. If this option is not set, then the - container shares the network interfaces of the host, - and can bind to any port on any interface. + Whether to give the container its own private network namespace. + This option is implied if setting , + or + If this option is not set, then the container shares the network + interfaces of the host, and can bind to any port on any interface. ''; }; @@ -94,7 +114,11 @@ in default = null; example = "10.231.136.1"; description = '' - The IPv4 address assigned to the host interface. + The IPv4 address assigned to the host-side of a veth pair. + Setting or + will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. ''; }; @@ -103,8 +127,87 @@ in default = null; example = "10.231.136.2"; description = '' - The IPv4 address assigned to eth0 - in the container. + The IPv4 address assigned to the container-side of a veth pair + (eth0 in the container). 
+ Setting hostAddress or + localAddress will create a veth pair with one side in the container appearing as + eth0, and the other side in the host as + ve-container-name. + ''; + }; + + macvlanInterface = mkOption { + type = types.nullOr types.string; + default = null; + example = "enp1s1"; + description = '' + When this option is set an eth1 interface + will be available within the container that bridges to the host's + physical network using macvlan. + Note: while macvlan interfaces allow your containers to be accessible + via the same physical network as the specified host interface, you + may not be able to communicate between the host itself and container. + ''; + }; + + macvlanAddress = mkOption { + type = types.nullOr types.string; + default = null; + example = "10.231.136.2"; + description = '' + The IPv4 address assigned to eth1 + (the macvlan interface) in the container. + ''; + }; + + macvlanPrefixLength = mkOption { + type = types.nullOr types.int; + default = 32; + example = 16; + description = '' + The network prefix length for the macvlan interface. + ''; + }; + + wantedBy = mkOption { + type = types.listOf types.str; + default = [ "multi-user.target" ]; + description = '' + List of systemd units/targets that should cause this + container to start. Set to [] if + you do not want this container to start. + ''; + }; + + linkJournal = mkOption { + type = types.enum [ "auto" "host" "guest" "no" ]; + default = "auto"; + description = '' + Control whether the container's journal shall be made + visible to the host to allow viewing the container's + journal files from the host (but not vice versa). + Takes one of "no", "host", "guest", "auto". If "no", + the journal is not linked. If "host", the journal files + are stored on the host file system (beneath /var/log/journal/machine-id) + and the subdirectory is bind-mounted into the container + at the same location.
If "guest", the journal files are + stored on the guest file system (beneath /var/log/journal/machine-id) + and the subdirectory is symlinked into the host at the + same location. If "auto" (the default), and the subdirectory of + /var/log/journal/machine-id exists, it will be bind mounted + into the container. If the subdirectory does not exist, no + linking is performed. Effectively, booting a container once + with "guest" or "host" will link the journal persistently + if further on the default of "auto" is used. + ''; + }; + + grantCapabilities = mkOption { + type = types.listOf types.str; + default = []; + example = [ "CAP_MKNOD" ]; + description = '' + List of additional capabilities to grant the container. ''; }; @@ -154,19 +257,20 @@ in }; - config = mkIf (!config.boot.isContainer) { + config = mkIf (!config.boot.isContainer) (let + + unit = { - systemd.services."container@" = - { description = "Container '%i'"; + description = "Container '%i'"; - unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; + unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; - path = [ pkgs.iproute ]; + path = [ pkgs.iproute ]; - environment.INSTANCE = "%i"; - environment.root = "/var/lib/containers/%i"; + environment.INSTANCE = "%i"; + environment.root = "/var/lib/containers/%i"; - preStart = + preStart = '' mkdir -p -m 0755 $root/var/lib @@ -176,7 +280,7 @@ in mkfifo -m 0600 $root/var/lib/startup-done ''; - script = + script = '' mkdir -p -m 0755 "$root/etc" "$root/var/lib" if ! 
[ -e "$root/etc/os-release" ]; then @@ -188,6 +292,10 @@ in "/nix/var/nix/gcroots/per-container/$INSTANCE" if [ "$PRIVATE_NETWORK" = 1 ]; then + extraFlags+=" --private-network" + fi + + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then extraFlags+=" --network-veth" fi @@ -195,17 +303,24 @@ in extraFlags+=" --network-macvlan=$iface" done + if [ -n "$GRANT_CAPS" ]; then + extraFlags+=" --capability=$GRANT_CAPS" + fi + # If the host is 64-bit and the container is 32-bit, add a # --personality flag. - ${optionalString (config.nixpkgs.system == "x86_64-linux") '' + '' + optionalString (config.nixpkgs.system == "x86_64-linux") '' if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then extraFlags+=" --personality=x86" fi - ''} + '' + '' - exec ${config.systemd.package}/bin/systemd-nspawn \ + + + ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ + --link-journal=''${LINK_JOURNAL:-auto} \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ @@ -214,11 +329,13 @@ in --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \ --setenv HOST_ADDRESS="$HOST_ADDRESS" \ --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \ + --setenv MACVLANS="$MACVLANS" \ + --setenv MACVLAN_ADDRESS="$MACVLAN_ADDRESS" \ --setenv PATH="$PATH" \ ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; - postStart = + postStart = '' # This blocks until the container-startup-done service # writes something to this pipe. 
FIXME: it also hangs @@ -226,7 +343,7 @@ in read x < $root/var/lib/startup-done rm -f $root/var/lib/startup-done - if [ "$PRIVATE_NETWORK" = 1 ]; then + if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ]; then ifaceHost=ve-$INSTANCE ip link set dev $ifaceHost up if [ -n "$HOST_ADDRESS" ]; then @@ -238,47 +355,97 @@ in fi ''; - preStop = - '' - machinectl poweroff "$INSTANCE" - ''; + # Ask container to halt and wait until it has shut down + preStop = + '' + echo 'systemctl poweroff' | \ + ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - + while [[ "$(machinectl -p State show "$INSTANCE" 2>/dev/null)" == "State=running" ]]; do + sleep 0.2 + done + ''; - restartIfChanged = false; - #reloadIfChanged = true; # FIXME + restartIfChanged = false; - serviceConfig.ExecReload = pkgs.writeScript "reload-container" - '' - #! ${pkgs.stdenv.shell} -e - SYSTEM_PATH=/nix/var/nix/profiles/system + # Be graceful about killing off container processes. + serviceConfig.KillMode = "mixed"; + serviceConfig.TimeoutStopSec = 60; + + # TODO: If the network configuration has changed, then trigger a full reboot of the + # container to setup the new interfaces, otherwise just rebuild the config + # within the container without restarting.
+ serviceConfig.ExecReload = pkgs.writeScript "reload-container" + ''#!${pkgs.stdenv.shell} + SYSTEM_PATH="''${SYSTEM_PATH:-/nix/var/nix/profiles/system}" echo $SYSTEM_PATH/bin/switch-to-configuration test | \ ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; - serviceConfig.SyslogIdentifier = "container %i"; + serviceConfig.SyslogIdentifier = "container %i"; + + serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf"; + + }; - serviceConfig.EnvironmentFile = "-/etc/containers/%i.conf"; - }; + in { + + systemd.services = listToAttrs (filter (x: x.value != null) ( + # The generic container template used by imperative containers + [{ name = "container@"; value = unit; }] + # declarative containers + ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" ( + if cfg.enable then + unit // { + wantedBy = cfg.wantedBy; + wants = [ "network.target" ]; + after = [ "network.target" ]; + restartTriggers = [ cfg.path ]; + reloadIfChanged = true; + } + else null + )) config.containers) + )); # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container # configuration. - environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" - { text = - '' - SYSTEM_PATH=${cfg.path} - ${optionalString cfg.privateNetwork '' - PRIVATE_NETWORK=1 - ${optionalString (cfg.hostAddress != null) '' - HOST_ADDRESS=${cfg.hostAddress} - ''} - ${optionalString (cfg.localAddress != null) '' - LOCAL_ADDRESS=${cfg.localAddress} - ''} - ''} - ''; - }) config.containers; - - # FIXME: auto-start containers. 
+ environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { + text = + '' + SYSTEM_PATH=${cfg.path} + '' + + (optionalString ( cfg.privateNetwork + || cfg.localAddress!=null + || cfg.hostAddress!=null + || cfg.macvlanInterface!=null) + '' + PRIVATE_NETWORK=1 + '') + + (optionalString (cfg.hostAddress != null) + '' + HOST_ADDRESS=${cfg.hostAddress} + '') + + (optionalString (cfg.localAddress != null) + '' + LOCAL_ADDRESS=${cfg.localAddress} + '') + + (optionalString (cfg.macvlanInterface != null) + '' + MACVLANS=${cfg.macvlanInterface} + '') + + (optionalString (cfg.macvlanAddress != null) + '' + MACVLAN_ADDRESS=${cfg.macvlanAddress}/${toString cfg.macvlanPrefixLength} + '') + + (optionalString (cfg.linkJournal != null) + '' + LINK_JOURNAL=${cfg.linkJournal} + '') + + (optionalString (length cfg.grantCapabilities > 0) + '' + GRANT_CAPS=${concatStringsSep "," cfg.grantCapabilities} + ''); + }) config.containers; # Generate /etc/hosts entries for the containers. networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null) @@ -290,5 +457,5 @@ in environment.systemPackages = [ nixos-container ]; - }; + }); }