diff --git a/maintainers/team-list.nix b/maintainers/team-list.nix
index 7efff7f45eca..caee548d719c 100644
--- a/maintainers/team-list.nix
+++ b/maintainers/team-list.nix
@@ -33,10 +33,11 @@ with lib.maintainers; {
podman = {
members = [
+ adisbladis
saschagrunert
vdemeester
zowoq
];
- scope = "Maintain podman related packages.";
+ scope = "Maintain Podman related packages and modules.";
};
}
diff --git a/nixos/doc/manual/release-notes/rl-2009.xml b/nixos/doc/manual/release-notes/rl-2009.xml
index c800cbc75a5a..3e29c19af8fb 100644
--- a/nixos/doc/manual/release-notes/rl-2009.xml
+++ b/nixos/doc/manual/release-notes/rl-2009.xml
@@ -40,6 +40,11 @@
make use of these new options instead.
+  <listitem>
+   <para>
+    There is a new module for Podman (<varname>virtualisation.podman</varname>), a drop-in replacement for the Docker command line.
+   </para>
+  </listitem>
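A minimal sketch of what this release note describes, enabling the new module from a host configuration; only virtualisation.podman.enable comes from this patch, the surrounding module boilerplate is illustrative:

    # configuration.nix (illustrative sketch)
    { ... }:
    {
      virtualisation.podman.enable = true;
    }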
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index df08ac2959ed..7244a7e0a894 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -984,6 +984,7 @@
./virtualisation/anbox.nix
./virtualisation/container-config.nix
./virtualisation/containers.nix
+ ./virtualisation/nixos-containers.nix
./virtualisation/cri-o.nix
./virtualisation/docker.nix
./virtualisation/docker-containers.nix
@@ -997,6 +998,7 @@
./virtualisation/kvmgt.nix
./virtualisation/openvswitch.nix
./virtualisation/parallels-guest.nix
+ ./virtualisation/podman.nix
./virtualisation/qemu-guest-agent.nix
./virtualisation/railcar.nix
./virtualisation/rkt.nix
diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix
index 2db5c117aa7f..e6127e284869 100644
--- a/nixos/modules/virtualisation/containers.nix
+++ b/nixos/modules/virtualisation/containers.nix
@@ -1,838 +1,150 @@
{ config, lib, pkgs, ... }:
-
-with lib;
-
let
+ cfg = config.virtualisation.containers;
- # The container's init script, a small wrapper around the regular
- # NixOS stage-2 init script.
- containerInit = (cfg:
- let
- renderExtraVeth = (name: cfg:
- ''
- echo "Bringing ${name} up"
- ip link set dev ${name} up
- ${optionalString (cfg.localAddress != null) ''
- echo "Setting ip for ${name}"
- ip addr add ${cfg.localAddress} dev ${name}
- ''}
- ${optionalString (cfg.localAddress6 != null) ''
- echo "Setting ip6 for ${name}"
- ip -6 addr add ${cfg.localAddress6} dev ${name}
- ''}
- ${optionalString (cfg.hostAddress != null) ''
- echo "Setting route to host for ${name}"
- ip route add ${cfg.hostAddress} dev ${name}
- ''}
- ${optionalString (cfg.hostAddress6 != null) ''
- echo "Setting route6 to host for ${name}"
- ip -6 route add ${cfg.hostAddress6} dev ${name}
- ''}
- ''
- );
- in
- pkgs.writeScript "container-init"
- ''
- #! ${pkgs.runtimeShell} -e
+ inherit (lib) mkOption types;
- # Initialise the container side of the veth pair.
- if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
- [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
- [ -n "$HOST_BRIDGE" ]; then
- ip link set host0 name eth0
- ip link set dev eth0 up
+ # Once https://github.com/NixOS/nixpkgs/pull/75584 is merged, we can use the TOML generator
+ toTOML = name: value: pkgs.runCommandNoCC name {
+ nativeBuildInputs = [ pkgs.remarshal ];
+ value = builtins.toJSON value;
+ passAsFile = [ "value" ];
+ } ''
+ json2toml "$valuePath" "$out"
+ '';
- if [ -n "$LOCAL_ADDRESS" ]; then
- ip addr add $LOCAL_ADDRESS dev eth0
- fi
- if [ -n "$LOCAL_ADDRESS6" ]; then
- ip -6 addr add $LOCAL_ADDRESS6 dev eth0
- fi
- if [ -n "$HOST_ADDRESS" ]; then
- ip route add $HOST_ADDRESS dev eth0
- ip route add default via $HOST_ADDRESS
- fi
- if [ -n "$HOST_ADDRESS6" ]; then
- ip -6 route add $HOST_ADDRESS6 dev eth0
- ip -6 route add default via $HOST_ADDRESS6
- fi
-
- ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
- fi
-
- # Start the regular stage 1 script.
- exec "$1"
- ''
- );
-
- nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");
-
- startScript = cfg:
- ''
- mkdir -p -m 0755 "$root/etc" "$root/var/lib"
- mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
- if ! [ -e "$root/etc/os-release" ]; then
- touch "$root/etc/os-release"
- fi
-
- if ! [ -e "$root/etc/machine-id" ]; then
- touch "$root/etc/machine-id"
- fi
-
- mkdir -p -m 0755 \
- "/nix/var/nix/profiles/per-container/$INSTANCE" \
- "/nix/var/nix/gcroots/per-container/$INSTANCE"
-
- cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
-
- if [ "$PRIVATE_NETWORK" = 1 ]; then
- extraFlags+=" --private-network"
- fi
-
- if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
- [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
- extraFlags+=" --network-veth"
- fi
-
- if [ -n "$HOST_PORT" ]; then
- OIFS=$IFS
- IFS=","
- for i in $HOST_PORT
- do
- extraFlags+=" --port=$i"
- done
- IFS=$OIFS
- fi
-
- if [ -n "$HOST_BRIDGE" ]; then
- extraFlags+=" --network-bridge=$HOST_BRIDGE"
- fi
-
- extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"
-
- for iface in $INTERFACES; do
- extraFlags+=" --network-interface=$iface"
- done
-
- for iface in $MACVLANS; do
- extraFlags+=" --network-macvlan=$iface"
- done
-
- # If the host is 64-bit and the container is 32-bit, add a
- # --personality flag.
- ${optionalString (config.nixpkgs.localSystem.system == "x86_64-linux") ''
- if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
- extraFlags+=" --personality=x86"
- fi
- ''}
-
- # Run systemd-nspawn without startup notification (we'll
- # wait for the container systemd to signal readiness).
- exec ${config.systemd.package}/bin/systemd-nspawn \
- --keep-unit \
- -M "$INSTANCE" -D "$root" $extraFlags \
- $EXTRA_NSPAWN_FLAGS \
- --notify-ready=yes \
- --bind-ro=/nix/store \
- --bind-ro=/nix/var/nix/db \
- --bind-ro=/nix/var/nix/daemon-socket \
- --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
- --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
- ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
- --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
- --setenv HOST_BRIDGE="$HOST_BRIDGE" \
- --setenv HOST_ADDRESS="$HOST_ADDRESS" \
- --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
- --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
- --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
- --setenv HOST_PORT="$HOST_PORT" \
- --setenv PATH="$PATH" \
- ${optionalString cfg.ephemeral "--ephemeral"} \
- ${if cfg.additionalCapabilities != null && cfg.additionalCapabilities != [] then
- ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'' else ""
- } \
- ${if cfg.tmpfs != null && cfg.tmpfs != [] then
- ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}'' else ""
- } \
- ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
- '';
-
- preStartScript = cfg:
- ''
- # Clean up existing machined registration and interfaces.
- machinectl terminate "$INSTANCE" 2> /dev/null || true
-
- if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
- [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
- ip link del dev "ve-$INSTANCE" 2> /dev/null || true
- ip link del dev "vb-$INSTANCE" 2> /dev/null || true
- fi
-
- ${concatStringsSep "\n" (
- mapAttrsToList (name: cfg:
- ''ip link del dev ${name} 2> /dev/null || true ''
- ) cfg.extraVeths
- )}
- '';
-
- postStartScript = (cfg:
- let
- ipcall = cfg: ipcmd: variable: attribute:
- if cfg.${attribute} == null then
- ''
- if [ -n "${variable}" ]; then
- ${ipcmd} add ${variable} dev $ifaceHost
- fi
- ''
- else
- ''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost'';
- renderExtraVeth = name: cfg:
- if cfg.hostBridge != null then
- ''
- # Add ${name} to bridge ${cfg.hostBridge}
- ip link set dev ${name} master ${cfg.hostBridge} up
- ''
- else
- ''
- echo "Bring ${name} up"
- ip link set dev ${name} up
- # Set IPs and routes for ${name}
- ${optionalString (cfg.hostAddress != null) ''
- ip addr add ${cfg.hostAddress} dev ${name}
- ''}
- ${optionalString (cfg.hostAddress6 != null) ''
- ip -6 addr add ${cfg.hostAddress6} dev ${name}
- ''}
- ${optionalString (cfg.localAddress != null) ''
- ip route add ${cfg.localAddress} dev ${name}
- ''}
- ${optionalString (cfg.localAddress6 != null) ''
- ip -6 route add ${cfg.localAddress6} dev ${name}
- ''}
- '';
- in
- ''
- if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
- [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
- if [ -z "$HOST_BRIDGE" ]; then
- ifaceHost=ve-$INSTANCE
- ip link set dev $ifaceHost up
-
- ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
- ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
- ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
- ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
- fi
- ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
- fi
- ''
- );
-
- serviceDirectives = cfg: {
- ExecReload = pkgs.writeScript "reload-container"
- ''
- #! ${pkgs.runtimeShell} -e
- ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
- bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
- '';
-
- SyslogIdentifier = "container %i";
-
- EnvironmentFile = "-/etc/containers/%i.conf";
-
- Type = "notify";
-
- RuntimeDirectory = lib.optional cfg.ephemeral "containers/%i";
-
- # Note that on reboot, systemd-nspawn returns 133, so this
- # unit will be restarted. On poweroff, it returns 0, so the
- # unit won't be restarted.
- RestartForceExitStatus = "133";
- SuccessExitStatus = "133";
-
- # Some containers take long to start
- # especially when you automatically start many at once
- TimeoutStartSec = cfg.timeoutStartSec;
-
- Restart = "on-failure";
-
- Slice = "machine.slice";
- Delegate = true;
-
- # Hack: we don't want to kill systemd-nspawn, since we call
- # "machinectl poweroff" in preStop to shut down the
- # container cleanly. But systemd requires sending a signal
- # (at least if we want remaining processes to be killed
- # after the timeout). So send an ignored signal.
- KillMode = "mixed";
- KillSignal = "WINCH";
-
- DevicePolicy = "closed";
- DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
+ # Copy configuration files individually so the full package sources don't end up in the system closure
+ copyFile = filePath: pkgs.runCommandNoCC (builtins.unsafeDiscardStringContext (builtins.baseNameOf filePath)) {} ''
+ cp ${filePath} $out
+ '';
+in
+{
+ meta = {
+ maintainers = lib.teams.podman.members;
};
+ options.virtualisation.containers = {
- system = config.nixpkgs.localSystem.system;
-
- bindMountOpts = { name, ... }: {
-
- options = {
- mountPoint = mkOption {
- example = "/mnt/usb";
- type = types.str;
- description = "Mount point on the container file system.";
- };
- hostPath = mkOption {
- default = null;
- example = "/home/alice";
- type = types.nullOr types.str;
- description = "Location of the host path to be mounted.";
- };
- isReadOnly = mkOption {
- default = true;
+ enable =
+ mkOption {
type = types.bool;
- description = "Determine whether the mounted path will be accessed in read-only mode.";
- };
- };
-
- config = {
- mountPoint = mkDefault name;
- };
-
- };
-
- allowedDeviceOpts = { ... }: {
- options = {
- node = mkOption {
- example = "/dev/net/tun";
- type = types.str;
- description = "Path to device node";
- };
- modifier = mkOption {
- example = "rw";
- type = types.str;
+ default = false;
description = ''
- Device node access modifier. Takes a combination
- r (read), w (write), and
- m (mknod). See the
- systemd.resource-control(5) man page for more
- information.'';
+ This option enables the common libpod container configuration module.
+ '';
+ };
+
+ registries = {
+ search = mkOption {
+ type = types.listOf types.str;
+ default = [ "docker.io" "quay.io" ];
+ description = ''
+ List of registries to search.
+ '';
+ };
+
+ insecure = mkOption {
+ default = [];
+ type = types.listOf types.str;
+ description = ''
+ List of insecure registries.
+ '';
+ };
+
+ block = mkOption {
+ default = [];
+ type = types.listOf types.str;
+ description = ''
+ List of blocked registries.
+ '';
};
};
- };
-
- mkBindFlag = d:
- let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
- mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
- in flagPrefix + mountstr ;
-
- mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);
-
- networkOptions = {
- hostBridge = mkOption {
- type = types.nullOr types.str;
- default = null;
- example = "br0";
+ policy = mkOption {
+ default = {};
+ type = types.attrs;
+ example = lib.literalExample ''
+ {
+ default = [ { type = "insecureAcceptAnything"; } ];
+ transports = {
+ docker-daemon = {
+ "" = [ { type = "insecureAcceptAnything"; } ];
+ };
+ };
+ }
+ '';
description = ''
- Put the host-side of the veth-pair into the named bridge.
- Only one of hostAddress* or hostBridge can be given.
+ Signature verification policy file.
+ If this option is empty the default policy file from
+ skopeo will be used.
'';
};
- forwardPorts = mkOption {
- type = types.listOf (types.submodule {
+ users = mkOption {
+ default = [];
+ type = types.listOf types.str;
+ description = ''
+ List of users to set up subuid/subgid mappings for.
+ This is a requirement for running rootless containers.
+ '';
+ };
+
+ libpod = mkOption {
+ default = {};
+ description = "Libpod configuration";
+ type = types.submodule {
options = {
- protocol = mkOption {
- type = types.str;
- default = "tcp";
- description = "The protocol specifier for port forwarding between host and container";
- };
- hostPort = mkOption {
- type = types.int;
- description = "Source port of the external interface on host";
- };
- containerPort = mkOption {
- type = types.nullOr types.int;
- default = null;
- description = "Target port of container";
+
+ extraConfig = mkOption {
+ type = types.lines;
+ default = "";
+ description = ''
+ Extra configuration that should be put in the libpod.conf
+ configuration file.
+ '';
+
};
};
- });
- default = [];
- example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
- description = ''
- List of forwarded ports from host to container. Each forwarded port
- is specified by protocol, hostPort and containerPort. By default,
- protocol is tcp and hostPort and containerPort are assumed to be
- the same if containerPort is not explicitly given.
- '';
- };
-
-
- hostAddress = mkOption {
- type = types.nullOr types.str;
- default = null;
- example = "10.231.136.1";
- description = ''
- The IPv4 address assigned to the host interface.
- (Not used when hostBridge is set.)
- '';
- };
-
- hostAddress6 = mkOption {
- type = types.nullOr types.str;
- default = null;
- example = "fc00::1";
- description = ''
- The IPv6 address assigned to the host interface.
- (Not used when hostBridge is set.)
- '';
- };
-
- localAddress = mkOption {
- type = types.nullOr types.str;
- default = null;
- example = "10.231.136.2";
- description = ''
- The IPv4 address assigned to the interface in the container.
- If a hostBridge is used, this should be given with netmask to access
- the whole network. Otherwise the default netmask is /32 and routing is
- set up from localAddress to hostAddress and back.
- '';
- };
-
- localAddress6 = mkOption {
- type = types.nullOr types.str;
- default = null;
- example = "fc00::2";
- description = ''
- The IPv6 address assigned to the interface in the container.
- If a hostBridge is used, this should be given with netmask to access
- the whole network. Otherwise the default netmask is /128 and routing is
- set up from localAddress6 to hostAddress6 and back.
- '';
- };
-
- };
-
- dummyConfig =
- {
- extraVeths = {};
- additionalCapabilities = [];
- ephemeral = false;
- timeoutStartSec = "15s";
- allowedDevices = [];
- hostAddress = null;
- hostAddress6 = null;
- localAddress = null;
- localAddress6 = null;
- tmpfs = null;
- };
-
-in
-
-{
- options = {
-
- boot.isContainer = mkOption {
- type = types.bool;
- default = false;
- description = ''
- Whether this NixOS machine is a lightweight container running
- in another NixOS system.
- '';
- };
-
- boot.enableContainers = mkOption {
- type = types.bool;
- default = !config.boot.isContainer;
- description = ''
- Whether to enable support for NixOS containers.
- '';
- };
-
- containers = mkOption {
- type = types.attrsOf (types.submodule (
- { config, options, name, ... }:
- {
- options = {
-
- config = mkOption {
- description = ''
- A specification of the desired configuration of this
- container, as a NixOS module.
- '';
- type = let
- confPkgs = if config.pkgs == null then pkgs else config.pkgs;
- in lib.mkOptionType {
- name = "Toplevel NixOS config";
- merge = loc: defs: (import (confPkgs.path + "/nixos/lib/eval-config.nix") {
- inherit system;
- pkgs = confPkgs;
- baseModules = import (confPkgs.path + "/nixos/modules/module-list.nix");
- inherit (confPkgs) lib;
- modules =
- let
- extraConfig = {
- _file = "module at ${__curPos.file}:${toString __curPos.line}";
- config = {
- boot.isContainer = true;
- networking.hostName = mkDefault name;
- networking.useDHCP = false;
- assertions = [
- {
- assertion = config.privateNetwork -> stringLength name < 12;
- message = ''
- Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
- not be longer than 11 characters, because the container's interface name is derived from it.
- This might be fixed in the future. See https://github.com/NixOS/nixpkgs/issues/38509
- '';
- }
- ];
- };
- };
- in [ extraConfig ] ++ (map (x: x.value) defs);
- prefix = [ "containers" name ];
- }).config;
- };
- };
-
- path = mkOption {
- type = types.path;
- example = "/nix/var/nix/profiles/containers/webserver";
- description = ''
- As an alternative to specifying
- <option>config</option>, you can specify the path to
- the evaluated NixOS system configuration, typically a
- symlink to a system profile.
- '';
- };
-
- additionalCapabilities = mkOption {
- type = types.listOf types.str;
- default = [];
- example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
- description = ''
- Grant additional capabilities to the container. See the
- capabilities(7) and systemd-nspawn(1) man pages for more
- information.
- '';
- };
-
- pkgs = mkOption {
- type = types.nullOr types.attrs;
- default = null;
- example = literalExample "pkgs";
- description = ''
- Customise which nixpkgs to use for this container.
- '';
- };
-
- ephemeral = mkOption {
- type = types.bool;
- default = false;
- description = ''
- Runs the container in ephemeral mode with an empty root filesystem at boot.
- This way the container is bootstrapped from scratch on each boot
- and cleaned up on shutdown, leaving no traces behind.
- Useful for completely stateless, reproducible containers.
-
- Note that this option might require some adjustments to the container configuration,
- e.g. you might want to set
- systemd.network.networks.$interface.dhcpConfig.ClientIdentifier to "mac"
- if you use the macvlans option.
- This way the DHCP client identifier stays stable across container restarts.
-
- Note that the container journal will not be linked to the host if this option is enabled.
- '';
- };
-
- enableTun = mkOption {
- type = types.bool;
- default = false;
- description = ''
- Allows the container to create and setup tunnel interfaces
- by granting the NET_ADMIN capability and
- enabling access to /dev/net/tun.
- '';
- };
-
- privateNetwork = mkOption {
- type = types.bool;
- default = false;
- description = ''
- Whether to give the container its own private virtual
- Ethernet interface. The interface is called
- eth0, and is hooked up to the interface
- ve-container-name
- on the host. If this option is not set, then the
- container shares the network interfaces of the host,
- and can bind to any port on any interface.
- '';
- };
-
- interfaces = mkOption {
- type = types.listOf types.str;
- default = [];
- example = [ "eth1" "eth2" ];
- description = ''
- The list of interfaces to be moved into the container.
- '';
- };
-
- macvlans = mkOption {
- type = types.listOf types.str;
- default = [];
- example = [ "eth1" "eth2" ];
- description = ''
- The list of host interfaces from which macvlans will be
- created. For each interface specified, a macvlan interface
- will be created and moved to the container.
- '';
- };
-
- extraVeths = mkOption {
- type = with types; attrsOf (submodule { options = networkOptions; });
- default = {};
- description = ''
- Extra veth-pairs to be created for the container
- '';
- };
-
- autoStart = mkOption {
- type = types.bool;
- default = false;
- description = ''
- Whether the container is automatically started at boot-time.
- '';
- };
-
- timeoutStartSec = mkOption {
- type = types.str;
- default = "1min";
- description = ''
- Time for the container to start. In case of a timeout,
- the container processes get killed.
- See systemd.time(7)
- for more information about the format.
- '';
- };
-
- bindMounts = mkOption {
- type = with types; loaOf (submodule bindMountOpts);
- default = {};
- example = literalExample ''
- { "/home" = { hostPath = "/home/alice";
- isReadOnly = false; };
- }
- '';
-
- description =
- ''
- An extra list of directories that is bound to the container.
- '';
- };
-
- allowedDevices = mkOption {
- type = with types; listOf (submodule allowedDeviceOpts);
- default = [];
- example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
- description = ''
- A list of device nodes to which the container has access.
- '';
- };
-
- tmpfs = mkOption {
- type = types.listOf types.str;
- default = [];
- example = [ "/var" ];
- description = ''
- Mounts a set of tmpfs file systems into the container.
- Multiple paths can be specified.
- Valid items must conform to the --tmpfs argument
- of systemd-nspawn. See systemd-nspawn(1) for details.
- '';
- };
-
- extraFlags = mkOption {
- type = types.listOf types.str;
- default = [];
- example = [ "--drop-capability=CAP_SYS_CHROOT" ];
- description = ''
- Extra flags passed to the systemd-nspawn command.
- See systemd-nspawn(1) for details.
- '';
- };
-
- } // networkOptions;
-
- config = mkMerge
- [
- (mkIf options.config.isDefined {
- path = config.config.system.build.toplevel;
- })
- ];
- }));
-
- default = {};
- example = literalExample
- ''
- { webserver =
- { path = "/nix/var/nix/profiles/webserver";
- };
- database =
- { config =
- { config, pkgs, ... }:
- { services.postgresql.enable = true;
- services.postgresql.package = pkgs.postgresql_9_6;
-
- system.stateVersion = "17.03";
- };
- };
- }
- '';
- description = ''
- A set of NixOS system configurations to be run as lightweight
- containers. Each container appears as a service
- container-name
- on the host system, allowing it to be started and stopped via
- systemctl.
- '';
- };
-
- };
-
-
- config = mkIf (config.boot.enableContainers) (let
-
- unit = {
- description = "Container '%i'";
-
- unitConfig.RequiresMountsFor = "/var/lib/containers/%i";
-
- path = [ pkgs.iproute ];
-
- environment = {
- root = "/var/lib/containers/%i";
- INSTANCE = "%i";
};
-
- preStart = preStartScript dummyConfig;
-
- script = startScript dummyConfig;
-
- postStart = postStartScript dummyConfig;
-
- preStop = "machinectl poweroff $INSTANCE";
-
- restartIfChanged = false;
-
- serviceConfig = serviceDirectives dummyConfig;
};
- in {
- systemd.targets.multi-user.wants = [ "machines.target" ];
- systemd.services = listToAttrs (filter (x: x.value != null) (
- # The generic container template used by imperative containers
- [{ name = "container@"; value = unit; }]
- # declarative containers
- ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
- containerConfig = cfg // (
- if cfg.enableTun then
- {
- allowedDevices = cfg.allowedDevices
- ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
- additionalCapabilities = cfg.additionalCapabilities
- ++ [ "CAP_NET_ADMIN" ];
- }
- else {});
- in
- recursiveUpdate unit {
- preStart = preStartScript containerConfig;
- script = startScript containerConfig;
- postStart = postStartScript containerConfig;
- serviceConfig = serviceDirectives containerConfig;
- unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "/var/lib/containers/%i";
- environment.root = if containerConfig.ephemeral then "/run/containers/%i" else "/var/lib/containers/%i";
- } // (
- if containerConfig.autoStart then
- {
- wantedBy = [ "machines.target" ];
- wants = [ "network.target" ];
- after = [ "network.target" ];
- restartTriggers = [
- containerConfig.path
- config.environment.etc."containers/${name}.conf".source
- ];
- restartIfChanged = true;
- }
- else {})
- )) config.containers)
- ));
+ };
- # Generate a configuration file in /etc/containers for each
- # container so that container@.target can get the container
- # configuration.
- environment.etc =
- let mkPortStr = p: p.protocol + ":" + (toString p.hostPort) + ":" + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
- in mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
- { text =
- ''
- SYSTEM_PATH=${cfg.path}
- ${optionalString cfg.privateNetwork ''
- PRIVATE_NETWORK=1
- ${optionalString (cfg.hostBridge != null) ''
- HOST_BRIDGE=${cfg.hostBridge}
- ''}
- ${optionalString (length cfg.forwardPorts > 0) ''
- HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
- ''}
- ${optionalString (cfg.hostAddress != null) ''
- HOST_ADDRESS=${cfg.hostAddress}
- ''}
- ${optionalString (cfg.hostAddress6 != null) ''
- HOST_ADDRESS6=${cfg.hostAddress6}
- ''}
- ${optionalString (cfg.localAddress != null) ''
- LOCAL_ADDRESS=${cfg.localAddress}
- ''}
- ${optionalString (cfg.localAddress6 != null) ''
- LOCAL_ADDRESS6=${cfg.localAddress6}
- ''}
- ''}
- INTERFACES="${toString cfg.interfaces}"
- MACVLANS="${toString cfg.macvlans}"
- ${optionalString cfg.autoStart ''
- AUTO_START=1
- ''}
- EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts +
- optionalString (cfg.extraFlags != [])
- (" " + concatStringsSep " " cfg.extraFlags)}"
- '';
- }) config.containers;
+ config = lib.mkIf cfg.enable {
- # Generate /etc/hosts entries for the containers.
- networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
- ''
- ${head (splitString "/" cfg.localAddress)} ${name}.containers
- '') config.containers);
+ environment.etc."containers/libpod.conf".text = ''
+ cni_plugin_dir = ["${pkgs.cni-plugins}/bin/"]
+ cni_config_dir = "/etc/cni/net.d/"
- networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
+ '' + cfg.libpod.extraConfig;
- services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
- # Don't manage interfaces created by nixos-container.
- ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
- '';
+ environment.etc."containers/registries.conf".source = toTOML "registries.conf" {
+ registries = lib.mapAttrs (n: v: { registries = v; }) cfg.registries;
+ };
- environment.systemPackages = [ pkgs.nixos-container ];
+ users.extraUsers = builtins.listToAttrs (
+ (
+ builtins.foldl' (
+ acc: user: {
+ values = acc.values ++ [
+ {
+ name = user;
+ value = {
+ subUidRanges = [ { startUid = acc.offset; count = 65536; } ];
+ subGidRanges = [ { startGid = acc.offset; count = 65536; } ];
+ };
+ }
+ ];
+ offset = acc.offset + 65536;
+ }
+ )
+ { values = []; offset = 100000; } (lib.unique cfg.users)
+ ).values
+ );
+
+ environment.etc."containers/policy.json".source =
+ if cfg.policy != {} then pkgs.writeText "policy.json" (builtins.toJSON cfg.policy)
+ else copyFile "${pkgs.skopeo.src}/default-policy.json";
+ };
- boot.kernelModules = [
- "bridge"
- "macvlan"
- "tap"
- "tun"
- ];
- });
}
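A hedged sketch of the new virtualisation.containers options defined above: it enables the shared configuration, adjusts the registry lists, and requests subuid/subgid mappings for two users. Per the foldl' over cfg.users, each listed user gets a disjoint 65536-wide range starting at 100000 (here alice would get 100000-165535 and bob 165536-231071); the user names and the insecure registry are placeholders:

    { ... }:
    {
      virtualisation.containers = {
        enable = true;
        registries = {
          search = [ "docker.io" "quay.io" ];
          insecure = [ "registry.internal:5000" ];  # hypothetical in-house registry
        };
        # rootless containers need subuid/subgid ranges for these users
        users = [ "alice" "bob" ];
      };
    }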
diff --git a/nixos/modules/virtualisation/cri-o.nix b/nixos/modules/virtualisation/cri-o.nix
index 14a435f6c8bb..7882b7fc19de 100644
--- a/nixos/modules/virtualisation/cri-o.nix
+++ b/nixos/modules/virtualisation/cri-o.nix
@@ -62,9 +62,7 @@ in
log_level = "${cfg.logLevel}"
manage_network_ns_lifecycle = true
'';
- environment.etc."containers/policy.json".text = ''
- {"default": [{"type": "insecureAcceptAnything"}]}
- '';
+
environment.etc."cni/net.d/20-cri-o-bridge.conf".text = ''
{
"cniVersion": "0.3.1",
@@ -83,6 +81,9 @@ in
}
'';
+ # Enable the common container configuration; this will create /etc/containers/policy.json
+ virtualisation.containers.enable = true;
+
systemd.services.crio = {
description = "Container Runtime Interface for OCI (CRI-O)";
documentation = [ "https://github.com/cri-o/cri-o" ];
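With the inline policy.json removed from cri-o.nix, CRI-O now consumes the shared /etc/containers/policy.json (skopeo's default-policy.json unless overridden). A sketch of overriding it through the new virtualisation.containers.policy option, following the containers-policy.json(5) schema; the registry name is illustrative:

    { ... }:
    {
      virtualisation.containers.policy = {
        default = [ { type = "reject"; } ];
        transports.docker."docker.io" = [ { type = "insecureAcceptAnything"; } ];
      };
    }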
diff --git a/nixos/modules/virtualisation/nixos-containers.nix b/nixos/modules/virtualisation/nixos-containers.nix
new file mode 100644
index 000000000000..2db5c117aa7f
--- /dev/null
+++ b/nixos/modules/virtualisation/nixos-containers.nix
@@ -0,0 +1,838 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+
+ # The container's init script, a small wrapper around the regular
+ # NixOS stage-2 init script.
+ containerInit = (cfg:
+ let
+ renderExtraVeth = (name: cfg:
+ ''
+ echo "Bringing ${name} up"
+ ip link set dev ${name} up
+ ${optionalString (cfg.localAddress != null) ''
+ echo "Setting ip for ${name}"
+ ip addr add ${cfg.localAddress} dev ${name}
+ ''}
+ ${optionalString (cfg.localAddress6 != null) ''
+ echo "Setting ip6 for ${name}"
+ ip -6 addr add ${cfg.localAddress6} dev ${name}
+ ''}
+ ${optionalString (cfg.hostAddress != null) ''
+ echo "Setting route to host for ${name}"
+ ip route add ${cfg.hostAddress} dev ${name}
+ ''}
+ ${optionalString (cfg.hostAddress6 != null) ''
+ echo "Setting route6 to host for ${name}"
+ ip -6 route add ${cfg.hostAddress6} dev ${name}
+ ''}
+ ''
+ );
+ in
+ pkgs.writeScript "container-init"
+ ''
+ #! ${pkgs.runtimeShell} -e
+
+ # Initialise the container side of the veth pair.
+ if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
+ [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
+ [ -n "$HOST_BRIDGE" ]; then
+ ip link set host0 name eth0
+ ip link set dev eth0 up
+
+ if [ -n "$LOCAL_ADDRESS" ]; then
+ ip addr add $LOCAL_ADDRESS dev eth0
+ fi
+ if [ -n "$LOCAL_ADDRESS6" ]; then
+ ip -6 addr add $LOCAL_ADDRESS6 dev eth0
+ fi
+ if [ -n "$HOST_ADDRESS" ]; then
+ ip route add $HOST_ADDRESS dev eth0
+ ip route add default via $HOST_ADDRESS
+ fi
+ if [ -n "$HOST_ADDRESS6" ]; then
+ ip -6 route add $HOST_ADDRESS6 dev eth0
+ ip -6 route add default via $HOST_ADDRESS6
+ fi
+
+ ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
+ fi
+
+ # Start the regular stage 1 script.
+ exec "$1"
+ ''
+ );
+
+ nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");
+
+ startScript = cfg:
+ ''
+ mkdir -p -m 0755 "$root/etc" "$root/var/lib"
+ mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
+ if ! [ -e "$root/etc/os-release" ]; then
+ touch "$root/etc/os-release"
+ fi
+
+ if ! [ -e "$root/etc/machine-id" ]; then
+ touch "$root/etc/machine-id"
+ fi
+
+ mkdir -p -m 0755 \
+ "/nix/var/nix/profiles/per-container/$INSTANCE" \
+ "/nix/var/nix/gcroots/per-container/$INSTANCE"
+
+ cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
+
+ if [ "$PRIVATE_NETWORK" = 1 ]; then
+ extraFlags+=" --private-network"
+ fi
+
+ if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
+ [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
+ extraFlags+=" --network-veth"
+ fi
+
+ if [ -n "$HOST_PORT" ]; then
+ OIFS=$IFS
+ IFS=","
+ for i in $HOST_PORT
+ do
+ extraFlags+=" --port=$i"
+ done
+ IFS=$OIFS
+ fi
+
+ if [ -n "$HOST_BRIDGE" ]; then
+ extraFlags+=" --network-bridge=$HOST_BRIDGE"
+ fi
+
+ extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"
+
+ for iface in $INTERFACES; do
+ extraFlags+=" --network-interface=$iface"
+ done
+
+ for iface in $MACVLANS; do
+ extraFlags+=" --network-macvlan=$iface"
+ done
+
+ # If the host is 64-bit and the container is 32-bit, add a
+ # --personality flag.
+ ${optionalString (config.nixpkgs.localSystem.system == "x86_64-linux") ''
+ if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
+ extraFlags+=" --personality=x86"
+ fi
+ ''}
+
+ # Run systemd-nspawn without startup notification (we'll
+ # wait for the container systemd to signal readiness).
+ exec ${config.systemd.package}/bin/systemd-nspawn \
+ --keep-unit \
+ -M "$INSTANCE" -D "$root" $extraFlags \
+ $EXTRA_NSPAWN_FLAGS \
+ --notify-ready=yes \
+ --bind-ro=/nix/store \
+ --bind-ro=/nix/var/nix/db \
+ --bind-ro=/nix/var/nix/daemon-socket \
+ --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
+ --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
+ ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
+ --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
+ --setenv HOST_BRIDGE="$HOST_BRIDGE" \
+ --setenv HOST_ADDRESS="$HOST_ADDRESS" \
+ --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
+ --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
+ --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
+ --setenv HOST_PORT="$HOST_PORT" \
+ --setenv PATH="$PATH" \
+ ${optionalString cfg.ephemeral "--ephemeral"} \
+ ${if cfg.additionalCapabilities != null && cfg.additionalCapabilities != [] then
+ ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'' else ""
+ } \
+ ${if cfg.tmpfs != null && cfg.tmpfs != [] then
+ ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}'' else ""
+ } \
+ ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
+ '';
+
+ preStartScript = cfg:
+ ''
+ # Clean up existing machined registration and interfaces.
+ machinectl terminate "$INSTANCE" 2> /dev/null || true
+
+ if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
+ [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
+ ip link del dev "ve-$INSTANCE" 2> /dev/null || true
+ ip link del dev "vb-$INSTANCE" 2> /dev/null || true
+ fi
+
+ ${concatStringsSep "\n" (
+ mapAttrsToList (name: cfg:
+ ''ip link del dev ${name} 2> /dev/null || true ''
+ ) cfg.extraVeths
+ )}
+ '';
+
+ postStartScript = (cfg:
+ let
+ ipcall = cfg: ipcmd: variable: attribute:
+ if cfg.${attribute} == null then
+ ''
+ if [ -n "${variable}" ]; then
+ ${ipcmd} add ${variable} dev $ifaceHost
+ fi
+ ''
+ else
+ ''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost'';
+ renderExtraVeth = name: cfg:
+ if cfg.hostBridge != null then
+ ''
+ # Add ${name} to bridge ${cfg.hostBridge}
+ ip link set dev ${name} master ${cfg.hostBridge} up
+ ''
+ else
+ ''
+ echo "Bring ${name} up"
+ ip link set dev ${name} up
+ # Set IPs and routes for ${name}
+ ${optionalString (cfg.hostAddress != null) ''
+ ip addr add ${cfg.hostAddress} dev ${name}
+ ''}
+ ${optionalString (cfg.hostAddress6 != null) ''
+ ip -6 addr add ${cfg.hostAddress6} dev ${name}
+ ''}
+ ${optionalString (cfg.localAddress != null) ''
+ ip route add ${cfg.localAddress} dev ${name}
+ ''}
+ ${optionalString (cfg.localAddress6 != null) ''
+ ip -6 route add ${cfg.localAddress6} dev ${name}
+ ''}
+ '';
+ in
+ ''
+ if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
+ [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
+ if [ -z "$HOST_BRIDGE" ]; then
+ ifaceHost=ve-$INSTANCE
+ ip link set dev $ifaceHost up
+
+ ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
+ ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
+ ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
+ ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
+ fi
+ ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
+ fi
+ ''
+ );
+
+ serviceDirectives = cfg: {
+ ExecReload = pkgs.writeScript "reload-container"
+ ''
+ #! ${pkgs.runtimeShell} -e
+ ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
+ bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
+ '';
+
+ SyslogIdentifier = "container %i";
+
+ EnvironmentFile = "-/etc/containers/%i.conf";
+
+ Type = "notify";
+
+ RuntimeDirectory = lib.optional cfg.ephemeral "containers/%i";
+
+ # Note that on reboot, systemd-nspawn returns 133, so this
+ # unit will be restarted. On poweroff, it returns 0, so the
+ # unit won't be restarted.
+ RestartForceExitStatus = "133";
+ SuccessExitStatus = "133";
+
+ # Some containers take long to start
+ # especially when you automatically start many at once
+ TimeoutStartSec = cfg.timeoutStartSec;
+
+ Restart = "on-failure";
+
+ Slice = "machine.slice";
+ Delegate = true;
+
+ # Hack: we don't want to kill systemd-nspawn, since we call
+ # "machinectl poweroff" in preStop to shut down the
+ # container cleanly. But systemd requires sending a signal
+ # (at least if we want remaining processes to be killed
+ # after the timeout). So send an ignored signal.
+ KillMode = "mixed";
+ KillSignal = "WINCH";
+
+ DevicePolicy = "closed";
+ DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
+ };
+
+
+ system = config.nixpkgs.localSystem.system;
+
+ bindMountOpts = { name, ... }: {
+
+ options = {
+ mountPoint = mkOption {
+ example = "/mnt/usb";
+ type = types.str;
+ description = "Mount point on the container file system.";
+ };
+ hostPath = mkOption {
+ default = null;
+ example = "/home/alice";
+ type = types.nullOr types.str;
+ description = "Location of the host path to be mounted.";
+ };
+ isReadOnly = mkOption {
+ default = true;
+ type = types.bool;
+ description = "Determine whether the mounted path will be accessed in read-only mode.";
+ };
+ };
+
+ config = {
+ mountPoint = mkDefault name;
+ };
+
+ };
+
+ allowedDeviceOpts = { ... }: {
+ options = {
+ node = mkOption {
+ example = "/dev/net/tun";
+ type = types.str;
+ description = "Path to device node";
+ };
+ modifier = mkOption {
+ example = "rw";
+ type = types.str;
+ description = ''
+ Device node access modifier. Takes a combination
+ r (read), w (write), and
+ m (mknod). See the
+ systemd.resource-control(5) man page for more
+ information.'';
+ };
+ };
+ };
+
+
+ mkBindFlag = d:
+ let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
+ mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
+ in flagPrefix + mountstr ;
+
+ mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);
+
+ networkOptions = {
+ hostBridge = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = "br0";
+ description = ''
+ Put the host-side of the veth-pair into the named bridge.
+ Only one of hostAddress* or hostBridge can be given.
+ '';
+ };
+
+ forwardPorts = mkOption {
+ type = types.listOf (types.submodule {
+ options = {
+ protocol = mkOption {
+ type = types.str;
+ default = "tcp";
+ description = "The protocol specifier for port forwarding between host and container";
+ };
+ hostPort = mkOption {
+ type = types.int;
+ description = "Source port of the external interface on host";
+ };
+ containerPort = mkOption {
+ type = types.nullOr types.int;
+ default = null;
+ description = "Target port of container";
+ };
+ };
+ });
+ default = [];
+ example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
+ description = ''
+ List of forwarded ports from host to container. Each forwarded port
+ is specified by protocol, hostPort and containerPort. By default,
+ protocol is tcp and hostPort and containerPort are assumed to be
+ the same if containerPort is not explicitly given.
+ '';
+ };
+
+
+ hostAddress = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = "10.231.136.1";
+ description = ''
+ The IPv4 address assigned to the host interface.
+ (Not used when hostBridge is set.)
+ '';
+ };
+
+ hostAddress6 = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = "fc00::1";
+ description = ''
+ The IPv6 address assigned to the host interface.
+ (Not used when hostBridge is set.)
+ '';
+ };
+
+ localAddress = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = "10.231.136.2";
+ description = ''
+ The IPv4 address assigned to the interface in the container.
+ If a hostBridge is used, this should be given with netmask to access
+ the whole network. Otherwise the default netmask is /32 and routing is
+ set up from localAddress to hostAddress and back.
+ '';
+ };
+
+ localAddress6 = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ example = "fc00::2";
+ description = ''
+ The IPv6 address assigned to the interface in the container.
+ If a hostBridge is used, this should be given with netmask to access
+ the whole network. Otherwise the default netmask is /128 and routing is
+ set up from localAddress6 to hostAddress6 and back.
+ '';
+ };
+
+ };
+
+ dummyConfig =
+ {
+ extraVeths = {};
+ additionalCapabilities = [];
+ ephemeral = false;
+ timeoutStartSec = "15s";
+ allowedDevices = [];
+ hostAddress = null;
+ hostAddress6 = null;
+ localAddress = null;
+ localAddress6 = null;
+ tmpfs = null;
+ };
+
+in
+
+{
+ options = {
+
+ boot.isContainer = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Whether this NixOS machine is a lightweight container running
+ in another NixOS system.
+ '';
+ };
+
+ boot.enableContainers = mkOption {
+ type = types.bool;
+ default = !config.boot.isContainer;
+ description = ''
+ Whether to enable support for NixOS containers.
+ '';
+ };
+
+ containers = mkOption {
+ type = types.attrsOf (types.submodule (
+ { config, options, name, ... }:
+ {
+ options = {
+
+ config = mkOption {
+ description = ''
+ A specification of the desired configuration of this
+ container, as a NixOS module.
+ '';
+ type = let
+ confPkgs = if config.pkgs == null then pkgs else config.pkgs;
+ in lib.mkOptionType {
+ name = "Toplevel NixOS config";
+ merge = loc: defs: (import (confPkgs.path + "/nixos/lib/eval-config.nix") {
+ inherit system;
+ pkgs = confPkgs;
+ baseModules = import (confPkgs.path + "/nixos/modules/module-list.nix");
+ inherit (confPkgs) lib;
+ modules =
+ let
+ extraConfig = {
+ _file = "module at ${__curPos.file}:${toString __curPos.line}";
+ config = {
+ boot.isContainer = true;
+ networking.hostName = mkDefault name;
+ networking.useDHCP = false;
+ assertions = [
+ {
+ assertion = config.privateNetwork -> stringLength name < 12;
+ message = ''
+ Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
+ not be longer than 11 characters, because the container's interface name is derived from it.
+ This might be fixed in the future. See https://github.com/NixOS/nixpkgs/issues/38509
+ '';
+ }
+ ];
+ };
+ };
+ in [ extraConfig ] ++ (map (x: x.value) defs);
+ prefix = [ "containers" name ];
+ }).config;
+ };
+ };
+
+ path = mkOption {
+ type = types.path;
+ example = "/nix/var/nix/profiles/containers/webserver";
+ description = ''
+ As an alternative to specifying
+ <option>config</option>, you can specify the path to
+ the evaluated NixOS system configuration, typically a
+ symlink to a system profile.
+ '';
+ };
+
+ additionalCapabilities = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
+ description = ''
+ Grant additional capabilities to the container. See the
+ capabilities(7) and systemd-nspawn(1) man pages for more
+ information.
+ '';
+ };
+
+ pkgs = mkOption {
+ type = types.nullOr types.attrs;
+ default = null;
+ example = literalExample "pkgs";
+ description = ''
+ Customise which nixpkgs to use for this container.
+ '';
+ };
+
+ ephemeral = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Runs the container in ephemeral mode with an empty root filesystem at boot.
+ This way the container is bootstrapped from scratch on each boot
+ and cleaned up on shutdown, leaving no traces behind.
+ Useful for completely stateless, reproducible containers.
+
+ Note that this option might require some adjustments to the container configuration,
+ e.g. you might want to set
+ systemd.network.networks.$interface.dhcpConfig.ClientIdentifier to "mac"
+ if you use the macvlans option.
+ This way the DHCP client identifier stays stable across container restarts.
+
+ Note that the container journal will not be linked to the host if this option is enabled.
+ '';
+ };
+
+ enableTun = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Allows the container to create and setup tunnel interfaces
+ by granting the NET_ADMIN capability and
+ enabling access to /dev/net/tun.
+ '';
+ };
+
+ privateNetwork = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Whether to give the container its own private virtual
+ Ethernet interface. The interface is called
+ eth0, and is hooked up to the interface
+ ve-container-name
+ on the host. If this option is not set, then the
+ container shares the network interfaces of the host,
+ and can bind to any port on any interface.
+ '';
+ };
+
+ interfaces = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = [ "eth1" "eth2" ];
+ description = ''
+ The list of interfaces to be moved into the container.
+ '';
+ };
+
+ macvlans = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = [ "eth1" "eth2" ];
+ description = ''
+ The list of host interfaces from which macvlans will be
+ created. For each interface specified, a macvlan interface
+ will be created and moved to the container.
+ '';
+ };
+
+ extraVeths = mkOption {
+ type = with types; attrsOf (submodule { options = networkOptions; });
+ default = {};
+ description = ''
+ Extra veth-pairs to be created for the container
+ '';
+ };
+
+ autoStart = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Whether the container is automatically started at boot-time.
+ '';
+ };
+
+ timeoutStartSec = mkOption {
+ type = types.str;
+ default = "1min";
+ description = ''
+ Time for the container to start. In case of a timeout,
+ the container processes get killed.
+ See systemd.time(7)
+ for more information about the format.
+ '';
+ };
+
+ bindMounts = mkOption {
+ type = with types; loaOf (submodule bindMountOpts);
+ default = {};
+ example = literalExample ''
+ { "/home" = { hostPath = "/home/alice";
+ isReadOnly = false; };
+ }
+ '';
+
+ description =
+ ''
+ An extra list of directories that is bound to the container.
+ '';
+ };
+
+ allowedDevices = mkOption {
+ type = with types; listOf (submodule allowedDeviceOpts);
+ default = [];
+ example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
+ description = ''
+ A list of device nodes to which the container has access.
+ '';
+ };
+
+ tmpfs = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = [ "/var" ];
+ description = ''
+ Mounts a set of tmpfs file systems into the container.
+ Multiple paths can be specified.
+ Valid items must conform to the --tmpfs argument
+ of systemd-nspawn. See systemd-nspawn(1) for details.
+ '';
+ };
+
+ extraFlags = mkOption {
+ type = types.listOf types.str;
+ default = [];
+ example = [ "--drop-capability=CAP_SYS_CHROOT" ];
+ description = ''
+ Extra flags passed to the systemd-nspawn command.
+ See systemd-nspawn(1) for details.
+ '';
+ };
+
+ } // networkOptions;
+
+ config = mkMerge
+ [
+ (mkIf options.config.isDefined {
+ path = config.config.system.build.toplevel;
+ })
+ ];
+ }));
+
+ default = {};
+ example = literalExample
+ ''
+ { webserver =
+ { path = "/nix/var/nix/profiles/webserver";
+ };
+ database =
+ { config =
+ { config, pkgs, ... }:
+ { services.postgresql.enable = true;
+ services.postgresql.package = pkgs.postgresql_9_6;
+
+ system.stateVersion = "17.03";
+ };
+ };
+ }
+ '';
+ description = ''
+ A set of NixOS system configurations to be run as lightweight
+ containers. Each container appears as a service
+ container-name
+ on the host system, allowing it to be started and stopped via
+ systemctl.
+ '';
+ };
+
+ };
+
+
+ config = mkIf (config.boot.enableContainers) (let
+
+ unit = {
+ description = "Container '%i'";
+
+ unitConfig.RequiresMountsFor = "/var/lib/containers/%i";
+
+ path = [ pkgs.iproute ];
+
+ environment = {
+ root = "/var/lib/containers/%i";
+ INSTANCE = "%i";
+ };
+
+ preStart = preStartScript dummyConfig;
+
+ script = startScript dummyConfig;
+
+ postStart = postStartScript dummyConfig;
+
+ preStop = "machinectl poweroff $INSTANCE";
+
+ restartIfChanged = false;
+
+ serviceConfig = serviceDirectives dummyConfig;
+ };
+ in {
+ systemd.targets.multi-user.wants = [ "machines.target" ];
+
+ systemd.services = listToAttrs (filter (x: x.value != null) (
+ # The generic container template used by imperative containers
+ [{ name = "container@"; value = unit; }]
+ # declarative containers
+ ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
+ containerConfig = cfg // (
+ if cfg.enableTun then
+ {
+ allowedDevices = cfg.allowedDevices
+ ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
+ additionalCapabilities = cfg.additionalCapabilities
+ ++ [ "CAP_NET_ADMIN" ];
+ }
+ else {});
+ in
+ recursiveUpdate unit {
+ preStart = preStartScript containerConfig;
+ script = startScript containerConfig;
+ postStart = postStartScript containerConfig;
+ serviceConfig = serviceDirectives containerConfig;
+ unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "/var/lib/containers/%i";
+ environment.root = if containerConfig.ephemeral then "/run/containers/%i" else "/var/lib/containers/%i";
+ } // (
+ if containerConfig.autoStart then
+ {
+ wantedBy = [ "machines.target" ];
+ wants = [ "network.target" ];
+ after = [ "network.target" ];
+ restartTriggers = [
+ containerConfig.path
+ config.environment.etc."containers/${name}.conf".source
+ ];
+ restartIfChanged = true;
+ }
+ else {})
+ )) config.containers)
+ ));
+
+ # Generate a configuration file in /etc/containers for each
+ # container so that container@.target can get the container
+ # configuration.
+ environment.etc =
+ let mkPortStr = p: p.protocol + ":" + (toString p.hostPort) + ":" + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
+ in mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
+ { text =
+ ''
+ SYSTEM_PATH=${cfg.path}
+ ${optionalString cfg.privateNetwork ''
+ PRIVATE_NETWORK=1
+ ${optionalString (cfg.hostBridge != null) ''
+ HOST_BRIDGE=${cfg.hostBridge}
+ ''}
+ ${optionalString (length cfg.forwardPorts > 0) ''
+ HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
+ ''}
+ ${optionalString (cfg.hostAddress != null) ''
+ HOST_ADDRESS=${cfg.hostAddress}
+ ''}
+ ${optionalString (cfg.hostAddress6 != null) ''
+ HOST_ADDRESS6=${cfg.hostAddress6}
+ ''}
+ ${optionalString (cfg.localAddress != null) ''
+ LOCAL_ADDRESS=${cfg.localAddress}
+ ''}
+ ${optionalString (cfg.localAddress6 != null) ''
+ LOCAL_ADDRESS6=${cfg.localAddress6}
+ ''}
+ ''}
+ INTERFACES="${toString cfg.interfaces}"
+ MACVLANS="${toString cfg.macvlans}"
+ ${optionalString cfg.autoStart ''
+ AUTO_START=1
+ ''}
+ EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts +
+ optionalString (cfg.extraFlags != [])
+ (" " + concatStringsSep " " cfg.extraFlags)}"
+ '';
+ }) config.containers;
+
+ # Generate /etc/hosts entries for the containers.
+ networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
+ ''
+ ${head (splitString "/" cfg.localAddress)} ${name}.containers
+ '') config.containers);
+
+ networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
+
+ services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
+ # Don't manage interfaces created by nixos-container.
+ ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
+ '';
+
+ environment.systemPackages = [ pkgs.nixos-container ];
+
+ boot.kernelModules = [
+ "bridge"
+ "macvlan"
+ "tap"
+ "tun"
+ ];
+ });
+}
diff --git a/nixos/modules/virtualisation/podman.nix b/nixos/modules/virtualisation/podman.nix
new file mode 100644
index 000000000000..2ec45fa18a46
--- /dev/null
+++ b/nixos/modules/virtualisation/podman.nix
@@ -0,0 +1,80 @@
+{ config, lib, pkgs, ... }:
+let
+ cfg = config.virtualisation.podman;
+
+ inherit (lib) mkOption types;
+
+ # Provides a fake "docker" binary mapping to podman
+ dockerCompat = pkgs.runCommandNoCC "${pkgs.podman.pname}-docker-compat-${pkgs.podman.version}" {
+ outputs = [ "out" "bin" "man" ];
+ inherit (pkgs.podman) meta;
+ } ''
+ mkdir $out
+
+ mkdir -p $bin/bin
+ ln -s ${pkgs.podman.bin}/bin/podman $bin/bin/docker
+
+ mkdir -p $man/share/man/man1
+ for f in ${pkgs.podman.man}/share/man/man1/*; do
+ basename=$(basename $f | sed s/podman/docker/g)
+ ln -s $f $man/share/man/man1/$basename
+ done
+ '';
+
+ # Copy configuration files individually so the full package sources don't end up in the system closure
+ copyFile = filePath: pkgs.runCommandNoCC (builtins.unsafeDiscardStringContext (builtins.baseNameOf filePath)) {} ''
+ cp ${filePath} $out
+ '';
+
+in
+{
+ meta = {
+ maintainers = lib.teams.podman.members;
+ };
+
+ options.virtualisation.podman = {
+
+ enable =
+ mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ This option enables Podman, a daemonless container engine for
+ developing, managing, and running OCI containers on your Linux system.
+
+ It is a drop-in replacement for the docker command.
+ '';
+ };
+
+ dockerCompat = mkOption {
+ type = types.bool;
+ default = false;
+ description = ''
+ Create an alias mapping docker to podman.
+ '';
+ };
+
+ };
+
+ config = lib.mkIf cfg.enable {
+
+ environment.systemPackages = [
+ pkgs.podman # Docker compat
+ pkgs.runc # Default container runtime
+ pkgs.crun # Default container runtime (cgroups v2)
+ pkgs.conmon # Container runtime monitor
+ pkgs.slirp4netns # User-mode networking for unprivileged namespaces
+ pkgs.fuse-overlayfs # CoW for images, much faster than default vfs
+ pkgs.utillinux # nsenter
+ pkgs.cni-plugins # Networking plugins
+ pkgs.iptables
+ ]
+ ++ lib.optional cfg.dockerCompat dockerCompat;
+
+ environment.etc."cni/net.d/87-podman-bridge.conflist".source = copyFile "${pkgs.podman.src}/cni/87-podman-bridge.conflist";
+
+ virtualisation.containers.enable = true;
+
+ };
+
+}
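A sketch of the docker-compat shim in use together with the rootless plumbing from the containers module; "alice" is a placeholder user, and the docker command here is just the symlink to podman created by dockerCompat above:

    { ... }:
    {
      virtualisation.podman = {
        enable = true;
        dockerCompat = true;  # puts a "docker" -> podman symlink on $PATH
      };
      # allocate subuid/subgid ranges so alice can run rootless containers
      virtualisation.containers.users = [ "alice" ];
    }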
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 69333913bb9c..cbfdf3ad9c20 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -248,6 +248,7 @@ in
php = handleTest ./php {};
plasma5 = handleTest ./plasma5.nix {};
plotinus = handleTest ./plotinus.nix {};
+ podman = handleTest ./podman.nix {};
postgis = handleTest ./postgis.nix {};
postgresql = handleTest ./postgresql.nix {};
postgresql-wal-receiver = handleTest ./postgresql-wal-receiver.nix {};
diff --git a/nixos/tests/podman.nix b/nixos/tests/podman.nix
new file mode 100644
index 000000000000..ccfe8334a160
--- /dev/null
+++ b/nixos/tests/podman.nix
@@ -0,0 +1,30 @@
+# This test runs podman and checks that a simple container starts
+
+import ./make-test-python.nix (
+ { pkgs, lib, ... }: {
+ name = "podman";
+ meta = {
+ maintainers = lib.teams.podman.members;
+ };
+
+ nodes = {
+ podman =
+ { pkgs, ... }:
+ {
+ virtualisation.podman.enable = true;
+ };
+ };
+
+ testScript = ''
+ start_all()
+
+ podman.wait_for_unit("sockets.target")
+ podman.succeed("tar cv --files-from /dev/null | podman import - scratchimg")
+ podman.succeed(
+ "podman run -d --name=sleeping -v /nix/store:/nix/store -v /run/current-system/sw/bin:/bin scratchimg /bin/sleep 10"
+ )
+ podman.succeed("podman ps | grep sleeping")
+ podman.succeed("podman stop sleeping")
+ '';
+ }
+)