Initial custom systemd-nspawn based containers rewrite

This commit is contained in:
Jack O'Sullivan 2022-04-03 19:01:21 +01:00
parent 1d233e323f
commit 19dcdcfa30
5 changed files with 205 additions and 76 deletions

13
flake.lock generated
View File

@ -180,15 +180,16 @@
},
"nixpkgs-mine": {
"locked": {
"lastModified": 1644969450,
"narHash": "sha256-DgDeMJmgIWJcZGzGYpF8V3dHzM77pXlrXxFyGM29Ze8=",
"lastModified": 1648933481,
"narHash": "sha256-ziMZ55TOahiD9iO+YfBcAeCm2mT3wfmfZ73UTvuBHhg=",
"owner": "devplayer0",
"repo": "nixpkgs",
"rev": "c374a5dd496f0acb95ab44fe54241195ea6b55b9",
"rev": "5fd6f5662c320506aba548bb03cfd8f63dac2c1a",
"type": "github"
},
"original": {
"owner": "devplayer0",
"ref": "devplayer0",
"repo": "nixpkgs",
"type": "github"
}
@ -210,11 +211,11 @@
},
"nixpkgs-unstable": {
"locked": {
"lastModified": 1645334861,
"narHash": "sha256-We9ECiMglthzbZ5S6Myqqf+RHzBFZPoM2qL5/jDkUjs=",
"lastModified": 1648390671,
"narHash": "sha256-u69opCeHUx3CsdIerD0wVSR+DjfDQjnztObqfk9Trqc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d5f237872975e6fb6f76eef1368b5634ffcd266f",
"rev": "ce8cbe3c01fd8ee2de526ccd84bbf9b82397a510",
"type": "github"
},
"original": {

View File

@ -9,7 +9,7 @@
nixpkgs-master.url = "nixpkgs";
nixpkgs-unstable.url = "nixpkgs/nixos-unstable";
nixpkgs-stable.url = "nixpkgs/nixos-21.11";
nixpkgs-mine.url = "github:devplayer0/nixpkgs";
nixpkgs-mine.url = "github:devplayer0/nixpkgs/devplayer0";
home-manager-unstable.url = "home-manager";
home-manager-unstable.inputs.nixpkgs.follows = "nixpkgs-unstable";

View File

@ -1,7 +1,7 @@
{
nixos.systems.colony = {
system = "x86_64-linux";
nixpkgs = "unstable";
nixpkgs = "mine";
home-manager = "unstable";
configuration = { lib, pkgs, modulesPath, config, ... }:

View File

@ -1,29 +1,89 @@
{ lib, options, config, systems, ... }:
{ lib, pkgs, options, config, systems, ... }:
let
inherit (builtins) attrNames attrValues mapAttrs;
inherit (lib) concatMapStringsSep filterAttrs mkDefault mkIf mkMerge mkAliasDefinitions mkVMOverride mkAfter;
inherit (lib.my) mkOpt';
inherit (builtins) attrNames attrValues mapAttrs all;
inherit (lib) groupBy' flatten mapAttrsToList optionalString optional concatMapStringsSep filterAttrs mkOption mkDefault mkIf mkMerge mkAliasDefinitions mkVMOverride mkAfter;
inherit (lib.my) mkOpt' mkBoolOpt' attrsToNVList;
cfg = config.my.containers;
devVMKeyPath = "/run/dev.key";
ctrProfiles = n: "/nix/var/nix/profiles/per-container/${n}";
# Placeholder "system" for a container whose real configuration has not been deployed yet.
# It is a store path holding a single executable `init` script that brings the `host0` link up
# and then loops forever, printing a reminder every 5 seconds.
dummyProfile = pkgs.writeTextFile {
name = "dummy-init";
executable = true;
# Write the script to `init` inside the output directory rather than making the output itself a file
destination = "/init";
# Although this will be in the new root, the shell will be available because the store will be mounted!
text = ''
#!${pkgs.runtimeShell}
${pkgs.iproute2}/bin/ip link set dev host0 up
while true; do
echo "This is a dummy, please deploy the real container!"
${pkgs.coreutils}/bin/sleep 5
done
'';
};
# Submodule describing a single bind mount into a container.
# `mountPoint` falls back to the attribute name the submodule is defined under, so the key of a
# `bindMounts` entry doubles as its mount point unless overridden.
bindMountOpts = { name, ... }: with lib.types; {
  options.mountPoint = mkOption {
    type = str;
    example = "/mnt/usb";
    description = "Mount point on the container file system.";
  };

  # `null` means no separate host path was given; only the mount point is used for the bind.
  options.hostPath = mkOption {
    type = nullOr str;
    default = null;
    example = "/home/alice";
    description = "Location of the host path to be mounted.";
  };

  options.readOnly = mkOption {
    type = bool;
    default = true;
    description = "Determine whether the mounted path will be accessed in read-only mode.";
  };

  config.mountPoint = mkDefault name;
};
# Submodule holding the per-zone settings of a container network zone.
netZoneOpts = { name, ... }: with lib.types; {
  options.hostAddresses =
    mkOpt' (either str (listOf str)) null "Addresses for the host bridge.";
};
containerOpts = with lib.types; { name, ... }: {
options = {
system = mkOpt' unspecified systems."${name}".configuration.config.my.buildAs.container
"Top-level system configuration.";
opts = mkOpt' lib.my.naiveModule { } "Options to pass to `containers.*name*`.";
system = mkOpt' path "${ctrProfiles name}/system" "Path to NixOS system configuration.";
containerSystem = mkOpt' path "/nix/var/nix/profiles/system" "Path to NixOS system configuration from within container.";
autoStart = mkBoolOpt' true "Whether to start the container automatically at boot.";
# Yoinked from nixos/modules/virtualisation/nixos-containers.nix
bindMounts = mkOption {
type = attrsOf (submodule bindMountOpts);
default = { };
description =
''
An extra list of directories that is bound to the container.
'';
};
networkZone = mkOpt' str "containers" "Network zone to connect to.";
};
};
in
{
options.my.containers = with lib.types; {
networking = {
bridgeName = mkOpt' str "containers" "Name of host bridge.";
hostAddresses = mkOpt' (either str (listOf str)) "172.16.137.1/24" "Addresses for the host bridge.";
};
persistDir = mkOpt' str "/persist/containers" "Where to store container persistence data.";
instances = mkOpt' (attrsOf (submodule containerOpts)) { } "Individual containers.";
networkZones = mkOpt' (attrsOf (submodule netZoneOpts)) {
"containers" = {
hostAddresses = "172.16.137.1/24";
};
} "systemd-nspawn network zones";
};
config = mkMerge [
@ -33,38 +93,39 @@ in
assertion = config.systemd.network.enable;
message = "Containers currently require systemd-networkd!";
}
{
assertion = all (z: cfg.networkZones ? "${z}") (mapAttrsToList (_: c: c.networkZone) cfg.instances);
message = "Each container must be within one of the configured network zones.";
}
];
my.firewall.trustedInterfaces = [ cfg.networking.bridgeName ];
my.firewall.trustedInterfaces = (attrNames cfg.networkZones) ++ (map (n: "vb-${n}") (attrNames cfg.instances));
systemd = {
systemd = mkMerge ([
{
# By symlinking to the original systemd-nspawn@.service for every instance we force the unit generator to
# create overrides instead of replacing the unit entirely
packages = [
(pkgs.linkFarm "systemd-nspawn-containers" (map (n: {
name = "etc/systemd/system/systemd-nspawn@${n}.service";
path = "${pkgs.systemd}/example/systemd/system/systemd-nspawn@.service";
}) (attrNames cfg.instances)))
];
}
] ++ (mapAttrsToList (n: z: {
network = {
netdevs."25-container-bridge".netdevConfig = {
Name = cfg.networking.bridgeName;
netdevs."25-container-bridge-${n}".netdevConfig = {
Name = n;
Kind = "bridge";
};
# Based on the pre-installed 80-container-vz
networks."80-container-vb" = {
# Replace the pre-installed config
networks."80-container-bridge-${n}" = {
matchConfig = {
Name = "vb-*";
Driver = "veth";
};
networkConfig = {
# systemd LLDP doesn't work on bridge interfaces
LLDP = true;
EmitLLDP = "customer-bridge";
# Although nspawn will set the veth's master, systemd will clear it (systemd 250 adds a `KeepMaster`
# to avoid this)
Bridge = cfg.networking.bridgeName;
};
};
networks."80-containers-bridge" = {
matchConfig = {
Name = cfg.networking.bridgeName;
Name = n;
Driver = "bridge";
};
networkConfig = {
Address = cfg.networking.hostAddresses;
Address = z.hostAddresses;
DHCPServer = true;
# TODO: Configuration for routed IPv6 (and maybe IPv4)
IPMasquerade = "both";
@ -72,36 +133,88 @@ in
};
};
};
tmpfiles.rules = map (n: "d ${cfg.persistDir}/${n} 0755 root root") (attrNames cfg.instances);
};
containers = mapAttrs (n: c: mkMerge [
{
path = "/nix/var/nix/profiles/per-container/${n}";
ephemeral = true;
autoStart = mkDefault true;
bindMounts = {
"/persist" = {
hostPath = "${cfg.persistDir}/${n}";
isReadOnly = false;
};
}) cfg.networkZones) ++ (mapAttrsToList (n: c: {
nspawn."${n}" = {
execConfig = {
Boot = true;
Ephemeral = true;
LinkJournal = false;
NotifyReady = true;
ResolvConf = "bind-stub";
PrivateUsers = false;
};
privateNetwork = true;
hostBridge = cfg.networking.bridgeName;
additionalCapabilities = [ "CAP_NET_ADMIN" ];
}
c.opts
(mkIf config.my.build.isDevVM {
path = mkVMOverride c.system;
bindMounts."${devVMKeyPath}" = {
hostPath = config.my.secrets.vmKeyPath;
isReadOnly = true;
filesConfig =
let
binds = groupBy'
(l: b: l ++ [ (if b.hostPath != null then "${b.hostPath}:${b.mountPoint}" else b.mountPoint) ])
[ ]
(b: if b.readOnly then "ro" else "rw")
(attrValues c.bindMounts);
in {
BindReadOnly = [
"/nix/store"
"/nix/var/nix/db"
"/nix/var/nix/daemon-socket"
] ++ optional config.my.build.isDevVM "${config.my.secrets.vmKeyPath}:${devVMKeyPath}" ++ binds.ro or [ ];
Bind = [
"${ctrProfiles n}:/nix/var/nix/profiles"
"/nix/var/nix/gcroots/per-container/${n}:/nix/var/nix/gcroots"
"${cfg.persistDir}/${n}:/persist"
] ++ binds.rw or [ ];
};
})
]) cfg.instances;
networkConfig = {
Bridge = c.networkZone;
};
};
# Per-container settings for this instance of the `systemd-nspawn@` service.
services."systemd-nspawn@${n}" = {
# systemd.nspawn units can't set the root directory directly, but /run/machines/${n} is one of the search paths
environment.root = "/run/machines/${n}";
# The pre-start script:
#  1. creates the per-container profile, gcroots and persistence directories on the host;
#  2. if `system` is the default profile path and nothing exists there yet, installs the dummy profile
#     into it with `nix-env --set`;
#  3. prepares `$root` with `sbin/`, an `etc/os-release` file and an `sbin/init` symlink pointing at the
#     container system's `init`.
preStart =
let
# Default location of the container's system profile on the host
sysProfile = "${ctrProfiles n}/system";
# In a dev VM, use the container system built from `systems.<name>` instead of the `system` option
system = if
config.my.build.isDevVM then
systems."${n}".configuration.config.my.buildAs.container else
c.system;
# Path whose `init` gets linked into the container root; in a dev VM this is the same value as `system`
containerSystem = if
config.my.build.isDevVM then
system else
c.containerSystem;
in
''
mkdir -p -m 0755 \
/nix/var/nix/{profiles,gcroots}/per-container/${n} \
${cfg.persistDir}/${n}
${optionalString (system == sysProfile)
''
if [ ! -e "${sysProfile}" ]; then
echo "Creating dummy profile"
${pkgs.nix}/bin/nix-env -p ${sysProfile} --set ${dummyProfile}
fi
''}
mkdir -p -m 0755 "$root"/sbin "$root"/etc
touch "$root"/etc/os-release
ln -sf "${containerSystem}"/init "$root"/sbin/init
'';
# Only attach the unit to machines.target when the container's `autoStart` option is set
wantedBy = optional c.autoStart "machines.target";
};
# networkd configuration for this container's `vb-<name>` veth interface.
network.networks."80-container-${n}-vb" = {
  matchConfig = { Driver = "veth"; Name = "vb-${n}"; };
  networkConfig = {
    # Although nspawn will set the veth's master, systemd will clear it (systemd 250 adds a `KeepMaster`
    # to avoid this)
    Bridge = c.networkZone;
    # systemd LLDP doesn't work on bridge interfaces
    EmitLLDP = "customer-bridge";
    LLDP = true;
  };
};
}) cfg.instances));
})
# Inside container
@ -138,10 +251,10 @@ in
networking = {
useHostResolvConf = false;
};
# Based on the pre-installed 80-container-host0
systemd.network.networks."80-container-eth0" = {
# Replace the pre-installed 80-container-host0
systemd.network.networks."80-container-host0" = {
matchConfig = {
Name = "eth0";
Name = "host0";
Virtualization = "container";
};
networkConfig = {

View File

@ -1,10 +1,23 @@
{ lib, pkgs, config, ... }:
{ lib, pkgs, config, systems, ... }:
let
inherit (builtins) head;
inherit (lib) mkMerge mkIf mkDefault;
inherit (builtins) head attrNames;
inherit (lib) mkMerge mkIf mkDefault optionalAttrs mapAttrs';
inherit (lib.my) mkOpt' mkBoolOpt';
cfg = config.my.deploy;
# deploy-rs profiles for this system's containers, keyed `container-<name>`.
# Evaluates to an empty set when `generate.containers.enable` is off.
ctrProfiles =
  let
    # Build the `container-<name>` profile entry for one container instance; activating the
    # profile restarts the container's nspawn unit.
    mkCtrProfile = n: _: {
      name = "container-${n}";
      value = {
        user = "root";
        profilePath = "/nix/var/nix/profiles/per-container/${n}/system";
        path = pkgs.deploy-rs.lib.activate.custom systems."${n}".configuration.config.my.buildAs.container
          ''
            systemctl restart systemd-nspawn@${n}
          '';
      };
    };
  in
  optionalAttrs cfg.generate.containers.enable (mapAttrs' mkCtrProfile config.my.containers.instances);
in
{
options.my.deploy = with lib.types; {
@ -18,6 +31,7 @@ in
generate = {
system.enable = mkBoolOpt' true "Whether to generate a deploy-rs profile for this system's config.";
containers.enable = mkBoolOpt' true "Whether to generate deploy-rs profiles for this system's containers.";
};
};
@ -28,13 +42,14 @@ in
(mkIf cfg.enable {
my.deploy.node = {
hostname = mkDefault config.networking.fqdn;
profilesOrder = [ "system" ] ++ (attrNames ctrProfiles);
profiles = {
system = mkIf cfg.generate.system.enable {
path = pkgs.deploy-rs.lib.activate.nixos { inherit config; };
user = "root";
};
};
} // ctrProfiles;
sshUser = "deploy";
user = mkDefault "root";