Jack O'Sullivan
7e2dfc21c6
All checks were successful
CI / Check, build and cache Nix flake (push) Successful in 44m19s
321 lines
10 KiB
Nix
321 lines
10 KiB
Nix
{ lib, pkgs, options, config, systems, ... }:
|
|
let
|
|
inherit (builtins) attrNames attrValues all hashString toJSON any;
|
|
inherit (lib)
|
|
groupBy' mapAttrsToList optionalString optional concatMapStringsSep filterAttrs mkOption mkDefault mkIf mkMerge;
|
|
inherit (lib.my) mkOpt' mkBoolOpt';
|
|
|
|
cfg = config.my.containers;
|
|
|
|
devVMKeyPath = "/run/dev.key";
|
|
ctrProfiles = n: "/nix/var/nix/profiles/per-container/${n}";
|
|
|
|
# Tiny placeholder "init" for containers that have not been deployed yet.
# It reports READY=1 to the service manager via sd_notify, then sleeps until
# it receives SIGRTMIN+3, at which point it exits successfully.
dummyReady = pkgs.runCommandCC "dummy-sd-ready" {
  buildInputs = [ pkgs.systemd ];
  # Hand the C source to the builder as a file (exposed as $codePath).
  passAsFile = [ "code" ];
  code = ''
    #include <stdio.h>
    #include <signal.h>
    #include <unistd.h>
    #include <systemd/sd-daemon.h>

    void handler(int signum) {
      (void)signum;
      // _exit() is async-signal-safe (exit() is not) and is declared by
      // <unistd.h>, so no implicit declaration is needed
      _exit(0);
    }

    int main() {
      // systemd sends this to PID 1 for an "orderly shutdown"
      signal(SIGRTMIN+3, handler);

      int ret =
        sd_notifyf(0, "READY=1\n"
                      "STATUS=Dummy container, please deploy for real!\n"
                      "MAINPID=%lu",
                      (unsigned long)getpid());
      if (ret <= 0) {
        fprintf(stderr, "sd_notify() returned %d\n", ret);
        // A return of 0 means no notification was sent; treat that as a failure too
        return ret == 0 ? -1 : ret;
      }

      // Sleep until a signal arrives; the handler terminates the process
      pause();
      return 0;
    }
  '';
} ''
  # The source file has no .c suffix, hence `-x c`. Libraries go after the
  # source so the linker resolves its undefined symbols against them.
  $CC -o "$out" -x c "$codePath" -lsystemd
'';
|
|
# Stand-in system "profile" containing only an executable /init script, used
# until a real system has been deployed into the container's profile.
dummyProfile =
  let
    ip = "${pkgs.iproute2}/bin/ip";
  in
  pkgs.writeTextFile {
    name = "dummy-init";
    destination = "/init";
    executable = true;
    # Although this will be in the new root, the shell will be available because the store will be mounted!
    text = ''
      #!${pkgs.runtimeShell}
      ${ip} link set dev host0 up

      exec ${dummyReady}
    '';
  };
|
|
|
|
# Submodule describing a single bind mount; the attribute name is the default
# for both the host path and the mount point inside the container.
bindMountOpts = { name, ... }:
  let
    inherit (lib.types) bool nullOr str;
  in
  {
    options = {
      hostPath = mkOption {
        type = nullOr str;
        default = name;
        example = "/home/alice";
        description = "Location of the host path to be mounted.";
      };

      mountPoint = mkOption {
        type = str;
        default = name;
        example = "/mnt/usb";
        description = "Mount point on the container file system.";
      };

      readOnly = mkOption {
        type = bool;
        default = true;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };
  };
|
|
|
|
# Submodule describing one container instance; `name` is the attribute name
# under `my.containers.instances`.
containerOpts = { name, ... }:
  let
    inherit (lib.types) attrsOf nullOr path str submodule;
  in
  {
    options = {
      # Defaults to the per-container profile link on the host.
      system = mkOpt' path "${ctrProfiles name}/system" "Path to NixOS system configuration.";
      containerSystem = mkOpt' path "/nix/var/nix/profiles/system" "Path to NixOS system configuration from within container.";

      autoStart = mkBoolOpt' true "Whether to start the container automatically at boot.";
      hotReload = mkBoolOpt' true
        "Whether to apply new configuration by running `switch-to-configuration` instead of rebooting the container.";

      # Yoinked from nixos/modules/virtualisation/nixos-containers.nix
      bindMounts = mkOption {
        default = { };
        type = attrsOf (submodule bindMountOpts);
        description = ''
          An extra list of directories that is bound to the container.
        '';
      };

      # At most one of these may be set (enforced by an assertion in the module config).
      networking.bridge = mkOpt' (nullOr str) null "Network bridge to connect to.";
      networking.macVLAN = mkOpt' (nullOr str) null "Network interface to make MACVLAN interface from.";
    };
  };
|
|
in
|
|
{
|
|
# Host-side option declarations for the container module.
options.my.containers =
  let
    inherit (lib.types) attrsOf str submodule;
  in
  {
    instances = mkOpt' (attrsOf (submodule containerOpts)) { } "Individual containers.";
    persistDir = mkOpt' str "/persist/containers" "Where to store container persistence data.";
  };
|
|
|
|
config = mkMerge [
|
|
(mkIf (cfg.instances != { }) {
|
|
assertions =
  let
    # True for an instance that asks for both a bridge and a MACVLAN.
    wantsBoth = i: i.networking.bridge != null && i.networking.macVLAN != null;
  in
  [
    {
      message = "Containers currently require systemd-networkd!";
      assertion = config.systemd.network.enable;
    }
    {
      message = "Only bridge OR MACVLAN can be set";
      assertion = !(any wantsBoth (attrValues cfg.instances));
    }
  ];
|
|
|
|
# TODO: Better security
|
|
# Containers with neither a bridge nor a MACVLAN get a plain veth pair; the
# host end ("ve-<name>") is added to the trusted interfaces.
my.firewall.trustedInterfaces =
  let
    plainVeth = filterAttrs (_: c: c.networking.bridge == null && c.networking.macVLAN == null) cfg.instances;
  in
  map (n: "ve-${n}") (attrNames plainVeth);
|
|
|
|
systemd = mkMerge (mapAttrsToList (n: c: {
|
|
# systemd.nspawn settings for container `n`
nspawn."${n}" =
  let
    net = c.networking;

    # Render one bind mount as `host-path:mount-point`, or just the mount
    # point when no host path is given.
    toSpec = b:
      if b.hostPath == null
      then b.mountPoint
      else "${b.hostPath}:${b.mountPoint}";

    # Split the configured mounts by access mode, keeping their order.
    byMode = lib.partition (b: b.readOnly) (attrValues c.bindMounts);
    extraRO = map toSpec byMode.right;
    extraRW = map toSpec byMode.wrong;

    devKeyBind = optional config.my.build.isDevVM "${config.my.secrets.vmKeyPath}:${devVMKeyPath}";
  in
  {
    execConfig = {
      Boot = true;
      Ephemeral = true;
      NotifyReady = true;
      LinkJournal = false;
      PrivateUsers = false;
      ResolvConf = "bind-stub";
    };

    filesConfig = {
      Bind = [
        "${ctrProfiles n}:/nix/var/nix/profiles"
        "/nix/var/nix/gcroots/per-container/${n}:/nix/var/nix/gcroots"
        "${cfg.persistDir}/${n}:/persist"
      ] ++ extraRW;

      BindReadOnly = [
        "/nix/store"
        "/nix/var/nix/db"
        "/nix/var/nix/daemon-socket"
      ] ++ devKeyBind ++ extraRO;
    };

    # Bridge takes precedence, then MACVLAN; otherwise fall back to a plain veth pair.
    networkConfig =
      if net.bridge != null then
        { Bridge = net.bridge; }
      else if net.macVLAN != null then
        { MACVLAN = "${net.macVLAN}:host0"; }
      else
        { VirtualEthernet = true; };
  };
|
|
# Drop-in for the systemd-nspawn@ unit instance of container `n`: prepares the
# root directory before start, removes it after stop, and supports reloading
# via `switch-to-configuration test`.
services."systemd-nspawn@${n}" =
  let
    inDevVM = config.my.build.isDevVM;

    # Per-container profile link on the host
    profileLink = "${ctrProfiles n}/system";

    # System as referenced from the host; in a dev VM it comes from `systems`
    hostSystem =
      if inDevVM
      then systems."${n}".configuration.config.my.buildAs.container
      else c.system;
    # System as referenced from inside the container
    guestSystem = if inDevVM then hostSystem else c.containerSystem;

    # Whether the container boots from its profile link
    viaProfile = hostSystem == profileLink;

    # Populate the profile with the dummy init if nothing has been deployed yet.
    seedDummyProfile = optionalString viaProfile ''
      if [ ! -e "${profileLink}" ]; then
        echo "Creating dummy profile"
        ${config.nix.package}/bin/nix-env -p ${profileLink} --set ${dummyProfile}
      fi
    '';

    # Point /sbin/init at prepare-root, or at plain init when the profile has
    # no prepare-root.
    linkInit =
      if viaProfile then ''
        if [ -e "${profileLink}"/prepare-root ]; then
          initSource="${guestSystem}"/prepare-root
        else
          initSource="${guestSystem}"/init
        fi
        ln -sf "$initSource" "$root"/sbin/init
      '' else ''
        ln -sf "${guestSystem}/prepare-root" "$root"/sbin/init
      '';
  in
  {
    # To prevent creating a whole new unit file
    overrideStrategy = "asDropin";

    wantedBy = optional c.autoStart "machines.target";

    environment = {
      # systemd.nspawn units can't set the root directory directly, but /run/machines/${n} is one of the search paths
      root = "/run/machines/${n}";
      # Without this, systemd-nspawn will do cgroupsv1
      SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
    };

    # Restart whenever the rendered .nspawn settings change
    restartTriggers = [
      "${n}.nspawn:${hashString "sha256" (toJSON config.systemd.nspawn."${n}")}"
    ];

    preStart = ''
      mkdir -p -m 0755 \
        /nix/var/nix/{profiles,gcroots}/per-container/${n} \
        ${cfg.persistDir}/${n}

      ${seedDummyProfile}

      mkdir -p -m 0755 "$root"/sbin "$root"/etc
      touch "$root"/etc/os-release

      ${linkInit}
    '';

    postStop = ''
      rm -rf "$root"
    '';

    # `switch-to-configuration test` switches config without trying to update bootloader
    reload = ''
      [ -e "${hostSystem}"/bin/switch-to-configuration ] && \
        systemd-run --pipe --machine ${n} -- "${guestSystem}"/bin/switch-to-configuration test
    '';
  };
|
|
# Host-side networkd config for the veth ("vb-<name>") of a bridged container
network.networks."80-container-${n}-vb" =
  let
    bridge = c.networking.bridge;
  in
  mkIf (bridge != null) {
    matchConfig.Name = "vb-${n}";
    matchConfig.Driver = "veth";

    # systemd LLDP doesn't work on bridge interfaces
    networkConfig.LLDP = true;
    networkConfig.EmitLLDP = "customer-bridge";
    # Although nspawn will set the veth's master, systemd will clear it (systemd 250 adds a `KeepMaster`
    # to avoid this)
    networkConfig.Bridge = bridge;
  };
|
|
}) cfg.instances);
|
|
})
|
|
|
|
# Inside container
|
|
(mkIf config.boot.isContainer {
|
|
assertions = [
  {
    message = "Containers currently require systemd-networkd!";
    assertion = config.systemd.network.enable;
  }
];

# Automatic garbage collection is turned off inside containers
nix.gc.automatic = false;

# tmproot with its persistence directory at /persist
my.tmproot.enable = true;
my.tmproot.persistence.dir = "/persist";
|
|
|
|
system.activationScripts =
  let
    normalUsers = attrValues (filterAttrs (_: u: u.isNormalUser) config.users.users);

    # One `install -d` command creating the per-user profile and gcroot
    # directories owned by the given user.
    perUserDirsCmd = u:
      ''install -d -o ${u.name} -g ${u.group} /nix/var/nix/{profiles,gcroots}/per-user/"${u.name}"'';
  in
  {
    # So that update-users-groups.pl can see the saved info. Normally stage-1-init.sh would do these mounts early.
    earlyPersist = {
      text = ''
        if ! mountpoint -q /var/lib/nixos; then
          mkdir -p {/persist,}/var/lib/nixos
          mount --bind {/persist,}/var/lib/nixos
        fi
      '';
    };
    users = {
      deps = [ "earlyPersist" ];
    };

    # Ordinarily I think the Nix daemon does this but ofc it doesn't in the container
    createNixPerUserDirs = {
      deps = [ "users" "groups" ];
      text = concatMapStringsSep "\n" perUserDirsCmd normalUsers;
    };

    # age requires all keys to at least exist, even if they're not going to be used
    ensureDevKey = {
      text = ''
        [ ! -e "${devVMKeyPath}" ] && touch "${devVMKeyPath}"
      '';
    };
    agenixInstall = {
      deps = [ "ensureDevKey" ];
    };
  };
|
|
|
|
networking.useHostResolvConf = false;

# Replace the pre-installed 80-container-host0
systemd.network.networks."80-container-host0" = {
  matchConfig.Name = "host0";
  matchConfig.Virtualization = "container";

  networkConfig.LLDP = true;
  networkConfig.EmitLLDP = "customer-bridge";
};

# If the host is a dev VM
age.identityPaths = [ devVMKeyPath ];
|
|
})
|
|
];
|
|
}
|