Working "shill From Home" full network boot

This commit is contained in:
2024-06-29 23:12:21 +01:00
parent 84ca556c47
commit f9870abc9e
75 changed files with 1550 additions and 1088 deletions

View File

@@ -66,6 +66,7 @@ in
];
services = {
fstrim.enable = true;
netdata.enable = true;
};

View File

@@ -129,6 +129,12 @@ in
hostnqn =
"nqn.2014-08.org.nvmexpress:uuid:2230b066-a674-4f45-a1dc-f7727b3a9e7b";
serial = "SPDK00000000000002";
}) ++ (nvmfBdev {
bdev = "NVMeRaidp3";
nqn = "nqn.2016-06.io.spdk:sfh";
hostnqn =
"nqn.2014-08.org.nvmexpress:uuid:85d7df36-0de0-431b-b06e-51f7c0a455b4";
serial = "SPDK00000000000003";
});
};
};

View File

@@ -2,6 +2,7 @@
imports = [
./cellar
./river.nix
./sfh
];
nixos.systems.palace.configuration = { lib, pkgs, config, systems, allAssignments, ... }:
@@ -57,11 +58,11 @@
systemd.services =
let
awaitCellar = {
after = [ "vm@cellar.service" ];
bindsTo = [ "vm@cellar.service" ];
awaitVM = system: {
after = [ "vm@${system}.service" ];
bindsTo = [ "vm@${system}.service" ];
preStart = ''
until ${pkgs.netcat}/bin/nc -w1 -z ${allAssignments.cellar.hi.ipv4.address} 22; do
until ${pkgs.netcat}/bin/nc -w1 -z ${allAssignments.${system}.hi.ipv4.address} 22; do
sleep 1
done
'';
@@ -81,13 +82,13 @@
vtapUnit = "sys-subsystem-net-devices-vm\\x2det1g0.device";
in
mkMerge [
awaitCellar
(awaitVM "cellar")
{
requires = [ vtapUnit ];
after = [ vtapUnit ];
}
];
"vm@sfh" = awaitCellar;
"vm@sfh" = (awaitVM "river");
};
my = {

View File

@@ -10,18 +10,7 @@
let
inherit (lib.my) networkdAssignment mkVLAN;
inherit (lib.my.c) networkd;
inherit (lib.my.c.home) vlans;
lanLink = {
matchConfig = {
Driver = "mlx5_core";
PermanentMACAddress = "52:54:00:8a:8a:f2";
};
linkConfig = {
Name = "lan";
MTUBytes = toString lib.my.c.home.hiMTU;
};
};
inherit (lib.my.c.home) vlans domain prefixes roceBootModules;
in
{
imports = [
@@ -30,29 +19,16 @@
config = {
boot = {
kernelModules = [ "kvm-intel" ];
kernelModules = [ "kvm-amd" ];
kernelParams = [ "console=ttyS0,115200n8" ];
initrd = {
availableKernelModules = [
"virtio_pci" "ahci" "sr_mod" "virtio_blk"
"ib_core" "ib_uverbs" "mlx5_core" "mlx5_ib" "8021q"
"rdma_cm" "iw_cm" "ib_cm" "nvme_core" "nvme_rdma"
];
kernelModules = [ "dm-snapshot" "nvme-fabrics" ];
] ++ roceBootModules;
kernelModules = [ "dm-snapshot" ];
systemd = {
extraBin = with pkgs; {
dmesg = "${util-linux}/bin/dmesg";
ip = "${iproute2}/bin/ip";
};
extraConfig = ''
DefaultTimeoutStartSec=50
DefaultDeviceTimeoutSec=50
'';
network = {
enable = true;
wait-online.enable = true;
links."10-lan" = lanLink;
# Don't need to put the link config here, they're copied from main config
netdevs = mkVLAN "lan-hi" vlans.hi;
networks = {
"20-lan" = {
@@ -70,9 +46,6 @@
hardware = {
enableRedistributableFirmware = true;
cpu = {
intel.updateMicrocode = true;
};
};
fileSystems = {
@@ -96,6 +69,7 @@
boot.thin.enable = true;
dmeventd.enable = true;
};
fstrim.enable = true;
};
systemd.network = {
@@ -114,7 +88,16 @@
};
};
"10-lan" = lanLink;
"10-lan" = {
matchConfig = {
Driver = "mlx5_core";
PermanentMACAddress = "52:54:00:8a:8a:f2";
};
linkConfig = {
Name = "lan";
MTUBytes = toString lib.my.c.home.hiMTU;
};
};
};
# So we don't drop the IP we use to connect to NVMe-oF!
@@ -134,6 +117,14 @@
};
};
netboot.server = {
enable = true;
ip = assignments.lo.ipv4.address;
host = "boot.${domain}";
allowedPrefixes = with prefixes; [ hi.v4 hi.v6 lo.v4 lo.v6 ];
instances = [ "sfh" ];
};
deploy.node.hostname = "192.168.68.1";
};
};

View File

@@ -0,0 +1,122 @@
{ lib, ... }:
let
inherit (lib.my) net;
inherit (lib.my.c) pubDomain;
inherit (lib.my.c.home) domain prefixes vips hiMTU roceBootModules;
in
{
config.nixos.systems.sfh = {
system = "x86_64-linux";
nixpkgs = "mine";
home-manager = "mine";
assignments = {
hi = {
inherit domain;
mtu = hiMTU;
ipv4 = {
address = net.cidr.host 81 prefixes.hi.v4;
mask = 22;
gateway = vips.hi.v4;
};
ipv6 = {
iid = "::4:2";
address = net.cidr.host (65536*4+2) prefixes.hi.v6;
};
};
};
configuration = { lib, modulesPath, pkgs, config, assignments, allAssignments, ... }:
let
inherit (lib) mkMerge;
inherit (lib.my) networkdAssignment;
inherit (lib.my.c.home) domain;
in
{
imports = [
"${modulesPath}/profiles/qemu-guest.nix"
];
config = {
boot = {
kernelModules = [ "kvm-amd" ];
kernelParams = [ "console=ttyS0,115200n8" ];
initrd = {
availableKernelModules = [
"virtio_pci" "ahci" "sr_mod" "virtio_blk"
] ++ roceBootModules;
kernelModules = [ "dm-snapshot" ];
systemd = {
network = {
networks = {
"20-lan-hi" = networkdAssignment "lan-hi" assignments.hi;
};
};
};
};
};
hardware = {
enableRedistributableFirmware = true;
};
fileSystems = {
"/nix" = {
device = "/dev/main/nix";
fsType = "ext4";
};
"/persist" = {
device = "/dev/main/persist";
fsType = "ext4";
neededForBoot = true;
};
};
services = {
lvm = {
boot.thin.enable = true;
dmeventd.enable = true;
};
};
systemd.network = {
links = {
"10-lan-hi" = {
matchConfig = {
Driver = "mlx5_core";
PermanentMACAddress = "52:54:00:ac:15:a9";
};
linkConfig = {
Name = "lan-hi";
MTUBytes = toString lib.my.c.home.hiMTU;
};
};
};
networks."30-lan-hi" = mkMerge [
(networkdAssignment "lan-hi" assignments.hi)
# So we don't drop the IP we use to connect to NVMe-oF!
{ networkConfig.KeepConfiguration = "static"; }
];
};
my = {
secrets = {
key = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAAaav5Se1E/AbqEXmADryVszYfNDscyP6jrWioN57R7";
};
server.enable = true;
netboot.client = {
enable = true;
};
nvme = {
uuid = "85d7df36-0de0-431b-b06e-51f7c0a455b4";
boot = {
nqn = "nqn.2016-06.io.spdk:sfh";
address = "192.168.68.80";
};
};
};
};
};
};
}

View File

@@ -401,11 +401,6 @@ in
}
'';
};
netboot.server = {
enable = true;
ip = vips.lo.v4;
host = "boot.${domain}";
};
};
};
};

View File

@@ -172,7 +172,7 @@ in
}}
${elemAt routers 0} IN AAAA ${net.cidr.host 1 prefixes.hi.v6}
${elemAt routers 1} IN AAAA ${net.cidr.host 2 prefixes.hi.v6}
boot IN CNAME router-hi.${config.networking.domain}.
boot IN CNAME river-hi.${config.networking.domain}.
@ IN NS ns1
@ IN NS ns2

View File

@@ -1,7 +1,7 @@
index: { lib, pkgs, config, assignments, ... }:
index: { lib, pkgs, config, assignments, allAssignments, ... }:
let
inherit (lib) mkForce;
inherit (lib.my) net;
inherit (lib.my) net netbootKeaClientClasses;
inherit (lib.my.c.home) domain prefixes vips hiMTU;
dns-servers = [
@@ -63,7 +63,13 @@ in
always-send = true;
}
];
client-classes = config.my.netboot.server.keaClientClasses;
client-classes = netbootKeaClientClasses {
tftpIP = allAssignments.river.lo.ipv4.address;
hostname = "boot.${domain}";
systems = {
sfh = "52:54:00:a5:7e:93";
};
};
subnet4 = [
{
id = 1;

View File

@@ -86,6 +86,7 @@
dhcpcd
lm_sensors
ethtool
nfs-utils
];
# Much of this onwards is yoinked from modules/profiles/installation-device.nix

View File

@@ -1,6 +1,6 @@
{ lib, pkgs, config, systems, ... }:
{ lib, pkgs, config, ... }:
let
inherit (lib) mkMerge mkIf mkForce mkOption;
inherit (lib) mkMerge mkIf mkForce genAttrs concatMapStringsSep;
inherit (lib.my) mkOpt' mkBoolOpt';
cfg = config.my.netboot;
@@ -16,31 +16,54 @@ let
} ''
substituteAll ${./menu.ipxe} "$out"
'';
bootBuilder = pkgs.substituteAll {
src = ./netboot-loader-builder.py;
isExecutable = true;
inherit (pkgs) python3;
bootspecTools = pkgs.bootspec;
nix = config.nix.package.out;
inherit (config.system.nixos) distroName;
systemName = config.system.name;
inherit (cfg.client) configurationLimit;
checkMountpoints = pkgs.writeShellScript "check-mountpoints" ''
if ! ${pkgs.util-linuxMinimal}/bin/findmnt /boot > /dev/null; then
echo "/boot is not a mounted partition. Is the path configured correctly?" >&2
exit 1
fi
'';
};
in
{
options.my.netboot = with lib.types; {
client = {
enable = mkBoolOpt' false "Whether network booting should be enabled.";
configurationLimit = mkOpt' ints.unsigned 10 "Max generations to show in boot menu.";
};
server = {
enable = mkBoolOpt' false "Whether a netboot server should be enabled.";
ip = mkOpt' str null "IP clients should connect to via TFTP.";
host = mkOpt' str config.networking.fqdn "Hostname clients should connect to over HTTP.";
host = mkOpt' str config.networking.fqdn "Hostname clients should connect to over HTTP / NFS.";
allowedPrefixes = mkOpt' (listOf str) null "Prefixes clients should be allowed to connect from (NFS).";
installer = {
storeSize = mkOpt' str "16GiB" "Total allowed writable size of store.";
};
instances = mkOpt' (listOf str) [ ] "Systems to hold boot files for.";
keaClientClasses = mkOption {
type = listOf (attrsOf str);
description = "Kea client classes for PXE boot.";
readOnly = true;
};
};
};
config = mkMerge [
(mkIf cfg.client.enable {
# TODO: Implement!
boot.loader = {
grub.enable = false;
systemd-boot.enable = false;
};
system = {
build.installBootLoader = bootBuilder;
boot.loader.id = "ipxe-netboot";
};
})
(mkIf cfg.server.enable {
environment = {
@@ -51,14 +74,21 @@ in
};
systemd = {
tmpfiles.settings."10-netboot" = genAttrs
(map (i: "/srv/netboot/systems/${i}") cfg.server.instances)
(p: {
d = {
user = "root";
group = "root";
mode = "0777";
};
});
services = {
netboot-update = {
description = "Update netboot images";
after = [ "systemd-networkd-wait-online.service" ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
};
serviceConfig.Type = "oneshot";
path = with pkgs; [
coreutils curl jq zstd gnutar
];
@@ -136,6 +166,15 @@ in
};
};
};
nfs = {
server = {
enable = true;
exports = ''
/srv/netboot/systems ${concatMapStringsSep " " (p: "${p}(rw,all_squash)") cfg.server.allowedPrefixes}
'';
};
};
};
my = {
@@ -143,22 +182,6 @@ in
"/srv/netboot"
{ directory = "/var/cache/netboot"; mode = "0700"; }
];
netboot.server.keaClientClasses = [
{
name = "ipxe";
test = "substring(option[user-class].hex, 0, 4) == 'iPXE'";
next-server = cfg.server.ip;
server-hostname = cfg.server.host;
boot-file-name = "http://${cfg.server.host}/boot.ipxe";
}
{
name = "efi-x86_64";
test = "option[client-system].hex == 0x0007";
next-server = cfg.server.ip;
server-hostname = cfg.server.host;
boot-file-name = "ipxe-x86_64.efi";
}
];
};
})
];

View File

@@ -0,0 +1,280 @@
#! @python3@/bin/python3 -B
# Based on `nixos/modules/system/boot/loader/systemd-boot/systemd-boot-builder.py`
import argparse
import datetime
import glob
import os
import os.path
import shutil
import subprocess
import sys
import json
from typing import NamedTuple, Dict, List
from dataclasses import dataclass
BOOT_MOUNT_POINT = '/boot'
STORE_DIR = 'nix'
# These values will be replaced with actual values during the package build
BOOTSPEC_TOOLS = '@bootspecTools@'
NIX = '@nix@'
DISTRO_NAME = '@distroName@'
SYSTEM_NAME = '@systemName@'
CONFIGURATION_LIMIT = int('@configurationLimit@')
CHECK_MOUNTPOINTS = "@checkMountpoints@"
@dataclass
class BootSpec:
init: str
initrd: str
kernel: str
kernelParams: List[str]
label: str
system: str
toplevel: str
specialisations: Dict[str, 'BootSpec']
sortKey: str
initrdSecrets: str | None = None
class SystemIdentifier(NamedTuple):
profile: str | None
generation: int
specialisation: str | None
def copy_if_not_exists(source: str, dest: str) -> None:
if not os.path.exists(dest):
shutil.copyfile(source, dest)
def generation_dir(profile: str | None, generation: int) -> str:
if profile:
return f'/nix/var/nix/profiles/system-profiles/{profile}-{generation}-link'
else:
return f'/nix/var/nix/profiles/system-{generation}-link'
def system_dir(i: SystemIdentifier) -> str:
d = generation_dir(i.profile, i.generation)
if i.specialisation:
return os.path.join(d, 'specialisation', i.specialisation)
else:
return d
def entry_key(i: SystemIdentifier) -> str:
pieces = [
'nixos',
i.profile or None,
'generation',
str(i.generation),
f'specialisation-{i.specialisation}' if i.specialisation else None,
]
return '-'.join(p for p in pieces if p)
def bootspec_from_json(bootspec_json: Dict) -> BootSpec:
specialisations = bootspec_json['org.nixos.specialisation.v1']
specialisations = {k: bootspec_from_json(v) for k, v in specialisations.items()}
systemdBootExtension = bootspec_json.get('org.nixos.systemd-boot', {})
sortKey = systemdBootExtension.get('sortKey', 'nixos')
return BootSpec(
**bootspec_json['org.nixos.bootspec.v1'],
specialisations=specialisations,
sortKey=sortKey
)
bootspecs = {}
def get_bootspec(profile: str | None, generation: int) -> BootSpec:
k = (profile, generation)
if k in bootspecs:
return bootspecs[k]
system_directory = system_dir(SystemIdentifier(profile, generation, None))
boot_json_path = os.path.realpath(f'{system_directory}/boot.json')
if os.path.isfile(boot_json_path):
boot_json_f = open(boot_json_path, 'r')
bootspec_json = json.load(boot_json_f)
else:
boot_json_str = subprocess.check_output([
f'{BOOTSPEC_TOOLS}/bin/synthesize',
'--version',
'1',
system_directory,
'/dev/stdout',
],
universal_newlines=True)
bootspec_json = json.loads(boot_json_str)
bs = bootspec_from_json(bootspec_json)
bootspecs[k] = bs
return bs
def copy_from_file(file: str, dry_run: bool = False) -> str:
store_file_path = os.path.realpath(file)
suffix = os.path.basename(store_file_path)
store_dir = os.path.basename(os.path.dirname(store_file_path))
dst_path = f'/{STORE_DIR}/{store_dir}-{suffix}'
if not dry_run:
copy_if_not_exists(store_file_path, f'{BOOT_MOUNT_POINT}{dst_path}')
return dst_path
MENU_ITEM = 'item {gen_key} {title} Generation {generation} {description}'
BOOT_ENTRY = ''':{gen_key}
kernel ${{server}}/systems/{system_name}{kernel} {kernel_params} boothost=${{boothost}}
initrd ${{server}}/systems/{system_name}{initrd}
boot
'''
def gen_entry(i: SystemIdentifier) -> (str, str):
bootspec = get_bootspec(i.profile, i.generation)
if i.specialisation:
bootspec = bootspec.specialisations[i.specialisation]
kernel = copy_from_file(bootspec.kernel)
initrd = copy_from_file(bootspec.initrd)
gen_key = entry_key(i)
title = '{name}{profile}{specialisation}'.format(
name=DISTRO_NAME,
profile=' [' + i.profile + ']' if i.profile else '',
specialisation=f' ({i.specialisation})' if i.specialisation else '')
kernel_params = f'init={bootspec.init} '
kernel_params = kernel_params + ' '.join(bootspec.kernelParams)
build_time = int(os.path.getctime(system_dir(i)))
build_date = datetime.datetime.fromtimestamp(build_time).strftime('%F')
return MENU_ITEM.format(
gen_key=gen_key,
title=title,
description=f'{bootspec.label}, built on {build_date}',
generation=i.generation,
), BOOT_ENTRY.format(
gen_key=gen_key,
generation=i.generation,
system_name=SYSTEM_NAME,
kernel=kernel,
kernel_params=kernel_params,
initrd=initrd,
)
def get_generations(profile: str | None = None) -> list[SystemIdentifier]:
gen_list = subprocess.check_output([
f'{NIX}/bin/nix-env',
'--list-generations',
'-p',
'/nix/var/nix/profiles/' + ('system-profiles/' + profile if profile else 'system')],
universal_newlines=True)
gen_lines = gen_list.split('\n')
gen_lines.pop()
configurationLimit = CONFIGURATION_LIMIT
configurations = [
SystemIdentifier(
profile=profile,
generation=int(line.split()[0]),
specialisation=None
)
for line in gen_lines
]
return configurations[-configurationLimit:]
def remove_old_files(gens: list[SystemIdentifier]) -> None:
known_paths = []
for gen in gens:
bootspec = get_bootspec(gen.profile, gen.generation)
known_paths.append(copy_from_file(bootspec.kernel, True))
known_paths.append(copy_from_file(bootspec.initrd, True))
for path in glob.iglob(f'{BOOT_MOUNT_POINT}/{STORE_DIR}/*'):
if not path in known_paths and not os.path.isdir(path):
os.unlink(path)
def get_profiles() -> list[str]:
if os.path.isdir('/nix/var/nix/profiles/system-profiles/'):
return [x
for x in os.listdir('/nix/var/nix/profiles/system-profiles/')
if not x.endswith('-link')]
else:
return []
MENU = '''#!ipxe
# Server hostname option
set boothost ${{66:string}}
set server http://${{boothost}}
:start
menu {distro} boot menu
item --gap -- Generations
{generation_items}
item --gap -- Other
item --key m main Main netboot menu
choose --timeout 5000 --default {menu_default} selected || goto cancel
goto ${{selected}}
:cancel
shell
goto start
:error
echo Booting failed, dropping to shell
shell
goto start
:main
chain ${{server}}/boot.ipxe || goto error
'''
def write_menu(gens: list[SystemIdentifier], default: SystemIdentifier) -> None:
gen_menu_items = []
gen_cmds = []
for g in gens:
bootspec = get_bootspec(g.profile, g.generation)
specialisations = [
SystemIdentifier(profile=g.profile, generation=g.generation, specialisation=s) for s in bootspec.specialisations]
for i in [g] + specialisations:
mi, cmds = gen_entry(i)
gen_menu_items.append(mi)
gen_cmds.append(cmds)
menu_file = f'{BOOT_MOUNT_POINT}/menu.ipxe'
with open(f'{menu_file}.tmp', 'w') as f:
f.write(MENU.format(
distro=DISTRO_NAME,
generation_items='\n'.join(gen_menu_items),
menu_default=entry_key(default),
))
print(file=f)
print('\n\n'.join(gen_cmds), file=f)
os.rename(f'{menu_file}.tmp', menu_file)
def install_bootloader(args: argparse.Namespace) -> None:
os.makedirs(f'{BOOT_MOUNT_POINT}/{STORE_DIR}', exist_ok=True)
gens = get_generations()
for profile in get_profiles():
gens += get_generations(profile)
gens = sorted(gens, key=lambda g: entry_key(g), reverse=True)
remove_old_files(gens)
for g in gens:
if os.path.dirname(get_bootspec(g.profile, g.generation).init) == os.path.realpath(args.default_config):
default = g
break
else:
assert False, 'No default generation found'
write_menu(gens, default)
def main() -> None:
parser = argparse.ArgumentParser(description=f'Update {DISTRO_NAME}-related netboot files')
parser.add_argument('default_config', metavar='DEFAULT-CONFIG', help=f'The default {DISTRO_NAME} config to boot')
args = parser.parse_args()
subprocess.check_call(CHECK_MOUNTPOINTS)
install_bootloader(args)
if __name__ == '__main__':
main()

View File

@@ -33,24 +33,43 @@ in
etc = etc "";
};
boot.initrd.systemd = mkIf (cfg.boot.nqn != null) {
contents = etc "/etc/";
extraBin.nvme = "${nvme-cli}/bin/nvme";
boot = mkIf (cfg.boot.nqn != null) {
initrd = {
availableKernelModules = [ "rdma_cm" "iw_cm" "ib_cm" "nvme_core" "nvme_rdma" ];
kernelModules = [ "nvme-fabrics" ];
systemd = {
contents = etc "/etc/";
extraBin = with pkgs; {
dmesg = "${util-linux}/bin/dmesg";
ip = "${iproute2}/bin/ip";
nvme = "${nvme-cli}/bin/nvme";
};
extraConfig = ''
DefaultTimeoutStartSec=20
DefaultDeviceTimeoutSec=20
'';
services.connect-nvme = {
description = "Connect NVMe-oF";
before = [ "initrd-root-device.target" ];
after = [ "systemd-networkd-wait-online.service" ];
requires = [ "systemd-networkd-wait-online.service" ];
network = {
enable = true;
wait-online.enable = true;
};
serviceConfig = {
Type = "oneshot";
ExecStart = "${nvme-cli}/bin/nvme connect -t rdma -a ${cfg.boot.address} -n ${cfg.boot.nqn}";
Restart = "on-failure";
RestartSec = 10;
services.connect-nvme = {
description = "Connect NVMe-oF";
before = [ "initrd-root-device.target" ];
after = [ "systemd-networkd-wait-online.service" ];
requires = [ "systemd-networkd-wait-online.service" ];
serviceConfig = {
Type = "oneshot";
ExecStart = "${nvme-cli}/bin/nvme connect -t rdma -a ${cfg.boot.address} -n ${cfg.boot.nqn}";
Restart = "on-failure";
RestartSec = 10;
};
wantedBy = [ "initrd-root-device.target" ];
};
};
wantedBy = [ "initrd-root-device.target" ];
};
};
};