From baed6f24f6e6418c1f483a868ec451e143a178f8 Mon Sep 17 00:00:00 2001
From: Jack O'Sullivan
Date: Sun, 29 May 2022 15:22:49 +0100
Subject: [PATCH] nixos: Improve VM networking

Replace qemu-bridge-helper with explicitly named TAP devices that
systemd-networkd attaches to the configured bridge.

- vms module: add per-network `ifname`, nullable `bridge` and `waitOnline`
  options, generate a `70-vm-<vm>-<net>` networkd unit for every bridged
  network, and run QEMU from a generated `script` instead of `ExecStart`.
- vm@ services require, and are ordered after, the
  systemd-networkd-wait-online@ instance of each bridge they wait on.
- colony: add a dummy `vms0` port so the `vms` bridge comes up, and drop
  the catch-all `tap*` network and the hand-rolled estuary preStart wait.
- colony VMs: skip the LVM-backed drives when building the dev VM.

---
 nixos/boxes/colony/default.nix     | 25 +++++------
 nixos/boxes/colony/vms/default.nix | 40 ++++++-----------
 nixos/modules/vms.nix              | 72 +++++++++++++++++++++++-------
 3 files changed, 81 insertions(+), 56 deletions(-)

diff --git a/nixos/boxes/colony/default.nix b/nixos/boxes/colony/default.nix
index 8f79217..f12a46a 100644
--- a/nixos/boxes/colony/default.nix
+++ b/nixos/boxes/colony/default.nix
@@ -28,7 +28,7 @@
 
     configuration = { lib, pkgs, modulesPath, config, systems, assignments, allAssignments, ... }:
       let
-        inherit (lib) mkIf mkMerge mkForce mapAttrs;
+        inherit (lib) mkIf mkMerge mkForce;
         inherit (lib.my) networkdAssignment;
       in
       {
@@ -77,10 +77,15 @@
               Name = "base";
               Kind = "bridge";
             };
+
             "25-vms".netdevConfig = {
               Name = "vms";
               Kind = "bridge";
             };
+            "30-vms-dummy".netdevConfig = {
+              Name = "vms0";
+              Kind = "dummy";
+            };
           };
 
           networks = {
@@ -119,19 +124,11 @@
                 ];
               }
             ];
-
-            "80-vm-tap" = {
-              matchConfig = {
-                # Don't think we have control over the name of the TAP from qemu-bridge-helper (or how to easily pick
-                # which interface is which)
-                Name = "tap*";
-                Driver = "tun";
-              };
-              networkConfig = {
-                KeepMaster = true;
-                LLDP = true;
-                EmitLLDP = "customer-bridge";
-              };
+            # Just so the vms interface will come up (in networkd's eyes), allowing dependent VMs to start.
+            # Could tweak the `waitOnline` for a single VM, but this seems better overall?
+            "80-vms-dummy" = {
+              matchConfig.Name = "vms0";
+              networkConfig.Bridge = "vms";
             };
           };
         };
diff --git a/nixos/boxes/colony/vms/default.nix b/nixos/boxes/colony/vms/default.nix
index 001787c..e427322 100644
--- a/nixos/boxes/colony/vms/default.nix
+++ b/nixos/boxes/colony/vms/default.nix
@@ -6,7 +6,8 @@
 
   nixos.systems.colony.configuration = { lib, pkgs, config, systems, ... }:
   let
-    inherit (lib) mkMerge;
+    inherit (builtins) listToAttrs;
+    inherit (lib) mkIf mkMerge optionals;
 
     wanBDF = if config.my.build.isDevVM then "00:02.0" else "01:00.0";
 
@@ -28,32 +29,18 @@
     };
   in
   {
-    systemd = {
-      services."vm@estuary" = {
-        # Depend the interface, networkd wait-online would deadlock...
-        requires = [ "sys-subsystem-net-devices-base.device" ];
-        preStart = ''
-          count=0
-          while ! ${pkgs.iproute2}/bin/ip link show dev base > /dev/null 2>&1; do
-            count=$((count+1))
-            if [ $count -ge 5 ]; then
-              echo "Timed out waiting for bridge interface"
-            fi
-            sleep 0.5
-          done
-        '';
-      };
-    };
-
     my = {
       vms = {
         instances = {
           estuary = {
             uuid = "59f51efb-7e6d-477b-a263-ed9620dbc87b";
-            networks.base.mac = "52:54:00:ab:f1:52";
+            networks.base = {
+              waitOnline = "no-carrier";
+              mac = "52:54:00:ab:f1:52";
+            };
             drives = {
               # TODO: Split into separate LVs
-              disk = {
+              disk = mkIf (!config.my.build.isDevVM) {
                 backend = {
                   driver = "host_device";
                   filename = "/dev/ssds/vm-estuary";
@@ -72,10 +59,7 @@
           shill = {
             uuid = "e34569ec-d24e-446b-aca8-a3b27abc1f9b";
             networks.vms.mac = "52:54:00:85:b3:b1";
-            drives = mkMerge [
-              (vmLVM "shill" "esp")
-              (vmLVM "shill" "nix")
-              (vmLVM "shill" "persist")
+            drives = mkMerge ([
               {
                 installer = {
                   backend = {
@@ -89,9 +73,13 @@
                     bootindex = 1;
                   };
                 };
-                esp.frontendOpts.bootindex = 0;
               }
-            ];
+            ] ++ (optionals (!config.my.build.isDevVM) [
+              (vmLVM "shill" "esp")
+              (vmLVM "shill" "nix")
+              (vmLVM "shill" "persist")
+              { esp.frontendOpts.bootindex = 0; }
+            ]));
           };
         };
       };
diff --git a/nixos/modules/vms.nix b/nixos/modules/vms.nix
index 9569b75..fbc1c4d 100644
--- a/nixos/modules/vms.nix
+++ b/nixos/modules/vms.nix
@@ -3,7 +3,8 @@ let
   inherit (builtins) filter any attrNames attrValues fetchGit;
   inherit (lib)
     unique optional optionals optionalString flatten concatStringsSep
-    concatMapStringsSep mapAttrsToList mapAttrs' mkIf mkDefault;
+    concatMapStringsSep mapAttrsToList mapAttrs' filterAttrs mkIf mkMerge
+    mkDefault mkOption;
   inherit (lib.my) mkOpt' mkBoolOpt';
 
   flattenQEMUOpts = attrs:
@@ -66,9 +67,13 @@ let
 
   cfg = config.my.vms;
 
-  netOpts = with lib.types; { name, ... }: {
+  netOpts = with lib.types; { name, iName, ... }: {
     options = {
-      bridge = mkOpt' str name "Network bridge to connect to.";
+      ifname = mkOpt' str "vm-${iName}" "TAP device to create.";
+      bridge = mkOpt' (nullOr str) name "Network bridge to connect to (null to not attach to bridge).";
+      waitOnline = mkOpt' (either bool str) true
+        "Whether to wait for networkd to consider the bridge online. Pass a string to set the OPERSTATE to wait for.";
+
       model = mkOpt' str "virtio-net" "Device type for network interface.";
       mac = mkOpt' str null "Guest MAC address.";
       extraOptions = mkOpt' qemuOpts { } "Extra QEMU options to set for the NIC.";
@@ -115,7 +120,13 @@ let
           memory = mkOpt' ints.unsigned 1024 "Amount of RAM (mebibytes).";
           vga = mkOpt' str "virtio" "VGA card type.";
           spice.enable = mkBoolOpt' true "Whether to enable SPICE.";
-          networks = mkOpt' (attrsOf (submodule netOpts)) { } "Networks to attach VM to.";
+          networks = mkOption {
+            description = "Networks to attach VM to.";
+            type = attrsOf (submoduleWith {
+              modules = [ { _module.args.iName = name; } netOpts ];
+            });
+            default = { };
+          };
           drives = mkOpt' (attrsOf (submodule driveOpts)) { } "Drives to attach to VM.";
           hostDevices = mkOpt' (attrsOf (submodule hostDevOpts)) { } "Host PCI devices to pass to the VM.";
         };
@@ -127,7 +138,7 @@ let
     (i: mapAttrsToList (bdf: c: { inherit bdf; inherit (c) bindVFIO; }) i.hostDevices)
     (attrValues cfg.instances));
 
-  mkQemuCommand = n: i:
+  mkQemuScript = n: i:
   let
     flags =
       i.qemuFlags ++
@@ -154,7 +165,7 @@ let
       ]) ++
       (optional i.spice.enable "spice unix=on,addr=/run/vms/${n}/spice.sock,disable-ticketing=on") ++
       (flatten (mapAttrsToList (nn: c: [
-        "netdev bridge,id=${nn},br=${c.bridge}"
+        "netdev tap,id=${nn},ifname=${c.ifname},script=no,downscript=no"
         ("device ${c.model},netdev=${nn},mac=${c.mac}" + (extraQEMUOpts c.extraOptions))
       ]) i.networks)) ++
       (flatten (mapAttrsToList (dn: c: [
@@ -165,7 +176,10 @@ let
       (map (bdf: "device vfio-pci,host=${bdf}") (attrNames i.hostDevices));
     args = map (v: "-${v}") flags;
   in
-    concatStringsSep " " ([ i.qemuBin ] ++ args);
+  ''
+    exec ${i.qemuBin} \
+      ${concatStringsSep " \\\n " args}
+  '';
 in
 {
   options.my.vms = with lib.types; {
@@ -198,15 +212,41 @@ in
 
     my.tmproot.persistence.config.directories = [ "/var/lib/vms" ];
 
-    # qemu-bridge-helper will fail otherwise
-    environment.etc."qemu/bridge.conf".text = "allow all";
-    systemd = {
-      services = mapAttrs' (n: i: {
-        name = "vm@${n}";
-        value = {
+    systemd = mkMerge ([ ] ++
+      (mapAttrsToList (n: i:
+      let
+        # One networkd wait-online instance per attached bridge that asks to be waited on.
+        waitOnlineUnits =
+          map
+            (net:
+              let
+                arg = if net.waitOnline == true then net.bridge else "${net.bridge}:${net.waitOnline}";
+              in
+                "systemd-networkd-wait-online@${arg}.service")
+            (filter (net: net.bridge != null && net.waitOnline != false) (attrValues i.networks));
+      in
+      {
+        # TODO: LLDP?
+        network.networks =
+          mapAttrs'
+            (nn: net: {
+              name = "70-vm-${n}-${nn}";
+              value = {
+                matchConfig = {
+                  Name = net.ifname;
+                  # TAP devices report the link kind "tun" (IFLA_INFO_KIND); "tap" would never match.
+                  Kind = "tun";
+                };
+                networkConfig.Bridge = net.bridge;
+              };
+            })
+            (filterAttrs (_: net: net.bridge != null) i.networks);
+        services."vm@${n}" = {
           description = "Virtual machine '${n}'";
+          requires = waitOnlineUnits;
+          # Requires= does not imply ordering; After= makes QEMU wait until the bridge is online.
+          after = waitOnlineUnits;
           serviceConfig = {
-            ExecStart = mkQemuCommand n i;
             ExecStop = mkIf i.cleanShutdown.enabled "${doCleanShutdown} /run/vms/${n}/monitor-qmp.sock";
             TimeoutStopSec = mkIf i.cleanShutdown.enabled i.cleanShutdown.timeout;
 
@@ -220,6 +260,7 @@ in
               cp "${cfg.ovmfPackage.fd}"/FV/OVMF_VARS.fd "$STATE_DIRECTORY"/ovmf_vars.bin
             fi
           '';
+          script = mkQemuScript n i;
           postStart = ''
             socks=(monitor-qmp monitor tty spice)
 
@@ -233,7 +274,6 @@ in
           restartIfChanged = mkDefault false;
           wantedBy = optional i.autoStart "machines.target";
         };
-      }) cfg.instances;
-    };
+      }) cfg.instances));
   };
 }