2022-05-02 20:40:11 +01:00
|
|
|
{ lib, pkgs, config, ... }:
|
|
|
|
let
|
2022-05-16 00:05:02 +01:00
|
|
|
inherit (builtins) filter any attrNames attrValues fetchGit;
|
|
|
|
inherit (lib)
|
|
|
|
unique optional optionals optionalString flatten concatStringsSep
|
2022-05-29 15:22:49 +01:00
|
|
|
concatMapStringsSep mapAttrsToList mapAttrs' filterAttrs mkIf mkMerge
|
|
|
|
mkDefault mkOption;
|
2022-05-02 20:40:11 +01:00
|
|
|
inherit (lib.my) mkOpt' mkBoolOpt';
|
|
|
|
|
|
|
|
# Render an attribute set of QEMU options as one comma-separated option string
# (`k1=v1,k2=v2`), in `attrNames` (sorted) key order.
#
#  - `null` values emit the bare key:  { readonly = null; }  ->  "readonly"
#  - Booleans emit `on` / `off`. Plain `toString` renders `true` as "1" and
#    `false` as "" (a dangling `key=`), neither of which matches the on/off
#    spelling used for QEMU options elsewhere in this file.
#  - Every other value is passed through `toString`.
flattenQEMUOpts = attrs:
  let
    renderValue = v:
      if builtins.isBool v
      then (if v then "on" else "off")
      else toString v;

    renderOpt = k:
      let v = attrs.${k}; in
      if v == null then k else "${k}=${renderValue v}";
  in
  builtins.concatStringsSep "," (map renderOpt (builtins.attrNames attrs));
|
|
|
|
# Option type for QEMU option strings: accepts a ready-made string, or an
# attribute set which is turned into a string by `flattenQEMUOpts`.
qemuOpts =
  let
    inherit (lib.types) coercedTo attrsOf unspecified str;
  in
  coercedTo (attrsOf unspecified) flattenQEMUOpts str;
|
|
|
|
# Prefix a non-empty option string with "," so it can be appended directly to
# an existing option list; an empty string stays empty.
extraQEMUOpts = o:
  if o == "" then "" else ",${o}";
|
|
|
|
|
2022-05-07 19:56:47 +01:00
|
|
|
# Script that asks a guest to power down cleanly over QMP.
# It takes the path of the VM's QMP unix socket as its only argument, issues
# `system_powerdown`, then blocks until QEMU reports a SHUTDOWN event before
# disconnecting. It exits 0 straight away when MAINPID is absent from the
# environment.
doCleanShutdown =
  let
    # Python interpreter bundled with the `qemu` package; the script below
    # imports `qemu.qmp` from it.
    pyEnv = pkgs.python3.withPackages (ps: with ps; [ qemu ]);
  in
  pkgs.writeScript "qemu-clean-shutdown" ''
    #!${pyEnv}/bin/python
    import asyncio
    import sys
    import os

    import qemu.qmp

    async def main():
      if len(sys.argv) != 2:
        print(f'usage: {sys.argv[0]} <qmp unix socket>', file=sys.stderr)
        sys.exit(1)

      if 'MAINPID' not in os.environ:
        # Special case: systemd is calling us after QEMU exited on its own
        sys.exit(0)

      client = qemu.qmp.QMPClient('clean-shutdown')
      await client.connect(sys.argv[1])
      await client.execute('system_powerdown')
      async for event in client.events:
        if event['event'] == 'SHUTDOWN':
          break
      await client.disconnect()

    asyncio.run(main())
  '';
|
|
|
|
|
2022-05-02 20:40:11 +01:00
|
|
|
# Shorthand for this module's own configuration subtree (`my.vms`).
cfg = config.my.vms;
|
|
|
|
|
2022-05-29 15:22:49 +01:00
|
|
|
# Submodule describing one network attachment of a VM.
# `name` is the attribute name of the network; `iName` is injected by the
# enclosing VM submodule through `_module.args` and carries the VM's name.
netOpts = { name, iName, ... }:
  let
    t = lib.types;
  in
  {
    options = {
      # Host side: which TAP device to use and how it is wired up.
      ifname = mkOpt' t.str "vm-${iName}" "TAP device to create / use.";
      bridge = mkOpt' (t.nullOr t.str) name "Network bridge to connect to (null to not attach to bridge).";
      waitOnline = mkOpt' (t.either t.bool t.str) true
        "Whether to wait for networkd to consider the bridge / existing TAP device online. Pass a string to set the OPERSTATE will wait for.";
      tapFD = mkOpt' (t.nullOr t.ints.unsigned) null "FD to use to pass existing TAP device.";

      # Guest side: the emulated NIC.
      model = mkOpt' t.str "virtio-net" "Device type for network interface.";
      mac = mkOpt' t.str null "Guest MAC address.";
      extraOptions = mkOpt' qemuOpts { } "Extra QEMU options to set for the NIC.";
    };
  };
|
|
|
|
|
2022-05-07 17:27:12 +01:00
|
|
|
# Submodule describing one drive: a backend blockdev, a format blockdev layered
# on top of it, and the frontend device exposed to the guest.
driveOpts =
  let
    inherit (lib.types) str;
  in
  {
    options = {
      name = mkOpt' str null "Drive name.";

      # Backend / format blockdev pair.
      backend = mkOpt' qemuOpts { } "Backend blockdev options.";
      format = mkOpt' qemuOpts { } "Format blockdev options.";
      formatBackendProp = mkOpt' str "file" "Property that references the backend blockdev.";

      # Guest-visible device.
      frontend = mkOpt' str "virtio-blk" "Frontend device driver.";
      frontendOpts = mkOpt' qemuOpts { } "Frontend device options.";
    };
  };
|
|
|
|
|
2022-05-16 00:05:02 +01:00
|
|
|
# Submodule describing one host PCI device handed to a VM.
hostDevOpts =
  let
    inherit (lib.types) str ints;
  in
  {
    options = {
      index = mkOpt' ints.unsigned null "Index of device in guest (for root port chassis and slot).";
      hostBDF = mkOpt' str null "PCI BDF of host device.";
      bindVFIO = mkBoolOpt' true "Whether to automatically bind the device to vfio-pci.";
      extraOptions = mkOpt' qemuOpts { } "Extra QEMU options for the vfio-pci QEMU device.";
    };
  };
|
|
|
|
|
2022-05-02 20:40:11 +01:00
|
|
|
# Submodule describing a single VM instance. `name` is the instance's attribute
# name; it is forwarded to each network submodule as `iName`.
vmOpts = { name, ... }:
  let
    t = lib.types;

    # Network submodule with the VM name made available as the `iName` argument.
    networkModule = t.submoduleWith {
      modules = [ { _module.args.iName = name; } netOpts ];
    };
  in
  {
    options = {
      # QEMU invocation.
      qemuBin = mkOpt' t.path "${pkgs.qemu_kvm}/bin/qemu-kvm" "Path to QEMU executable.";
      qemuFlags = mkOpt' (t.listOf t.str) [ ] "Additional flags to pass to QEMU.";
      autoStart = mkBoolOpt' true "Whether to start the VM automatically at boot.";
      cleanShutdown.enabled = mkBoolOpt' true "Whether to attempt to cleanly shut down the guest.";
      cleanShutdown.timeout = mkOpt' t.ints.unsigned 30 "Clean shutdown timeout (in seconds).";

      # Machine definition.
      uuid = mkOpt' t.str null "QEMU machine UUID.";
      machine = mkOpt' t.str "q35" "QEMU machine type.";
      enableKVM = mkBoolOpt' true "Whether to enable KVM.";
      enableUEFI = mkBoolOpt' true "Whether to enable UEFI.";
      cpu = mkOpt' t.str "host" "QEMU CPU model.";
      smp.cpus = mkOpt' t.ints.unsigned 1 "Number of CPU cores.";
      smp.threads = mkOpt' t.ints.unsigned 1 "Number of threads per core.";
      memory = mkOpt' t.ints.unsigned 1024 "Amount of RAM (mebibytes).";
      boot = mkOpt' qemuOpts { menu = "on"; splash-time = 5000; } "Boot options.";
      vga = mkOpt' t.str "virtio" "VGA card type.";
      spice.enable = mkBoolOpt' true "Whether to enable SPICE.";

      # Attached devices.
      networks = mkOption {
        type = t.attrsOf networkModule;
        default = { };
        description = "Networks to attach VM to.";
      };
      drives = mkOpt' (t.listOf (t.submodule driveOpts)) [ ] "Drives to attach to VM.";
      hostDevices = mkOpt' (t.attrsOf (t.submodule hostDevOpts)) { } "Host PCI devices to pass to the VM.";
    };
  };
|
|
|
|
|
2022-05-16 00:05:02 +01:00
|
|
|
# Every host PCI device passed through to any VM instance, as one flat list.
# Each entry is the device's option set extended with a `name` attribute
# holding its key in that instance's `hostDevices`.
allHostDevs =
  let
    devsOf = inst:
      mapAttrsToList (devName: dev: dev // { name = devName; }) inst.hostDevices;
  in
  builtins.concatMap devsOf (attrValues cfg.instances);
|
2023-12-22 01:34:28 +00:00
|
|
|
# True when at least one device in `devs` has `bindVFIO` set.
anyVfioDevs = devs: any (dev: dev.bindVFIO) devs;
|
|
|
|
# The subset of `devs` that has `bindVFIO` set.
vfioHostDevs = devs: filter (dev: dev.bindVFIO) devs;
|
2022-05-16 00:05:02 +01:00
|
|
|
|
2022-05-29 15:22:49 +01:00
|
|
|
# Build the shell snippet that launches QEMU for the instance named `n` with
# configuration `i`.
#
# Flags are written without their leading "-" (it is added at the end) and are
# deliberately left unquoted: the result is interpreted by the shell, and the
# `tapFD` case relies on that for its redirection and `$(...)` substitution.
mkQemuScript = n: i:
  let
    # Identity, machine shape, monitors and serial console.
    coreFlags = [
      "name ${n}"
      "uuid ${i.uuid}"
      "machine ${i.machine}"
      "cpu ${i.cpu}"
      "smp cores=${toString i.smp.cpus},threads=${toString i.smp.threads}"
      "m ${toString i.memory}"
      "boot ${toString i.boot}"
      "nographic"
      "vga ${i.vga}"
      "chardev socket,id=monitor-qmp,path=/run/vms/${n}/monitor-qmp.sock,server=on,wait=off"
      "mon chardev=monitor-qmp,mode=control"
      "chardev socket,id=monitor,path=/run/vms/${n}/monitor.sock,server=on,wait=off"
      "mon chardev=monitor,mode=readline"
      "chardev socket,id=tty,path=/run/vms/${n}/tty.sock,server=on,wait=off"
      "device isa-serial,chardev=tty"
    ];

    kvmFlags = if i.enableKVM then [ "enable-kvm" ] else [ ];

    # Read-only firmware code from the OVMF package plus the per-VM variable store.
    uefiFlags =
      if i.enableUEFI then [
        "drive if=pflash,format=raw,unit=0,readonly=on,file=${cfg.ovmfPackage.fd}/FV/OVMF_CODE.fd"
        "drive if=pflash,format=raw,unit=1,file=/var/lib/vms/${n}/ovmf_vars.bin"
      ] else [ ];

    spiceFlags =
      if i.spice.enable
      then [ "spice unix=on,addr=/run/vms/${n}/spice.sock,disable-ticketing=on" ]
      else [ ];

    # One netdev + device pair per network.
    netFlagsFor = nn: c:
      let
        backend =
          if c.tapFD == null
          then "ifname=${c.ifname},script=no,downscript=no"
          else "fd=${toString c.tapFD} ${toString c.tapFD}<>/dev/tap$(cat /sys/class/net/${c.ifname}/ifindex)";
      in
      [
        "netdev tap,id=${nn},${backend}"
        ("device ${c.model},netdev=${nn},mac=${c.mac}" + extraQEMUOpts c.extraOptions)
      ];
    netFlags = builtins.concatLists (mapAttrsToList netFlagsFor i.networks);

    # With no networks configured, explicitly pass `-nic none`.
    noNicFlags = if i.networks == { } then [ "nic none" ] else [ ];

    # Backend blockdev, format blockdev referencing it, then the frontend device.
    driveFlagsFor = d: [
      "blockdev node-name=${d.name}-backend,${d.backend}"
      "blockdev node-name=${d.name}-format,${d.formatBackendProp}=${d.name}-backend,${d.format}"
      ("device ${d.frontend},id=${d.name},drive=${d.name}-format" + extraQEMUOpts d.frontendOpts)
    ];
    driveFlags = builtins.concatMap driveFlagsFor i.drives;

    # A dedicated PCIe root port per passed-through device, with the device behind it.
    hostDevFlagsFor = id: c: [
      "device pcie-root-port,id=${id}-port,chassis=${toString c.index},port=${toString c.index}"
      ("device vfio-pci,bus=${id}-port,host=${c.hostBDF}" + extraQEMUOpts c.extraOptions)
    ];
    hostDevFlags = builtins.concatLists (mapAttrsToList hostDevFlagsFor i.hostDevices);

    # User-supplied flags come first; the rest keep a fixed order.
    flags = builtins.concatLists [
      i.qemuFlags
      coreFlags
      kvmFlags
      uefiFlags
      spiceFlags
      netFlags
      noNicFlags
      driveFlags
      hostDevFlags
    ];

    args = map (flag: "-${flag}") flags;
  in
  ''
    exec ${i.qemuBin} \
      ${concatStringsSep " \\\n " args}
  '';
|
2022-05-02 20:40:11 +01:00
|
|
|
in
|
|
|
|
{
|
|
|
|
# Options declared by this module: the set of VM instances and the OVMF
# package whose firmware images they use.
options.my.vms = {
  instances = mkOpt' (lib.types.attrsOf (lib.types.submodule vmOpts)) { } "VM instances.";
  ovmfPackage = mkOpt' lib.types.package pkgs.OVMF "OVMF package.";
};
|
|
|
|
|
|
|
|
config = mkIf (cfg.instances != { }) {
|
2022-05-16 00:05:02 +01:00
|
|
|
# Reject configurations in which the same host PCI device (identified by its
# BDF) is listed more than once across all VMs.
assertions =
  let
    bdfs = map (dev: dev.hostBDF) allHostDevs;
  in
  [
    {
      message = "VMs cannot share host devices!";
      assertion = bdfs == unique bdfs;
    }
  ];
|
|
|
|
|
2022-06-17 01:53:10 +01:00
|
|
|
# `vm-tty <vm>`: attach minicom to the named VM's serial console socket
# (/run/vms/<vm>/tty.sock).
#
# The usage check uses a brace group rather than a `( ... )` subshell: `exit`
# inside a subshell only terminates the subshell, so the script would carry on
# and start minicom with an empty "$1".
environment.systemPackages = [
  (pkgs.writeShellScriptBin "vm-tty" ''
    [ $# -eq 1 ] || { echo "usage: $0 <vm>" >&2; exit 1; }
    exec ${pkgs.minicom}/bin/minicom -D unix#/run/vms/"$1"/tty.sock
  '')
];
|
|
|
|
|
2022-05-16 00:05:02 +01:00
|
|
|
# When any passed-through device asks for automatic vfio-pci binding, install
# `vfio-pci-bind` together with a rules file that tags each such device (matched
# by PCI address) with "vfio-pci-bind". Otherwise nothing is added.
services.udev.packages =
  let
    tagRule = dev:
      ''ACTION=="add", SUBSYSTEM=="pci", KERNEL=="0000:${dev.hostBDF}", TAG="vfio-pci-bind"'';

    tagRules =
      pkgs.writeTextDir
        "etc/udev/rules.d/20-vfio-tags.rules"
        (concatMapStringsSep "\n" tagRule (vfioHostDevs allHostDevs));
  in
  if anyVfioDevs allHostDevs
  then [ pkgs.vfio-pci-bind tagRules ]
  else [ ];
|
|
|
|
|
|
|
|
# /var/lib/vms holds per-VM state (e.g. each VM's ovmf_vars.bin).
# NOTE(review): presumably this registers the directory with the tmproot module
# so it survives reboots — the option's semantics are defined outside this file.
my.tmproot.persistence.config.directories = [ "/var/lib/vms" ];
|
|
|
|
|
2022-05-29 15:22:49 +01:00
|
|
|
systemd = mkMerge ([ ] ++
|
|
|
|
(mapAttrsToList (n: i: {
|
|
|
|
# TODO: LLDP?
|
|
|
|
# One networkd unit per bridged network of this VM: match the VM's TAP device
# by name (and kind "tun") and enslave it to the configured bridge. Networks
# with `bridge = null` get no unit.
network.networks =
  let
    bridged = filterAttrs (_: net: net.bridge != null) i.networks;

    toNetworkdUnit = nn: net: {
      name = "70-vm-${n}-${nn}";
      value = {
        matchConfig.Name = net.ifname;
        matchConfig.Kind = "tun";
        networkConfig.Bridge = net.bridge;
      };
    };
  in
  mapAttrs' toNetworkdUnit bridged;
|
2022-06-30 03:24:10 +01:00
|
|
|
# systemd unit that runs VM `n`.
#
#  - preStart seeds the UEFI variable store on first run and, when any device is
#    to be bound to vfio-pci, waits for the matching /dev/vfio/<group> nodes.
#  - script execs QEMU (see mkQemuScript).
#  - postStart waits for QEMU's control sockets and opens them up to group kvm.
#  - ExecStop (when clean shutdown is enabled) asks the guest to power down.
services."vm@${n}" =
  let
    # networkd wait-online units to order after. Only networks that are attached
    # to a bridge or use a pre-existing TAP (tapFD) and have waitOnline enabled
    # count. The unit instance is the bridge (or the TAP itself when there is no
    # bridge); a string `waitOnline` is appended as ":<state>".
    dependencies =
      map
        (net:
          let
            ifname = if net.bridge != null then net.bridge else net.ifname;
            arg = if net.waitOnline == true then ifname else "${ifname}:${net.waitOnline}";
          in
          "systemd-networkd-wait-online@${arg}.service")
        (filter (net: (net.bridge != null || net.tapFD != null) && net.waitOnline != false) (attrValues i.networks));

    # Sockets QEMU is expected to create under the runtime directory.
    # spice.sock is only created when SPICE is enabled; listing it
    # unconditionally made postStart wait forever for a socket that never
    # appears whenever `spice.enable = false`.
    sockets = [ "monitor-qmp" "monitor" "tty" ] ++ optional i.spice.enable "spice";
  in
  {
    description = "Virtual machine '${n}'";
    # Use `Wants=` instead of `Requires=`. Otherwise restarting the wait-online services will cause the VM to
    # restart as well.
    wants = dependencies;
    after = dependencies;
    serviceConfig = {
      ExecStop = mkIf i.cleanShutdown.enabled "${doCleanShutdown} /run/vms/${n}/monitor-qmp.sock";
      TimeoutStopSec = mkIf i.cleanShutdown.enabled i.cleanShutdown.timeout;

      RuntimeDirectory = "vms/${n}";
      StateDirectory = "vms/${n}";
    };

    preStart =
      let
        hostDevs = attrValues i.hostDevices;
      in
      ''
        if [ ! -e "$STATE_DIRECTORY"/ovmf_vars.bin ]; then
          cp "${cfg.ovmfPackage.fd}"/FV/OVMF_VARS.fd "$STATE_DIRECTORY"/ovmf_vars.bin
        fi

        ${optionalString (anyVfioDevs hostDevs) ''
          iommu_group() {
            g=/sys/bus/pci/devices/0000:$1/iommu_group
            until [ -e $g ]; do
              sleep 0.1
            done
            basename $(readlink $g)
          }
          wait_vfio() {
            until [ -e /dev/vfio/$(iommu_group $1) ]; do
              sleep 0.1
            done
          }

          ${concatMapStringsSep "\n" (d: "wait_vfio ${d.hostBDF}") (vfioHostDevs hostDevs) }
        ''}
      '';
    script = mkQemuScript n i;
    # Wait for each socket to appear, then hand it to group kvm with mode 770.
    postStart =
      ''
        socks=(${concatStringsSep " " sockets})
        for s in ''${socks[@]}; do
          path="$RUNTIME_DIRECTORY"/''${s}.sock
          until [ -e "$path" ]; do sleep 0.1; done
          chgrp kvm "$path"
          chmod 770 "$path"
        done
      '';
    restartIfChanged = mkDefault false;
    wantedBy = optional i.autoStart "machines.target";
  };
|
2022-05-29 15:22:49 +01:00
|
|
|
}) cfg.instances));
|
2022-05-02 20:40:11 +01:00
|
|
|
};
|
|
|
|
}
|