{ lib, pkgs, options, config, systems, ... }:
let
  inherit (builtins) attrNames attrValues all hashString toJSON any;
  inherit (lib)
    groupBy' mapAttrsToList optionalString optional concatMapStringsSep filterAttrs mkOption mkDefault mkIf mkMerge;
  inherit (lib.my) mkOpt' mkBoolOpt';

  cfg = config.my.containers;

  devVMKeyPath = "/run/dev.key";
  ctrProfiles = n: "/nix/var/nix/profiles/per-container/${n}";

  dummyReady = pkgs.runCommandCC "dummy-sd-ready" {
    buildInputs = [ pkgs.systemd ];
    passAsFile = [ "code" ];
    code = ''
      #include <stdio.h>
      #include <signal.h>
      #include <unistd.h>
      #include <systemd/sd-daemon.h>

      void handler(int signum) {
        exit(0);
      }

      int main() {
        // systemd sends this to PID 1 for an "orderly shutdown"
        signal(SIGRTMIN+3, handler);

        int ret =
          sd_notifyf(0, "READY=1\n"
            "STATUS=Dummy container, please deploy for real!\n"
            "MAINPID=%lu",
            (unsigned long)getpid());
        if (ret <= 0) {
          fprintf(stderr, "sd_notify() returned %d\n", ret);
          return ret == 0 ? -1 : ret;
        }

        pause();
        return 0;
      };
    '';
  } ''
    $CC -o "$out" -x c -lsystemd "$codePath"
  '';
  dummyProfile = pkgs.writeTextFile {
    name = "dummy-init";
    executable = true;
    destination = "/init";
    # Although this will be in the new root, the shell will be available because the store will be mounted!
    text = ''
      #!${pkgs.runtimeShell}
      ${pkgs.iproute2}/bin/ip link set dev host0 up

      exec ${dummyReady}
    '';
  };

  bindMountOpts = with lib.types; { name, ... }: {
    options = {
      mountPoint = mkOption {
        default = name;
        example = "/mnt/usb";
        type = str;
        description = "Mount point on the container file system.";
      };
      hostPath = mkOption {
        default = name;
        example = "/home/alice";
        type = nullOr str;
        description = "Location of the host path to be mounted.";
      };
      readOnly = mkOption {
        default = true;
        type = bool;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };
  };

  containerOpts = with lib.types; { name, ... }: {
    options = {
      system = mkOpt' path "${ctrProfiles name}/system" "Path to NixOS system configuration.";
      containerSystem = mkOpt' path "/nix/var/nix/profiles/system" "Path to NixOS system configuration from within container.";
      autoStart = mkBoolOpt' true "Whether to start the container automatically at boot.";
      hotReload = mkBoolOpt' true
        "Whether to apply new configuration by running `switch-to-configuration` instead of rebooting the container.";

      # Yoinked from nixos/modules/virtualisation/nixos-containers.nix
      bindMounts = mkOption {
        type = attrsOf (submodule bindMountOpts);
        default = { };
        description =
          ''
            An extra list of directories that is bound to the container.
          '';
      };
      networking = {
        bridge = mkOpt' (nullOr str) null "Network bridge to connect to.";
        macVLAN = mkOpt' (nullOr str) null "Network interface to make MACVLAN interface from.";
      };
    };
  };
in
{
  options.my.containers = with lib.types; {
    persistDir = mkOpt' str "/persist/containers" "Where to store container persistence data.";
    instances = mkOpt' (attrsOf (submodule containerOpts)) { } "Individual containers.";
  };

  config = mkMerge [
    (mkIf (cfg.instances != { }) {
      assertions = [
        {
          assertion = config.systemd.network.enable;
          message = "Containers currently require systemd-networkd!";
        }
        {
          assertion = all (i: i.networking.bridge == null || i.networking.macVLAN == null) (attrValues cfg.instances);
          message = "Only bridge OR MACVLAN can be set";
        }
      ];

      # TODO: Better security
      my.firewall.trustedInterfaces =
        mapAttrsToList
          (n: _: "ve-${n}")
          (filterAttrs (_: c: c.networking.bridge == null && c.networking.macVLAN == null) cfg.instances);

      systemd = mkMerge (mapAttrsToList (n: c: {
        nspawn."${n}" = {
          execConfig = {
            Boot = true;
            Ephemeral = true;
            LinkJournal = false;
            NotifyReady = true;
            ResolvConf = "bind-stub";
            PrivateUsers = false;
          };
          filesConfig =
          let
            binds = groupBy'
              (l: b: l ++ [ (if b.hostPath != null then "${b.hostPath}:${b.mountPoint}" else b.mountPoint) ])
              [ ]
              (b: if b.readOnly then "ro" else "rw")
              (attrValues c.bindMounts);
          in {
            BindReadOnly = [
              "/nix/store"
              "/nix/var/nix/db"
              "/nix/var/nix/daemon-socket"
            ] ++ optional config.my.build.isDevVM "${config.my.secrets.vmKeyPath}:${devVMKeyPath}" ++ binds.ro or [ ];
            Bind = [
              "${ctrProfiles n}:/nix/var/nix/profiles"
              "/nix/var/nix/gcroots/per-container/${n}:/nix/var/nix/gcroots"
              "${cfg.persistDir}/${n}:/persist"
            ] ++ binds.rw or [ ];
          };
          networkConfig = if (c.networking.bridge != null) then {
            Bridge = c.networking.bridge;
          } else if (c.networking.macVLAN != null) then {
            MACVLAN = "${c.networking.macVLAN}:host0";
          } else {
            VirtualEthernet = true;
          };
        };
        services."systemd-nspawn@${n}" =
        let
          sysProfile = "${ctrProfiles n}/system";
          system = if
            config.my.build.isDevVM then
            systems."${n}".configuration.config.my.buildAs.container else
            c.system;
          containerSystem = if
            config.my.build.isDevVM then
            system else
            c.containerSystem;
        in
        {
          # To prevent creating a whole new unit file
          overrideStrategy = "asDropin";

          environment = {
            # systemd.nspawn units can't set the root directory directly, but /run/machines/${n} is one of the search paths
            root = "/run/machines/${n}";
            # Without this, systemd-nspawn will do cgroupsv1
            SYSTEMD_NSPAWN_UNIFIED_HIERARCHY = "1";
          };
          restartTriggers = [
            (''${n}.nspawn:${hashString "sha256" (toJSON config.systemd.nspawn."${n}")}'')
          ];

          preStart =
          ''
            mkdir -p -m 0755 \
              /nix/var/nix/{profiles,gcroots}/per-container/${n} \
              ${cfg.persistDir}/${n}

            ${optionalString (system == sysProfile)
            ''
              if [ ! -e "${sysProfile}" ]; then
                echo "Creating dummy profile"
                ${config.nix.package}/bin/nix-env -p ${sysProfile} --set ${dummyProfile}
              fi
            ''}

            mkdir -p -m 0755 "$root"/sbin "$root"/etc
            touch "$root"/etc/os-release

            ${if system == sysProfile then ''
              if [ -e "${sysProfile}"/prepare-root ]; then
                initSource="${containerSystem}"/prepare-root
              else
                initSource="${containerSystem}"/init
              fi
              ln -sf "$initSource" "$root"/sbin/init
            '' else ''
              ln -sf "${containerSystem}/prepare-root" "$root"/sbin/init
            ''}
          '';
          postStop =
          ''
            rm -rf "$root"
          '';
          reload =
          # `switch-to-configuration test` switches config without trying to update bootloader
          ''
            [ -e "${system}"/bin/switch-to-configuration ] && \
              systemd-run --pipe --machine ${n} -- "${containerSystem}"/bin/switch-to-configuration test
          '';

          wantedBy = optional c.autoStart "machines.target";
        };
        network.networks."80-container-${n}-vb" = mkIf (c.networking.bridge != null) {
          matchConfig = {
            Name = "vb-${n}";
            Driver = "veth";
          };
          networkConfig = {
            # systemd LLDP doesn't work on bridge interfaces
            LLDP = true;
            EmitLLDP = "customer-bridge";
            # Although nspawn will set the veth's master, systemd will clear it (systemd 250 adds a `KeepMaster`
            # to avoid this)
            Bridge = c.networking.bridge;
          };
        };
      }) cfg.instances);
    })

    # Inside container
    (mkIf config.boot.isContainer {
      assertions = [
        {
          assertion = config.systemd.network.enable;
          message = "Containers currently require systemd-networkd!";
        }
      ];

      nix = {
        gc.automatic = false;
      };

      my = {
        tmproot = {
          enable = true;
          persistence.dir = "/persist";
        };
      };

      system.activationScripts = {
        # So that update-users-groups.pl can see the saved info. Normally stage-1-init.sh would do these mounts early.
        earlyPersist.text = ''
          if ! mountpoint -q /var/lib/nixos; then
            mkdir -p {/persist,}/var/lib/nixos
            mount --bind {/persist,}/var/lib/nixos
          fi
        '';
        users.deps = [ "earlyPersist" ];

        # Ordinarily I think the Nix daemon does this but ofc it doesn't in the container
        createNixPerUserDirs = {
          text =
            let
              users = attrValues (filterAttrs (_: u: u.isNormalUser) config.users.users);
            in
              concatMapStringsSep "\n"
                (u: ''install -d -o ${u.name} -g ${u.group} /nix/var/nix/{profiles,gcroots}/per-user/"${u.name}"'') users;
          deps = [ "users" "groups" ];
        };

        # age requires all keys to at least exist, even if they're not going to be used
        agenixInstall.deps = [ "ensureDevKey" ];
        ensureDevKey.text =
        ''
          [ ! -e "${devVMKeyPath}" ] && touch "${devVMKeyPath}"
        '';
      };

      networking = {
        useHostResolvConf = false;
      };
      # Replace the pre-installed 80-container-host0
      systemd.network.networks."80-container-host0" = {
        matchConfig = {
          Name = "host0";
          Virtualization = "container";
        };
        networkConfig = {
          LLDP = true;
          EmitLLDP = "customer-bridge";
        };
      };

      # If the host is a dev VM
      age.identityPaths = [ devVMKeyPath ];
    })
  ];
}