Use GHA eval to assign rebuild labels (#359704)

2024-11-29 23:21:39 +01:00 · 2024-11-29 23:21:39 +01:00 · 82434f382c
commit 82434f382c
parent c9c0906085 af1aa40e73
4 changed files with 292 additions and 16 deletions
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@ -1,6 +1,16 @@
 name: Eval
-on: pull_request_target
+on:
  pull_request_target:
  push:
    # Keep this synced with ci/request-reviews/dev-branches.txt
    branches:
      - master
      - staging
      - release-*
      - staging-*
      - haskell-updates
      - python-updates
 permissions:
  contents: read
@ -11,6 +21,7 @@ jobs:
    runs-on: ubuntu-latest
    outputs:
      mergedSha: ${{ steps.merged.outputs.mergedSha }}
      baseSha: ${{ steps.baseSha.outputs.baseSha }}
      systems: ${{ steps.systems.outputs.systems }}
    steps:
      # Important: Because of `pull_request_target`, this doesn't check out the PR,
@ -24,14 +35,22 @@ jobs:
        id: merged
        env:
          GH_TOKEN: ${{ github.token }}
          GH_EVENT: ${{ github.event_name }}
        run: |
-          if mergedSha=$(base/ci/get-merge-commit.sh ${{ github.repository }} ${{ github.event.number }}); then
+          case "$GH_EVENT" in
-            echo "Checking the merge commit $mergedSha"
+            push)
-            echo "mergedSha=$mergedSha" >> "$GITHUB_OUTPUT"
+              echo "mergedSha=${{ github.sha }}" >> "$GITHUB_OUTPUT"
-          else
+              ;;
-            # Skipping so that no notifications are sent
+            pull_request_target)
-            echo "Skipping the rest..."
+              if mergedSha=$(base/ci/get-merge-commit.sh ${{ github.repository }} ${{ github.event.number }}); then
-          fi
+                echo "Checking the merge commit $mergedSha"
                echo "mergedSha=$mergedSha" >> "$GITHUB_OUTPUT"
              else
                # Skipping so that no notifications are sent
                echo "Skipping the rest..."
              fi
              ;;
          esac
          rm -rf base
      - name: Check out the PR at the test merge commit
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@ -39,8 +58,16 @@ jobs:
        if: steps.merged.outputs.mergedSha
        with:
          ref: ${{ steps.merged.outputs.mergedSha }}
          fetch-depth: 2
          path: nixpkgs
      # Pull requests only: publish the first parent of the checked-out test merge
      # commit as the `baseSha` step output (presumably the base branch commit the
      # PR was merged onto — confirm against ci/get-merge-commit.sh).
      # The checkout above uses `fetch-depth: 2`, so the parent commit is available.
      - name: Determine base commit
        if: github.event_name == 'pull_request_target' && steps.merged.outputs.mergedSha
        id: baseSha
        run: |
          baseSha=$(git -C nixpkgs rev-parse HEAD^1)
          echo "baseSha=$baseSha" >> "$GITHUB_OUTPUT"
      - name: Install Nix
        uses: cachix/install-nix-action@08dcb3a5e62fa31e2da3d490afc4176ef55ecd72 # v30
        if: steps.merged.outputs.mergedSha
@ -105,6 +132,8 @@ jobs:
    name: Process
    runs-on: ubuntu-latest
    needs: [ outpaths, attrs ]
    outputs:
      baseRunId: ${{ steps.baseRunId.outputs.baseRunId }}
    steps:
      - name: Download output paths and eval stats for all systems
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
@ -124,18 +153,93 @@ jobs:
      # Builds the `eval.combine` attribute of nixpkgs/ci over ./intermediate and
      # leaves the result behind the `prResult` out-link.
      - name: Combine all output paths and eval stats
        run: |
          nix-build nixpkgs/ci -A eval.combine \
-            --arg resultsDir ./intermediate
+            --arg resultsDir ./intermediate \
            -o prResult
      # Publishes the contents of prResult as the `result` artifact; this is the
      # artifact name that the base-run download step in this job asks for.
      - name: Upload the combined results
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: result
-          path: result/*
+          path: prResult/*
      # Looks up the newest eval.yml run for the PR's base commit, waits until it
      # has a conclusion, and exposes its id as the `baseRunId` step output.
      # Every "cannot compare" situation exits 0 without setting the output, so
      # later steps (guarded by `baseRunId`) are skipped instead of failing the job.
      - name: Get base run id
        if: needs.attrs.outputs.baseSha
        id: baseRunId
        run: |
          # Get the latest eval.yml workflow run for the PR's base commit
          if ! run=$(gh api --method GET /repos/"$REPOSITORY"/actions/workflows/eval.yml/runs \
            -f head_sha="$BASE_SHA" \
            --jq '.workflow_runs | sort_by(.run_started_at) | .[-1]') \
            || [[ -z "$run" ]]; then
            echo "Could not find an eval.yml workflow run for $BASE_SHA, cannot make comparison"
            exit 0
          fi
          echo "Comparing against $(jq .html_url <<< "$run")"
          runId=$(jq .id <<< "$run")
          conclusion=$(jq -r .conclusion <<< "$run")
-      # TODO: Run this workflow also on `push` (on at least the main development branches)
+          # `jq -r` prints a missing conclusion as "null", while `gh api --jq` prints
+          # it as an empty line, so both spellings mean "still running".
+          while [[ "$conclusion" == null || "$conclusion" == "" ]]; do
-      # Then add an extra step here that waits for the base branch (not the merge base, because that could be very different)
+            echo "Workflow not done, waiting 10 seconds before checking again"
-      # to have completed the eval, then use
+            sleep 10
-      # gh api --method GET /repos/NixOS/nixpkgs/actions/workflows/eval.yml/runs -f head_sha=<BASE>
+            conclusion=$(gh api /repos/"$REPOSITORY"/actions/runs/"$runId" --jq '.conclusion')
-      # and follow it to the artifact results, where you can then download the outpaths.json from the base branch
+          done
-      # That can then be used to compare the number of changed paths, get evaluation stats and ping appropriate reviewers
+
          if [[ "$conclusion" != "success" ]]; then
            echo "Workflow was not successful, cannot make comparison"
            exit 0
          fi
          echo "baseRunId=$runId" >> "$GITHUB_OUTPUT"
        env:
          REPOSITORY: ${{ github.repository }}
          BASE_SHA: ${{ needs.attrs.outputs.baseSha }}
          GH_TOKEN: ${{ github.token }}
      # Fetches the `result` artifact uploaded by the base commit's workflow run
      # into ./baseResult. Pinned to a full commit SHA like every other action in
      # this workflow, rather than the mutable `v4` tag.
      - name: Download the base branch result
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        if: steps.baseRunId.outputs.baseRunId
        with:
          name: result
          path: baseResult
          github-token: ${{ github.token }}
          run-id: ${{ steps.baseRunId.outputs.baseRunId }}
      # Builds the `eval.compare` attribute of nixpkgs/ci with the downloaded base
      # result as "before" and this run's combined result as "after"; the output
      # is left behind the `comparison` out-link.
      - name: Compare against the base branch
        if: steps.baseRunId.outputs.baseRunId
        run: |
          nix-build nixpkgs/ci -A eval.compare \
            --arg beforeResultDir ./baseResult \
            --arg afterResultDir ./prResult \
            -o comparison
          # TODO: Request reviews from maintainers for packages whose files are modified in the PR
      # Publishes the contents of `comparison` as the `comparison` artifact.
      # Named differently from the earlier "combined results" upload so the two
      # steps can be told apart in the job log.
      - name: Upload the comparison results
        if: steps.baseRunId.outputs.baseRunId
        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
        with:
          name: comparison
          path: comparison/*
  # Separate job to have a very tightly scoped PR write token.
  # Runs only when the process job produced a `baseRunId`, i.e. when a
  # comparison against the base commit was actually made.
  tag:
    name: Tag
    runs-on: ubuntu-latest
    needs: process
    if: needs.process.outputs.baseRunId
    permissions:
      pull-requests: write
    steps:
      - name: Download process result
        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
        with:
          name: comparison
          path: comparison
      # POSTs the `labels` array from changed-paths.json to the labels endpoint of
      # the pull request. Context values reach the script through environment
      # variables instead of being spliced into the shell source.
      - name: Tagging pull request
        run: |
          gh api \
            --method POST \
            /repos/"$REPOSITORY"/issues/"$NUMBER"/labels \
            --input <(jq -c '{ labels: .labels }' comparison/changed-paths.json)
        env:
          GH_TOKEN: ${{ github.token }}
          REPOSITORY: ${{ github.repository }}
          NUMBER: ${{ github.event.number }}
--- a/ci/eval/compare.jq
+++ b/ci/eval/compare.jq
@ -0,0 +1,152 @@
 # Regroups a flat object keyed by "<attrpath>.<arch>-<kernel>" into
 # attrpath -> kernel -> arch. For example
 #
 #   {
 #     "hello.aarch64-linux": "a",
 #     "hello.x86_64-linux": "b",
 #     "hello.aarch64-darwin": "c",
 #     "hello.x86_64-darwin": "d"
 #   }
 #
 # becomes
 #
 #   {
 #     "hello": {
 #       "linux": {
 #         "aarch64": "a",
 #         "x86_64": "b"
 #       },
 #       "darwin": {
 #         "aarch64": "c",
 #         "x86_64": "d"
 #       }
 #     }
 #   }
 #
 # Keys without a "." or whose last component is not exactly "<arch>-<kernel>"
 # are dropped.
 def expand_system:
  [
    to_entries[]
    # "a.b.x86_64-linux" -> ["a", "b", "x86_64-linux"]
    | .key |= split(".")
    | select((.key | length) > 1)
    # "x86_64-linux" -> ["x86_64", "linux"]
    | .double = (.key[-1] | split("-"))
    | select((.double | length) == 2)
  ]
  # One group per attribute path (everything but the system component)
  | group_by(.key[0:-1])
  | map({
    key: (.[0].key[0:-1] | join(".")),
    value: (
      # Within an attribute path, one sub-object per kernel, keyed by arch
      group_by(.double[1])
      | map({
        key: .[0].double[1],
        value: (map(.key = .double[0]) | from_entries)
      })
      | from_entries
    )
  })
  | from_entries
  ;
 # Inverts an object of string arrays: every array element becomes a key whose
 # value lists the original keys it appeared under. For example
 #
 #   {
 #     "a": [ "x", "y" ],
 #     "b": [ "x" ]
 #   }
 #
 # becomes
 #
 #   {
 #     "x": [ "a", "b" ],
 #     "y": [ "a" ]
 #   }
 def transpose:
  [
    to_entries[]
    | .key as $name
    | .value[]
    | { key: $name, value: . }
  ]
  | group_by(.value)
  | map({ key: .[0].value, value: [ .[].key ] })
  | from_entries
  ;
 # Compares the keys of two objects and returns
 # {
 #   added: [ <keys present only in $after> ],
 #   removed: [ <keys present only in $before> ],
 #   changed: [ <keys present in both whose values differ> ],
 # }
 #
 # A key whose value in $after is null counts as absent for `changed`.
 def diff($before; $after):
  {
    added: ($after | delpaths([ $before | keys[] | [.] ]) | keys),
    removed: ($before | delpaths([ $after | keys[] | [.] ]) | keys),
    changed: [
      $before
      | to_entries[]
      | $after[.key] as $new
      # Keep only keys that still exist and whose value is different now
      | select($new != null and .value != $new)
      | .key
    ]
  }
  ;
 # Main program. $before and $after are one-element arrays holding the two
 # outpaths objects; the fields below are added onto the input value:
 #   attrdiff             - added / removed / changed attribute paths
 #   rebuildsByKernel     - kernel -> changed attribute paths
 #   rebuildCountByKernel - { linux, darwin } counts (0 when a kernel is absent)
 #   labels               - one "10.rebuild-<kernel>: <range>" string per kernel
 ($before[0] | expand_system) as $before
 | ($after[0] | expand_system) as $after
 | .attrdiff = diff($before; $after)
 | .rebuildsByKernel = (
  .attrdiff.changed
  | map({
    key: .,
    # For each changed attribute path, the kernels whose outputs differ
    value: diff($before."\(.)"; $after."\(.)").changed
  })
  | from_entries
  | transpose
 )
 | .rebuildCountByKernel = (
  .rebuildsByKernel
  | with_entries(.value |= length)
  | pick(.linux, .darwin)
  | {
    linux: (.linux // 0),
    darwin: (.darwin // 0),
  }
 )
 | .labels = (
  .rebuildCountByKernel
  | to_entries
  | map(
    "10.rebuild-\(.key): " +
      if .value == 0 then
        "0"
      elif .value <= 10 then
        "1-10"
      elif .value <= 100 then
        "11-100"
      elif .value <= 500 then
        "101-500"
      elif .value <= 1000 then
        "501-1000"
      elif .value <= 2500 then
        "1001-2500"
      elif .value <= 5000 then
        "2501-5000"
      else
        # Everything up to and including 5000 is handled above,
        # so this bucket starts at 5001
        "5001+"
      end
  )
 )
--- a/ci/eval/default.nix
+++ b/ci/eval/default.nix
@ -246,6 +246,24 @@ let
          jq -s from_entries > $out/stats.json
      '';
  # Derivation that compares two result directories: it runs compare.jq with
  # the `outpaths.json` of `beforeResultDir` and `afterResultDir` slurped in as
  # $before and $after, and stores the JSON that jq prints as
  # $out/changed-paths.json.
  compare =
    { beforeResultDir, afterResultDir }:
    runCommand "compare"
      {
        nativeBuildInputs = [
          jq
        ];
      }
      ''
        mkdir $out
        jq -n -f ${./compare.jq} \
          --slurpfile before ${beforeResultDir}/outpaths.json \
          --slurpfile after ${afterResultDir}/outpaths.json \
          > $out/changed-paths.json
        # TODO: Compare eval stats
      '';
  full =
    {
      # Whether to evaluate just a single system, by default all are evaluated
@ -276,6 +294,7 @@ in
    attrpathsSuperset
    singleSystem
    combine
    compare
    # The above four are used by separate VMs in a GitHub workflow,
    # while the below is intended for testing on a single local machine
    full
--- a/ci/request-reviews/dev-branches.txt
+++ b/ci/request-reviews/dev-branches.txt
@ -1,5 +1,6 @@
 # Trusted development branches:
 # These generally require PRs to update and are built by Hydra.
 # Keep this synced with the branches in .github/workflows/eval.yml
 master
 staging
 release-*