# fred/.github/workflows/eval.yml

name: Run Agent Eval

# Triggers: pull-request activity on any base branch, a nightly schedule, and
# manual dispatch. `labeled` is listed so that adding a label to an existing
# PR re-evaluates the label gate on the `run_eval` job.
on:
  pull_request:
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    branches:
      - '**'

  # Minute 0, hour 0, every day (GitHub evaluates schedules in UTC).
  schedule:
    - cron: '0 0 * * *'

  workflow_dispatch:

# On `main` the commit SHA is part of the group name, so runs for different
# commits do not cancel one another. On every other ref the last segment is
# the constant `anysha`, so a newer run of this workflow on the same ref
# cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}"

# Environment shared by every step of every job in this workflow.
env:
  # Cargo / Rust diagnostics.
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  RUST_BACKTRACE: "1"
  # Values injected from repository secrets.
  ANTHROPIC_API_KEY: "${{ secrets.ANTHROPIC_API_KEY }}"
  ZED_CLIENT_CHECKSUM_SEED: "${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}"
  ZED_EVAL_TELEMETRY: "1"

jobs:
  # Placeholder job that only prints a message. It exists so that a PR
  # without the `run-eval` label still has a job to run, which keeps GitHub
  # from marking the workflow as failed.
  noop:
    runs-on: ubuntu-latest
    name: No-op
    steps:
      - name: No-op
        run: 'echo "Nothing to do"'

  # Builds and runs the `eval` package on a BuildJet runner.
  run_eval:
    timeout-minutes: 60
    name: Run Agent Eval
    # Gate: the repository must be owned by `zed-industries`, and the event is
    # either not a pull request (schedule, manual dispatch) or is a pull
    # request that carries the `run-eval` label.
    if: >
      github.repository_owner == 'zed-industries' &&
      (github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
    runs-on:
      - buildjet-16vcpu-ubuntu-2204
    steps:
      # Appending to the file named by $GITHUB_PATH extends PATH for all later
      # steps. The variable is quoted so the redirect target stays a single
      # word regardless of what the runner's path contains.
      - name: Add Rust to the PATH
        run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"

      - name: Checkout repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          # Do not wipe the existing working tree before fetching.
          clean: false

      - name: Cache dependencies
        uses: swatinem/rust-cache@9d47c6ad4b02e050fd481d890b2ea34778fd09d6 # v2
        with:
          # Only runs on `main` write the cache; other refs restore from it.
          save-if: ${{ github.ref == 'refs/heads/main' }}
          cache-provider: "buildjet"

      - name: Install Linux dependencies
        run: ./script/linux

      # Copies the repository's CI cargo config into a `.cargo` directory one
      # level above the checkout. The "Clean CI config file" step removes it.
      - name: Configure CI
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml

      # Built in its own step so a compile failure is reported separately
      # from a failure of the eval run itself.
      - name: Compile eval
        run: cargo build --package=eval

      - name: Run eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1

      # The Linux runner is not stateful, so in theory this cleanup is not
      # needed. It is kept as a precaution: if we ever switch to a stateful
      # Linux runner and forget to add cleanup, the copied config file would
      # otherwise be left behind for later runs. `always()` makes the step
      # run even when an earlier step has failed.
      - name: Clean CI config file
        if: always()
        run: rm -rf ./../.cargo