Make the default repetitions be 8, and concurrency 4 (#29576)

This is based on having observed that there is a lot of variation between runs with `n=1` and `n=3`.

* With `n=8`, two runs on the same branch give answers that seem close enough to be reasonably consistent.
* With higher concurrency, trying to run this many repetitions seems to cause language servers to time out a lot, causing evals to fail.

Release Notes:

- N/A
2025-04-30 15:21:19 -04:00 · 2025-04-30 15:21:19 -04:00 · 04c68dc0cf
commit 04c68dc0cf
parent 399eced884
2 changed files with 3 additions and 3 deletions
--- a/.github/workflows/eval.yml
+++ b/.github/workflows/eval.yml
@@ -69,7 +69,7 @@ jobs:
        run: cargo build --package=eval

      - name: Run eval
-        run: cargo run --package=eval -- --repetitions=3 --concurrency=1
+        run: cargo run --package=eval -- --repetitions=8 --concurrency=1

      # Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
      # But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
--- a/crates/eval/src/eval.rs
+++ b/crates/eval/src/eval.rs
@@ -52,10 +52,10 @@ struct Args {
    #[arg(long, value_delimiter = ',', default_value = "rs,ts")]
    languages: Vec<String>,
    /// How many times to run each example.
-    #[arg(long, default_value = "1")]
+    #[arg(long, default_value = "8")]
    repetitions: usize,
    /// Maximum number of examples to run concurrently.
-    #[arg(long, default_value = "10")]
+    #[arg(long, default_value = "4")]
    concurrency: usize,
 }