Make the default repetitions be 8, and concurrency 4 (#29576)
This is based on having observed that there is a lot of variation between runs on `n=1` and `n=3`.

* With `n=8`, two runs on the same branch give answers that seem close enough to be reasonably consistent.
* With higher concurrency, trying to run this many repetitions seems to lead language servers to time out a lot, causing evals to fail.

Release Notes:

- N/A
This commit is contained in:
parent
399eced884
commit
04c68dc0cf
2
.github/workflows/eval.yml
vendored
2
.github/workflows/eval.yml
vendored
@ -69,7 +69,7 @@ jobs:
|
|||||||
run: cargo build --package=eval
|
run: cargo build --package=eval
|
||||||
|
|
||||||
- name: Run eval
|
- name: Run eval
|
||||||
run: cargo run --package=eval -- --repetitions=3 --concurrency=1
|
run: cargo run --package=eval -- --repetitions=8 --concurrency=1
|
||||||
|
|
||||||
# Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
|
# Even the Linux runner is not stateful, in theory there is no need to do this cleanup.
|
||||||
# But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
|
# But, to avoid potential issues in the future if we choose to use a stateful Linux runner and forget to add code
|
||||||
|
@ -52,10 +52,10 @@ struct Args {
|
|||||||
#[arg(long, value_delimiter = ',', default_value = "rs,ts")]
|
#[arg(long, value_delimiter = ',', default_value = "rs,ts")]
|
||||||
languages: Vec<String>,
|
languages: Vec<String>,
|
||||||
/// How many times to run each example.
|
/// How many times to run each example.
|
||||||
#[arg(long, default_value = "1")]
|
#[arg(long, default_value = "8")]
|
||||||
repetitions: usize,
|
repetitions: usize,
|
||||||
/// Maximum number of examples to run concurrently.
|
/// Maximum number of examples to run concurrently.
|
||||||
#[arg(long, default_value = "10")]
|
#[arg(long, default_value = "4")]
|
||||||
concurrency: usize,
|
concurrency: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user