Get terminal tool working in evals (#29831)

Bypass our terminal subsystem and just run a shell in a pty.

- [x] make sure we use the same working directory
- [x] strip control chars from the pty output (?)
- [x] tests

Release Notes:

- N/A
This commit is contained in:
Cole Miller 2025-05-05 08:07:43 -04:00 committed by Joseph T. Lyons
parent 298b30c1f0
commit 711a855699
5 changed files with 287 additions and 29 deletions

74
Cargo.lock generated
View File

@ -736,6 +736,8 @@ dependencies = [
"language_models",
"linkme",
"open",
"paths",
"portable-pty",
"pretty_assertions",
"project",
"rand 0.8.5",
@ -762,6 +764,7 @@ dependencies = [
"workspace",
"workspace-hack",
"zed_llm_client",
"zlog",
]
[[package]]
@ -3991,7 +3994,7 @@ version = "3.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c"
dependencies = [
"nix",
"nix 0.29.0",
"windows-sys 0.59.0",
]
@ -9074,6 +9077,18 @@ version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]]
name = "nix"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [
"bitflags 2.9.0",
"cfg-if",
"cfg_aliases 0.1.1",
"libc",
]
[[package]]
name = "nix"
version = "0.29.0"
@ -10897,6 +10912,27 @@ dependencies = [
"portable-atomic",
]
[[package]]
name = "portable-pty"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4a596a2b3d2752d94f51fac2d4a96737b8705dddd311a32b9af47211f08671e"
dependencies = [
"anyhow",
"bitflags 1.3.2",
"downcast-rs",
"filedescriptor",
"lazy_static",
"libc",
"log",
"nix 0.28.0",
"serial2",
"shared_library",
"shell-words",
"winapi",
"winreg 0.10.1",
]
[[package]]
name = "postage"
version = "0.5.0"
@ -13308,6 +13344,17 @@ dependencies = [
"serde",
]
[[package]]
name = "serial2"
version = "0.2.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7d1d08630509d69f90eff4afcd02c3bd974d979225cbd815ff5942351b14375"
dependencies = [
"cfg-if",
"libc",
"winapi",
]
[[package]]
name = "session"
version = "0.1.0"
@ -13408,6 +13455,16 @@ dependencies = [
"lazy_static",
]
[[package]]
name = "shared_library"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9e7e0f2bfae24d8a5b5a66c5b257a83c7412304311512a0c054cd5e619da11"
dependencies = [
"lazy_static",
"libc",
]
[[package]]
name = "shell-words"
version = "1.1.0"
@ -17758,6 +17815,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "winreg"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d"
dependencies = [
"winapi",
]
[[package]]
name = "winreg"
version = "0.50.0"
@ -18210,7 +18276,7 @@ dependencies = [
"miniz_oxide",
"mio",
"naga",
"nix",
"nix 0.29.0",
"nom",
"num-bigint",
"num-bigint-dig",
@ -18587,7 +18653,7 @@ dependencies = [
"futures-core",
"futures-lite 2.6.0",
"hex",
"nix",
"nix 0.29.0",
"ordered-stream",
"serde",
"serde_repr",
@ -18700,7 +18766,7 @@ dependencies = [
"menu",
"migrator",
"mimalloc",
"nix",
"nix 0.29.0",
"node_runtime",
"notifications",
"outline",

View File

@ -493,6 +493,7 @@ pet-fs = { git = "https://github.com/microsoft/python-environment-tools.git", re
pet-pixi = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
pet-poetry = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
pet-reporter = { git = "https://github.com/microsoft/python-environment-tools.git", rev = "845945b830297a50de0e24020b980a65e4820559" }
portable-pty = "0.9.0"
postage = { version = "0.5", features = ["futures-traits"] }
pretty_assertions = { version = "1.3.0", features = ["unstable"] }
proc-macro2 = "1.0.93"

View File

@ -37,6 +37,8 @@ language.workspace = true
language_model.workspace = true
linkme.workspace = true
open.workspace = true
paths.workspace = true
portable-pty.workspace = true
project.workspace = true
regex.workspace = true
rust-embed.workspace = true
@ -75,6 +77,8 @@ reqwest_client.workspace = true
settings = { workspace = true, features = ["test-support"] }
task = { workspace = true, features = ["test-support"]}
tempfile.workspace = true
theme.workspace = true
tree-sitter-rust.workspace = true
workspace = { workspace = true, features = ["test-support"] }
unindent.workspace = true
zlog.workspace = true

View File

@ -1,11 +1,13 @@
use crate::schema::json_schema_for;
use anyhow::{Context as _, Result, anyhow};
use anyhow::{Context as _, Result, anyhow, bail};
use assistant_tool::{ActionLog, Tool, ToolCard, ToolResult, ToolUseStatus};
use gpui::{
Animation, AnimationExt, AnyWindowHandle, App, AppContext, Empty, Entity, EntityId, Task,
Transformation, WeakEntity, Window, percentage,
};
use language::LineEnding;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use portable_pty::{CommandBuilder, PtySize, native_pty_system};
use project::{Project, terminals::TerminalKind};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
@ -87,26 +89,65 @@ impl Tool for TerminalTool {
window: Option<AnyWindowHandle>,
cx: &mut App,
) -> ToolResult {
let Some(window) = window else {
return Task::ready(Err(anyhow!("no window options"))).into();
};
let input: TerminalToolInput = match serde_json::from_value(input) {
Ok(input) => input,
Err(err) => return Task::ready(Err(anyhow!(err))).into(),
};
let input_path = Path::new(&input.cd);
let working_dir = match working_dir(cx, &input, &project, input_path) {
let working_dir = match working_dir(&input, &project, input_path, cx) {
Ok(dir) => dir,
Err(err) => return Task::ready(Err(anyhow!(err))).into(),
Err(err) => return Task::ready(Err(err)).into(),
};
let command = get_system_shell();
let args = vec!["-c".into(), input.command.clone()];
let cwd = working_dir.clone();
let Some(window) = window else {
// Headless setup, a test or eval. Our terminal subsystem requires a workspace,
// so bypass it and provide a convincing imitation using a pty.
let task = cx.background_spawn(async move {
let pty_system = native_pty_system();
let mut cmd = CommandBuilder::new(command);
cmd.args(args);
if let Some(cwd) = cwd {
cmd.cwd(cwd);
}
let pair = pty_system.openpty(PtySize {
rows: 24,
cols: 80,
..Default::default()
})?;
let mut child = pair.slave.spawn_command(cmd)?;
let mut reader = pair.master.try_clone_reader()?;
drop(pair);
let mut content = Vec::new();
reader.read_to_end(&mut content)?;
let mut content = String::from_utf8(content)?;
// Massage the pty output a bit to try to match what the terminal codepath gives us
LineEnding::normalize(&mut content);
content = content
.chars()
.filter(|c| c.is_ascii_whitespace() || !c.is_ascii_control())
.collect();
let content = content.trim_start().trim_start_matches("^D");
let exit_status = child.wait()?;
let (processed_content, _) =
process_content(content, &input.command, Some(exit_status));
Ok(processed_content)
});
return ToolResult {
output: task,
card: None,
};
};
let terminal = project.update(cx, |project, cx| {
project.create_terminal(
TerminalKind::Task(task::SpawnInTerminal {
command: get_system_shell(),
args: vec!["-c".into(), input.command.clone()],
cwd: working_dir.clone(),
command,
args,
cwd,
..Default::default()
}),
window,
@ -152,8 +193,11 @@ impl Tool for TerminalTool {
})?;
let previous_len = content.len();
let (processed_content, finished_with_empty_output) =
process_content(content, &input.command, exit_status);
let (processed_content, finished_with_empty_output) = process_content(
&content,
&input.command,
exit_status.map(portable_pty::ExitStatus::from),
);
let _ = card.update(cx, |card, _| {
card.command_finished = true;
@ -177,9 +221,9 @@ impl Tool for TerminalTool {
}
fn process_content(
content: String,
content: &str,
command: &str,
exit_status: Option<ExitStatus>,
exit_status: Option<portable_pty::ExitStatus>,
) -> (String, bool) {
let should_truncate = content.len() > COMMAND_OUTPUT_LIMIT;
@ -192,7 +236,7 @@ fn process_content(
end_ix = content[..end_ix].rfind('\n').unwrap_or(end_ix);
&content[..end_ix]
} else {
content.as_str()
content
};
let is_empty = content.trim().is_empty();
@ -221,11 +265,16 @@ fn process_content(
}
}
Some(exit_status) => {
let code = exit_status.code().unwrap_or(-1);
if is_empty {
format!("Command \"{command}\" failed with exit code {code}.")
format!(
"Command \"{command}\" failed with exit code {}.",
exit_status.exit_code()
)
} else {
format!("Command \"{command}\" failed with exit code {code}.\n\n{content}")
format!(
"Command \"{command}\" failed with exit code {}.\n\n{content}",
exit_status.exit_code()
)
}
}
None => {
@ -239,11 +288,11 @@ fn process_content(
}
fn working_dir(
cx: &mut App,
input: &TerminalToolInput,
project: &Entity<Project>,
input_path: &Path,
) -> Result<Option<PathBuf>, &'static str> {
cx: &mut App,
) -> Result<Option<PathBuf>> {
let project = project.read(cx);
if input.cd == "." {
@ -253,7 +302,7 @@ fn working_dir(
match worktrees.next() {
Some(worktree) => {
if worktrees.next().is_some() {
return Err(
bail!(
"'.' is ambiguous in multi-root workspaces. Please specify a root directory explicitly.",
);
}
@ -267,13 +316,13 @@ fn working_dir(
.worktrees(cx)
.any(|worktree| input_path.starts_with(&worktree.read(cx).abs_path()))
{
return Err("The absolute path must be within one of the project's worktrees");
bail!("The absolute path must be within one of the project's worktrees");
}
Ok(Some(input_path.into()))
} else {
let Some(worktree) = project.worktree_for_root_name(&input.cd, cx) else {
return Err("`cd` directory {} not found in the project");
bail!("`cd` directory {:?} not found in the project", input.cd);
};
Ok(Some(worktree.read(cx).abs_path().to_path_buf()))
@ -507,3 +556,141 @@ impl ToolCard for TerminalToolCard {
.into_any()
}
}
#[cfg(test)]
mod tests {
use editor::EditorSettings;
use fs::RealFs;
use gpui::{BackgroundExecutor, TestAppContext};
use pretty_assertions::assert_eq;
use serde_json::json;
use settings::{Settings, SettingsStore};
use terminal::terminal_settings::TerminalSettings;
use theme::ThemeSettings;
use util::{ResultExt as _, test::TempTree};
use super::*;
#[gpui::test]
async fn test_working_directory(executor: BackgroundExecutor, cx: &mut TestAppContext) {
if cfg!(windows) {
return;
}
zlog::init();
zlog::init_output_stdout();
executor.allow_parking();
cx.update(|cx| {
let settings_store = SettingsStore::test(cx);
cx.set_global(settings_store);
language::init(cx);
Project::init_settings(cx);
workspace::init_settings(cx);
ThemeSettings::register(cx);
TerminalSettings::register(cx);
EditorSettings::register(cx);
});
let fs = Arc::new(RealFs::new(None, executor));
let tree = TempTree::new(json!({
"project": {},
"other-project": {},
}));
let project: Entity<Project> =
Project::test(fs, [tree.path().join("project").as_path()], cx).await;
let action_log = cx.update(|cx| cx.new(|_| ActionLog::new(project.clone())));
let check = |input, expected, cx: &mut App| {
let headless_result = TerminalTool::run(
Arc::new(TerminalTool),
serde_json::to_value(input).unwrap(),
&[],
project.clone(),
action_log.clone(),
None,
cx,
);
cx.spawn(async move |_| {
let output = headless_result.output.await.log_err();
assert_eq!(output, expected);
})
};
cx.update(|cx| {
check(
TerminalToolInput {
command: "pwd".into(),
cd: "project".into(),
},
Some(format!(
"```\n{}\n```",
tree.path().join("project").display()
)),
cx,
)
})
.await;
cx.update(|cx| {
check(
TerminalToolInput {
command: "pwd".into(),
cd: ".".into(),
},
Some(format!(
"```\n{}\n```",
tree.path().join("project").display()
)),
cx,
)
})
.await;
// Absolute path above the worktree root
cx.update(|cx| {
check(
TerminalToolInput {
command: "pwd".into(),
cd: tree.path().to_string_lossy().into(),
},
None,
cx,
)
})
.await;
project
.update(cx, |project, cx| {
project.create_worktree(tree.path().join("other-project"), true, cx)
})
.await
.unwrap();
cx.update(|cx| {
check(
TerminalToolInput {
command: "pwd".into(),
cd: "other-project".into(),
},
Some(format!(
"```\n{}\n```",
tree.path().join("other-project").display()
)),
cx,
)
})
.await;
cx.update(|cx| {
check(
TerminalToolInput {
command: "pwd".into(),
cd: ".".into(),
},
None,
cx,
)
})
.await;
}
}

View File

@ -539,7 +539,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["ring"]
tokio-socks = { version = "0.5", features = ["futures-io"] }
tokio-stream = { version = "0.1", features = ["fs"] }
tower = { version = "0.5", default-features = false, features = ["timeout", "util"] }
winapi = { version = "0.3", default-features = false, features = ["cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "in6addr", "inaddr", "knownfolders", "minwinbase", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "sysinfoapi", "winbase", "windef", "winerror", "winioctl"] }
winapi = { version = "0.3", default-features = false, features = ["cfg", "commapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser"] }
windows-core = { version = "0.61" }
windows-numerics = { version = "0.2" }
windows-sys-73dcd821b1037cfd = { package = "windows-sys", version = "0.59", features = ["Wdk_Foundation", "Wdk_Storage_FileSystem", "Win32_Globalization", "Win32_NetworkManagement_IpHelper", "Win32_Networking_WinSock", "Win32_Security_Authentication_Identity", "Win32_Security_Credentials", "Win32_Security_Cryptography", "Win32_Storage_FileSystem", "Win32_System_Com", "Win32_System_Console", "Win32_System_Diagnostics_Debug", "Win32_System_IO", "Win32_System_Ioctl", "Win32_System_Kernel", "Win32_System_LibraryLoader", "Win32_System_Memory", "Win32_System_Performance", "Win32_System_Pipes", "Win32_System_Registry", "Win32_System_SystemInformation", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_Time", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse", "Win32_UI_Shell", "Win32_UI_WindowsAndMessaging"] }
@ -564,7 +564,7 @@ tokio-rustls = { version = "0.26", default-features = false, features = ["ring"]
tokio-socks = { version = "0.5", features = ["futures-io"] }
tokio-stream = { version = "0.1", features = ["fs"] }
tower = { version = "0.5", default-features = false, features = ["timeout", "util"] }
winapi = { version = "0.3", default-features = false, features = ["cfg", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "in6addr", "inaddr", "knownfolders", "minwinbase", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "sysinfoapi", "winbase", "windef", "winerror", "winioctl"] }
winapi = { version = "0.3", default-features = false, features = ["cfg", "commapi", "consoleapi", "errhandlingapi", "evntrace", "fileapi", "handleapi", "impl-debug", "impl-default", "in6addr", "inaddr", "ioapiset", "knownfolders", "minwinbase", "minwindef", "namedpipeapi", "ntsecapi", "objbase", "processenv", "processthreadsapi", "shlobj", "std", "synchapi", "sysinfoapi", "timezoneapi", "winbase", "windef", "winerror", "winioctl", "winnt", "winreg", "winsock2", "winuser"] }
windows-core = { version = "0.61" }
windows-numerics = { version = "0.2" }
windows-sys-73dcd821b1037cfd = { package = "windows-sys", version = "0.59", features = ["Wdk_Foundation", "Wdk_Storage_FileSystem", "Win32_Globalization", "Win32_NetworkManagement_IpHelper", "Win32_Networking_WinSock", "Win32_Security_Authentication_Identity", "Win32_Security_Credentials", "Win32_Security_Cryptography", "Win32_Storage_FileSystem", "Win32_System_Com", "Win32_System_Console", "Win32_System_Diagnostics_Debug", "Win32_System_IO", "Win32_System_Ioctl", "Win32_System_Kernel", "Win32_System_LibraryLoader", "Win32_System_Memory", "Win32_System_Performance", "Win32_System_Pipes", "Win32_System_Registry", "Win32_System_SystemInformation", "Win32_System_SystemServices", "Win32_System_Threading", "Win32_System_Time", "Win32_System_WindowsProgramming", "Win32_UI_Input_KeyboardAndMouse", "Win32_UI_Shell", "Win32_UI_WindowsAndMessaging"] }