From 8b5835de171349d0d0e5aaa5f1563934b98e98c0 Mon Sep 17 00:00:00 2001 From: Agus Zubiaga Date: Wed, 23 Apr 2025 21:24:41 -0300 Subject: [PATCH] agent: Improve initial file search quality (#29317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR significantly improves the quality of the initial file search that occurs when the model doesn't yet know the full path to a file it needs to read/edit. Previously, the assertions in file_search often failed on main as the model attempted to guess full file paths. On this branch, it reliably calls `find_path` (previously `path_search`) before reading files. After getting the model to find paths first, I noticed it would try using `grep` instead of `path_search`. This motivated renaming `path_search` to `find_path` (continuing the analogy to unix commands) and adding system prompt instructions about proper tool selection. Note: I know the command is just called `find`, but that seemed too general. In my eval runs, the `file_search` example improved from 40% ± 10% to 98% ± 2%. The only assertion I'm seeing occasionally fail is "glob starts with `**` or project". We can probably add some instructions in that regard. Release Notes: - N/A --- assets/prompts/assistant_system_prompt.hbs | 3 ++ assets/settings/default.json | 4 +- crates/assistant_tools/src/assistant_tools.rs | 8 ++-- crates/assistant_tools/src/batch_tool.rs | 2 +- ...{path_search_tool.rs => find_path_tool.rs} | 18 ++++---- .../description.md | 2 +- crates/assistant_tools/src/grep_tool.rs | 2 + .../src/list_directory_tool/description.md | 2 +- crates/eval/src/examples/file_search.rs | 15 +++--- crates/migrator/src/migrations.rs | 6 +++ .../src/migrations/m_2025_04_23/settings.rs | 27 +++++++++++ crates/migrator/src/migrator.rs | 46 ++++++++++++++++++- 12 files changed, 107 insertions(+), 28 deletions(-) rename crates/assistant_tools/src/{path_search_tool.rs => find_path_tool.rs} (92%) rename crates/assistant_tools/src/{path_search_tool => find_path_tool}/description.md (84%) create mode 100644 crates/migrator/src/migrations/m_2025_04_23/settings.rs diff --git a/assets/prompts/assistant_system_prompt.hbs b/assets/prompts/assistant_system_prompt.hbs index 1aca0a938b..4a3e574e67 100644 --- a/assets/prompts/assistant_system_prompt.hbs +++ b/assets/prompts/assistant_system_prompt.hbs @@ -31,6 +31,9 @@ If appropriate, use tool calls to explore the current project, which contains th - When looking for symbols in the project, prefer the `grep` tool. - As you learn about the structure of the project, use that information to scope `grep` searches to targeted subtrees of the project. - Bias towards not asking the user for help if you can find the answer yourself. +{{! TODO: Only mention tools if they are enabled }} +- The user might specify a partial file path. If you don't know the full path, use `find_path` (not `grep`) before you read the file. +- Before you read or edit a file, you must first find the full path. DO NOT ever guess a file path! ## Fixing Diagnostics diff --git a/assets/settings/default.json b/assets/settings/default.json index 60c8708e23..44492b0653 100644 --- a/assets/settings/default.json +++ b/assets/settings/default.json @@ -646,7 +646,7 @@ "fetch": true, "list_directory": false, "now": true, - "path_search": true, + "find_path": true, "read_file": true, "grep": true, "thinking": true, @@ -670,7 +670,7 @@ "list_directory": true, "move_path": false, "now": false, - "path_search": true, + "find_path": true, "read_file": true, "grep": true, "rename": false, diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs index 2063c7d2a0..1eca808f6d 100644 --- a/crates/assistant_tools/src/assistant_tools.rs +++ b/crates/assistant_tools/src/assistant_tools.rs @@ -9,12 +9,12 @@ mod delete_path_tool; mod diagnostics_tool; mod edit_file_tool; mod fetch_tool; +mod find_path_tool; mod grep_tool; mod list_directory_tool; mod move_path_tool; mod now_tool; mod open_tool; -mod path_search_tool; mod read_file_tool; mod rename_tool; mod replace; @@ -45,11 +45,11 @@ use crate::delete_path_tool::DeletePathTool; use crate::diagnostics_tool::DiagnosticsTool; use crate::edit_file_tool::EditFileTool; use crate::fetch_tool::FetchTool; +use crate::find_path_tool::FindPathTool; use crate::grep_tool::GrepTool; use crate::list_directory_tool::ListDirectoryTool; use crate::now_tool::NowTool; use crate::open_tool::OpenTool; -use crate::path_search_tool::PathSearchTool; use crate::read_file_tool::ReadFileTool; use crate::rename_tool::RenameTool; use crate::symbol_info_tool::SymbolInfoTool; @@ -58,7 +58,7 @@ use crate::thinking_tool::ThinkingTool; pub use create_file_tool::CreateFileToolInput; pub use edit_file_tool::EditFileToolInput; -pub use path_search_tool::PathSearchToolInput; +pub use find_path_tool::FindPathToolInput; pub use read_file_tool::ReadFileToolInput; pub fn init(http_client: Arc, cx: &mut App) { @@ -81,7 +81,7 @@ pub fn init(http_client: Arc, cx: &mut App) { registry.register_tool(OpenTool); registry.register_tool(CodeSymbolsTool); registry.register_tool(ContentsTool); - registry.register_tool(PathSearchTool); + registry.register_tool(FindPathTool); registry.register_tool(ReadFileTool); registry.register_tool(GrepTool); registry.register_tool(RenameTool); diff --git a/crates/assistant_tools/src/batch_tool.rs b/crates/assistant_tools/src/batch_tool.rs index 08f2067ca3..c8514f2f9e 100644 --- a/crates/assistant_tools/src/batch_tool.rs +++ b/crates/assistant_tools/src/batch_tool.rs @@ -97,7 +97,7 @@ pub struct BatchToolInput { /// } /// }, /// { - /// "name": "path_search", + /// "name": "find_path", /// "input": { /// "glob": "**/*test*.rs" /// } diff --git a/crates/assistant_tools/src/path_search_tool.rs b/crates/assistant_tools/src/find_path_tool.rs similarity index 92% rename from crates/assistant_tools/src/path_search_tool.rs rename to crates/assistant_tools/src/find_path_tool.rs index 694d89cd30..3b2133bd60 100644 --- a/crates/assistant_tools/src/path_search_tool.rs +++ b/crates/assistant_tools/src/find_path_tool.rs @@ -12,7 +12,7 @@ use util::paths::PathMatcher; use worktree::Snapshot; #[derive(Debug, Serialize, Deserialize, JsonSchema)] -pub struct PathSearchToolInput { +pub struct FindPathToolInput { /// The glob to match against every path in the project. /// /// @@ -34,11 +34,11 @@ pub struct PathSearchToolInput { const RESULTS_PER_PAGE: usize = 50; -pub struct PathSearchTool; +pub struct FindPathTool; -impl Tool for PathSearchTool { +impl Tool for FindPathTool { fn name(&self) -> String { - "path_search".into() + "find_path".into() } fn needs_confirmation(&self, _: &serde_json::Value, _: &App) -> bool { @@ -46,7 +46,7 @@ impl Tool for PathSearchTool { } fn description(&self) -> String { - include_str!("./path_search_tool/description.md").into() + include_str!("./find_path_tool/description.md").into() } fn icon(&self) -> IconName { @@ -54,11 +54,11 @@ impl Tool for PathSearchTool { } fn input_schema(&self, format: LanguageModelToolSchemaFormat) -> Result { - json_schema_for::(format) + json_schema_for::(format) } fn ui_text(&self, input: &serde_json::Value) -> String { - match serde_json::from_value::(input.clone()) { + match serde_json::from_value::(input.clone()) { Ok(input) => format!("Find paths matching “`{}`”", input.glob), Err(_) => "Search paths".to_string(), } @@ -73,7 +73,7 @@ impl Tool for PathSearchTool { _window: Option, cx: &mut App, ) -> ToolResult { - let (offset, glob) = match serde_json::from_value::(input) { + let (offset, glob) = match serde_json::from_value::(input) { Ok(input) => (input.offset, input.glob), Err(err) => return Task::ready(Err(anyhow!(err))).into(), }; @@ -144,7 +144,7 @@ mod test { use util::path; #[gpui::test] - async fn test_path_search_tool(cx: &mut TestAppContext) { + async fn test_find_path_tool(cx: &mut TestAppContext) { init_test(cx); let fs = FakeFs::new(cx.executor()); diff --git a/crates/assistant_tools/src/path_search_tool/description.md b/crates/assistant_tools/src/find_path_tool/description.md similarity index 84% rename from crates/assistant_tools/src/path_search_tool/description.md rename to crates/assistant_tools/src/find_path_tool/description.md index 8939aec0a9..f7a697c467 100644 --- a/crates/assistant_tools/src/path_search_tool/description.md +++ b/crates/assistant_tools/src/find_path_tool/description.md @@ -1,4 +1,4 @@ -Fast file pattern matching tool that works with any codebase size +Fast file path pattern matching tool that works with any codebase size - Supports glob patterns like "**/*.js" or "src/**/*.ts" - Returns matching file paths sorted alphabetically diff --git a/crates/assistant_tools/src/grep_tool.rs b/crates/assistant_tools/src/grep_tool.rs index 32d612c54f..91bd78ca5a 100644 --- a/crates/assistant_tools/src/grep_tool.rs +++ b/crates/assistant_tools/src/grep_tool.rs @@ -20,6 +20,8 @@ use util::paths::PathMatcher; pub struct GrepToolInput { /// A regex pattern to search for in the entire project. Note that the regex /// will be parsed by the Rust `regex` crate. + /// + /// Do NOT specify a path here! This will only be matched against the code **content**. pub regex: String, /// A glob pattern for the paths of files to include in the search. diff --git a/crates/assistant_tools/src/list_directory_tool/description.md b/crates/assistant_tools/src/list_directory_tool/description.md index 7fe3a6d441..30dcc012ff 100644 --- a/crates/assistant_tools/src/list_directory_tool/description.md +++ b/crates/assistant_tools/src/list_directory_tool/description.md @@ -1 +1 @@ -Lists files and directories in a given path. Prefer the `grep` or `path_search` tools when searching the codebase. +Lists files and directories in a given path. Prefer the `grep` or `find_path` tools when searching the codebase. diff --git a/crates/eval/src/examples/file_search.rs b/crates/eval/src/examples/file_search.rs index bbee8f008c..f4b5f75213 100644 --- a/crates/eval/src/examples/file_search.rs +++ b/crates/eval/src/examples/file_search.rs @@ -1,5 +1,5 @@ use anyhow::Result; -use assistant_tools::PathSearchToolInput; +use assistant_tools::FindPathToolInput; use async_trait::async_trait; use regex::Regex; @@ -15,7 +15,7 @@ impl Example for FileSearchExample { url: "https://github.com/zed-industries/zed.git".to_string(), revision: "03ecb88fe30794873f191ddb728f597935b3101c".to_string(), language_server: None, - max_assertions: Some(4), + max_assertions: Some(3), } } @@ -32,21 +32,18 @@ impl Example for FileSearchExample { )); let response = cx.run_turn().await?; - let tool_use = response.expect_tool("path_search", cx)?; - let input = tool_use.parse_input::()?; + let tool_use = response.expect_tool("find_path", cx)?; + let input = tool_use.parse_input::()?; let glob = input.glob; - cx.assert( - glob.ends_with(FILENAME), - format!("glob ends with `{FILENAME}`"), - )?; + cx.assert(glob.ends_with(FILENAME), "glob ends with file name")?; let without_filename = glob.replace(FILENAME, ""); let matches = Regex::new("(\\*\\*|zed)/(\\*\\*?/)?") .unwrap() .is_match(&without_filename); - cx.assert(matches, "glob starts with either `**` or `zed`")?; + cx.assert(matches, "glob starts with `**` or project")?; Ok(()) } diff --git a/crates/migrator/src/migrations.rs b/crates/migrator/src/migrations.rs index ab5be31206..4aeb869706 100644 --- a/crates/migrator/src/migrations.rs +++ b/crates/migrator/src/migrations.rs @@ -51,3 +51,9 @@ pub(crate) mod m_2025_04_21 { pub(crate) use settings::SETTINGS_PATTERNS; } + +pub(crate) mod m_2025_04_23 { + mod settings; + + pub(crate) use settings::SETTINGS_PATTERNS; +} diff --git a/crates/migrator/src/migrations/m_2025_04_23/settings.rs b/crates/migrator/src/migrations/m_2025_04_23/settings.rs new file mode 100644 index 0000000000..54bb999640 --- /dev/null +++ b/crates/migrator/src/migrations/m_2025_04_23/settings.rs @@ -0,0 +1,27 @@ +use std::ops::Range; +use tree_sitter::{Query, QueryMatch}; + +use crate::MigrationPatterns; +use crate::patterns::SETTINGS_ASSISTANT_TOOLS_PATTERN; + +pub const SETTINGS_PATTERNS: MigrationPatterns = + &[(SETTINGS_ASSISTANT_TOOLS_PATTERN, rename_path_search_tool)]; + +fn rename_path_search_tool( + contents: &str, + mat: &QueryMatch, + query: &Query, +) -> Option<(Range, String)> { + let tool_name_capture_ix = query.capture_index_for_name("tool_name")?; + let tool_name_range = mat + .nodes_for_capture_index(tool_name_capture_ix) + .next()? + .byte_range(); + let tool_name = contents.get(tool_name_range.clone())?; + + if tool_name == "path_search" { + return Some((tool_name_range, "find_path".to_string())); + } + + None +} diff --git a/crates/migrator/src/migrator.rs b/crates/migrator/src/migrator.rs index 1a6b9d162e..c180ea8e48 100644 --- a/crates/migrator/src/migrator.rs +++ b/crates/migrator/src/migrator.rs @@ -132,6 +132,10 @@ pub fn migrate_settings(text: &str) -> Result> { migrations::m_2025_04_21::SETTINGS_PATTERNS, &SETTINGS_QUERY_2025_04_21, ), + ( + migrations::m_2025_04_23::SETTINGS_PATTERNS, + &SETTINGS_QUERY_2025_04_23, + ), ]; run_migrations(text, migrations) } @@ -214,6 +218,10 @@ define_query!( SETTINGS_QUERY_2025_04_21, migrations::m_2025_04_21::SETTINGS_PATTERNS ); +define_query!( + SETTINGS_QUERY_2025_04_23, + migrations::m_2025_04_23::SETTINGS_PATTERNS +); // custom query static EDIT_PREDICTION_SETTINGS_MIGRATION_QUERY: LazyLock = LazyLock::new(|| { @@ -639,7 +647,7 @@ mod tests { "name": "Custom", "tools": { "diagnostics": true, - "path_search": true, + "find_path": true, "read_file": true } } @@ -650,4 +658,40 @@ mod tests { None, ) } + + #[test] + fn test_rename_path_search_to_find_path() { + assert_migrate_settings( + r#" + { + "assistant": { + "profiles": { + "default": { + "tools": { + "path_search": true, + "read_file": true + } + } + } + } + } + "#, + Some( + r#" + { + "assistant": { + "profiles": { + "default": { + "tools": { + "find_path": true, + "read_file": true + } + } + } + } + } + "#, + ), + ); + } }