diff --git a/Cargo.lock b/Cargo.lock index faf38b65bf..3a18bc678a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -690,6 +690,7 @@ dependencies = [ "pretty_assertions", "project", "rand 0.8.5", + "regex", "serde", "serde_json", "settings", diff --git a/crates/agent/src/context.rs b/crates/agent/src/context.rs index 18ca193b0e..709f6ccb68 100644 --- a/crates/agent/src/context.rs +++ b/crates/agent/src/context.rs @@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher}; use std::path::PathBuf; use std::{ops::Range, path::Path, sync::Arc}; +use assistant_tool::outline; use collections::HashSet; use futures::future; use futures::{FutureExt, future::Shared}; use gpui::{App, AppContext as _, Entity, SharedString, Task}; -use language::Buffer; +use language::{Buffer, ParseStatus}; use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent}; use project::{Project, ProjectEntryId, ProjectPath, Worktree}; use prompt_store::{PromptStore, UserPromptId}; @@ -152,6 +153,7 @@ pub struct FileContext { pub handle: FileContextHandle, pub full_path: Arc, pub text: SharedString, + pub is_outline: bool, } impl FileContextHandle { @@ -177,14 +179,51 @@ impl FileContextHandle { log::error!("file context missing path"); return Task::ready(None); }; - let full_path = file.full_path(cx); + let full_path: Arc = file.full_path(cx).into(); let rope = buffer_ref.as_rope().clone(); let buffer = self.buffer.clone(); - cx.background_spawn(async move { + + cx.spawn(async move |cx| { + // For large files, use outline instead of full content + if rope.len() > outline::AUTO_OUTLINE_SIZE { + // Wait until the buffer has been fully parsed, so we can read its outline + if let Ok(mut parse_status) = + buffer.read_with(cx, |buffer, _| buffer.parse_status()) + { + while *parse_status.borrow() != ParseStatus::Idle { + parse_status.changed().await.log_err(); + } + + if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) { + if let Some(outline) = snapshot.outline(None) { + let items = outline + .items + .into_iter() + .map(|item| item.to_point(&snapshot)); + + if let Ok(outline_text) = + outline::render_outline(items, None, 0, usize::MAX).await + { + let context = AgentContext::File(FileContext { + handle: self, + full_path, + text: outline_text.into(), + is_outline: true, + }); + return Some((context, vec![buffer])); + } + } + } + } + } + + // Fallback to full content if we couldn't build an outline + // (or didn't need to because the file was small enough) let context = AgentContext::File(FileContext { handle: self, - full_path: full_path.into(), + full_path, text: rope.to_string().into(), + is_outline: false, }); Some((context, vec![buffer])) }) @@ -996,3 +1035,115 @@ impl Hash for AgentContextKey { } } } + +#[cfg(test)] +mod tests { + use super::*; + use gpui::TestAppContext; + use project::{FakeFs, Project}; + use serde_json::json; + use settings::SettingsStore; + use util::path; + + fn init_test_settings(cx: &mut TestAppContext) { + cx.update(|cx| { + let settings_store = SettingsStore::test(cx); + cx.set_global(settings_store); + language::init(cx); + Project::init_settings(cx); + }); + } + + // Helper to create a test project with test files + async fn create_test_project( + cx: &mut TestAppContext, + files: serde_json::Value, + ) -> Entity { + let fs = FakeFs::new(cx.background_executor.clone()); + fs.insert_tree(path!("/test"), files).await; + Project::test(fs, [path!("/test").as_ref()], cx).await + } + + #[gpui::test] + async fn test_large_file_uses_outline(cx: &mut TestAppContext) { + init_test_settings(cx); + + // Create a large file that exceeds AUTO_OUTLINE_SIZE + const LINE: &str = "Line with some text\n"; + let large_content = LINE.repeat(2 * (outline::AUTO_OUTLINE_SIZE / LINE.len())); + let content_len = large_content.len(); + + assert!(content_len > outline::AUTO_OUTLINE_SIZE); + + let file_context = file_context_for(large_content, cx).await; + + assert!( + file_context.is_outline, + "Large file should use outline format" + ); + + assert!( + file_context.text.len() < content_len, + "Outline should be smaller than original content" + ); + } + + #[gpui::test] + async fn test_small_file_uses_full_content(cx: &mut TestAppContext) { + init_test_settings(cx); + + let small_content = "This is a small file.\n"; + let content_len = small_content.len(); + + assert!(content_len < outline::AUTO_OUTLINE_SIZE); + + let file_context = file_context_for(small_content.to_string(), cx).await; + + assert!( + !file_context.is_outline, + "Small files should not get an outline" + ); + + assert_eq!(file_context.text, small_content); + } + + async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext { + // Create a test project with the file + let project = create_test_project( + cx, + json!({ + "file.txt": content, + }), + ) + .await; + + // Open the buffer + let buffer_path = project + .read_with(cx, |project, cx| project.find_project_path("file.txt", cx)) + .unwrap(); + + let buffer = project + .update(cx, |project, cx| project.open_buffer(buffer_path, cx)) + .await + .unwrap(); + + let context_handle = AgentContextHandle::File(FileContextHandle { + buffer: buffer.clone(), + context_id: ContextId::zero(), + }); + + cx.update(|cx| load_context(vec![context_handle], &project, &None, cx)) + .await + .loaded_context + .contexts + .into_iter() + .find_map(|ctx| { + if let AgentContext::File(file_ctx) = ctx { + Some(file_ctx) + } else { + None + } + }) + .expect("Should have found a file context") + } +} diff --git a/crates/assistant_tool/Cargo.toml b/crates/assistant_tool/Cargo.toml index f19586397d..e903cb5358 100644 --- a/crates/assistant_tool/Cargo.toml +++ b/crates/assistant_tool/Cargo.toml @@ -24,6 +24,7 @@ language.workspace = true language_model.workspace = true parking_lot.workspace = true project.workspace = true +regex.workspace = true serde.workspace = true serde_json.workspace = true text.workspace = true diff --git a/crates/assistant_tool/src/assistant_tool.rs b/crates/assistant_tool/src/assistant_tool.rs index 6cd62485e9..3ef54d57da 100644 --- a/crates/assistant_tool/src/assistant_tool.rs +++ b/crates/assistant_tool/src/assistant_tool.rs @@ -1,4 +1,5 @@ mod action_log; +pub mod outline; mod tool_registry; mod tool_schema; mod tool_working_set; diff --git a/crates/assistant_tool/src/outline.rs b/crates/assistant_tool/src/outline.rs new file mode 100644 index 0000000000..99d0d1957b --- /dev/null +++ b/crates/assistant_tool/src/outline.rs @@ -0,0 +1,132 @@ +use crate::ActionLog; +use anyhow::{Result, anyhow}; +use gpui::{AsyncApp, Entity}; +use language::{OutlineItem, ParseStatus}; +use project::Project; +use regex::Regex; +use std::fmt::Write; +use text::Point; + +/// For files over this size, instead of reading them (or including them in context), +/// we automatically provide the file's symbol outline instead, with line numbers. +pub const AUTO_OUTLINE_SIZE: usize = 16384; + +pub async fn file_outline( + project: Entity, + path: String, + action_log: Entity, + regex: Option, + cx: &mut AsyncApp, +) -> anyhow::Result { + let buffer = { + let project_path = project.read_with(cx, |project, cx| { + project + .find_project_path(&path, cx) + .ok_or_else(|| anyhow!("Path {path} not found in project")) + })??; + + project + .update(cx, |project, cx| project.open_buffer(project_path, cx))? + .await? + }; + + action_log.update(cx, |action_log, cx| { + action_log.track_buffer(buffer.clone(), cx); + })?; + + // Wait until the buffer has been fully parsed, so that we can read its outline. + let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?; + while *parse_status.borrow() != ParseStatus::Idle { + parse_status.changed().await?; + } + + let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?; + let Some(outline) = snapshot.outline(None) else { + return Err(anyhow!("No outline information available for this file.")); + }; + + render_outline( + outline + .items + .into_iter() + .map(|item| item.to_point(&snapshot)), + regex, + 0, + usize::MAX, + ) + .await +} + +pub async fn render_outline( + items: impl IntoIterator>, + regex: Option, + offset: usize, + results_per_page: usize, +) -> Result { + let mut items = items.into_iter().skip(offset); + + let entries = items + .by_ref() + .filter(|item| { + regex + .as_ref() + .is_none_or(|regex| regex.is_match(&item.text)) + }) + .take(results_per_page) + .collect::>(); + let has_more = items.next().is_some(); + + let mut output = String::new(); + let entries_rendered = render_entries(&mut output, entries); + + // Calculate pagination information + let page_start = offset + 1; + let page_end = offset + entries_rendered; + let total_symbols = if has_more { + format!("more than {}", page_end) + } else { + page_end.to_string() + }; + + // Add pagination information + if has_more { + writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)", + ) + } else { + writeln!( + &mut output, + "\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})", + ) + } + .ok(); + + Ok(output) +} + +fn render_entries( + output: &mut String, + items: impl IntoIterator>, +) -> usize { + let mut entries_rendered = 0; + + for item in items { + // Indent based on depth ("" for level 0, " " for level 1, etc.) + for _ in 0..item.depth { + output.push(' '); + } + output.push_str(&item.text); + + // Add position information - convert to 1-based line numbers for display + let start_line = item.range.start.row + 1; + let end_line = item.range.end.row + 1; + + if start_line == end_line { + writeln!(output, " [L{}]", start_line).ok(); + } else { + writeln!(output, " [L{}-{}]", start_line, end_line).ok(); + } + entries_rendered += 1; + } + + entries_rendered +} diff --git a/crates/assistant_tools/src/code_symbols_tool.rs b/crates/assistant_tools/src/code_symbols_tool.rs index 73d6684773..8da5646ed0 100644 --- a/crates/assistant_tools/src/code_symbols_tool.rs +++ b/crates/assistant_tools/src/code_symbols_tool.rs @@ -4,10 +4,10 @@ use std::sync::Arc; use crate::schema::json_schema_for; use anyhow::{Result, anyhow}; +use assistant_tool::outline; use assistant_tool::{ActionLog, Tool, ToolResult}; use collections::IndexMap; use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task}; -use language::{OutlineItem, ParseStatus, Point}; use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat}; use project::{Project, Symbol}; use regex::{Regex, RegexBuilder}; @@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool { }; cx.spawn(async move |cx| match input.path { - Some(path) => file_outline(project, path, action_log, regex, cx).await, + Some(path) => outline::file_outline(project, path, action_log, regex, cx).await, None => project_symbols(project, regex, input.offset, cx).await, }) .into() } } -pub async fn file_outline( - project: Entity, - path: String, - action_log: Entity, - regex: Option, - cx: &mut AsyncApp, -) -> anyhow::Result { - let buffer = { - let project_path = project.read_with(cx, |project, cx| { - project - .find_project_path(&path, cx) - .ok_or_else(|| anyhow!("Path {path} not found in project")) - })??; - - project - .update(cx, |project, cx| project.open_buffer(project_path, cx))? - .await? - }; - - action_log.update(cx, |action_log, cx| { - action_log.track_buffer(buffer.clone(), cx); - })?; - - // Wait until the buffer has been fully parsed, so that we can read its outline. - let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?; - while *parse_status.borrow() != ParseStatus::Idle { - parse_status.changed().await?; - } - - let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?; - let Some(outline) = snapshot.outline(None) else { - return Err(anyhow!("No outline information available for this file.")); - }; - - render_outline( - outline - .items - .into_iter() - .map(|item| item.to_point(&snapshot)), - regex, - 0, - usize::MAX, - ) - .await -} - async fn project_symbols( project: Entity, regex: Option, @@ -291,77 +245,3 @@ async fn project_symbols( output }) } - -async fn render_outline( - items: impl IntoIterator>, - regex: Option, - offset: usize, - results_per_page: usize, -) -> Result { - let mut items = items.into_iter().skip(offset); - - let entries = items - .by_ref() - .filter(|item| { - regex - .as_ref() - .is_none_or(|regex| regex.is_match(&item.text)) - }) - .take(results_per_page) - .collect::>(); - let has_more = items.next().is_some(); - - let mut output = String::new(); - let entries_rendered = render_entries(&mut output, entries); - - // Calculate pagination information - let page_start = offset + 1; - let page_end = offset + entries_rendered; - let total_symbols = if has_more { - format!("more than {}", page_end) - } else { - page_end.to_string() - }; - - // Add pagination information - if has_more { - writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)", - ) - } else { - writeln!( - &mut output, - "\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})", - ) - } - .ok(); - - Ok(output) -} - -fn render_entries( - output: &mut String, - items: impl IntoIterator>, -) -> usize { - let mut entries_rendered = 0; - - for item in items { - // Indent based on depth ("" for level 0, " " for level 1, etc.) - for _ in 0..item.depth { - output.push(' '); - } - output.push_str(&item.text); - - // Add position information - convert to 1-based line numbers for display - let start_line = item.range.start.row + 1; - let end_line = item.range.end.row + 1; - - if start_line == end_line { - writeln!(output, " [L{}]", start_line).ok(); - } else { - writeln!(output, " [L{}-{}]", start_line, end_line).ok(); - } - entries_rendered += 1; - } - - entries_rendered -} diff --git a/crates/assistant_tools/src/contents_tool.rs b/crates/assistant_tools/src/contents_tool.rs index a7494816f5..48cdaee516 100644 --- a/crates/assistant_tools/src/contents_tool.rs +++ b/crates/assistant_tools/src/contents_tool.rs @@ -1,8 +1,8 @@ use std::sync::Arc; -use crate::{code_symbols_tool::file_outline, schema::json_schema_for}; +use crate::schema::json_schema_for; use anyhow::{Result, anyhow}; -use assistant_tool::{ActionLog, Tool, ToolResult}; +use assistant_tool::{ActionLog, Tool, ToolResult, outline}; use gpui::{AnyWindowHandle, App, Entity, Task}; use itertools::Itertools; use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat}; @@ -14,10 +14,6 @@ use ui::IconName; use util::markdown::MarkdownInlineCode; /// If the model requests to read a file whose size exceeds this, then -/// the tool will return the file's symbol outline instead of its contents, -/// and suggest trying again using line ranges from the outline. -const MAX_FILE_SIZE_TO_READ: usize = 16384; - /// If the model requests to list the entries in a directory with more /// entries than this, then the tool will return a subset of the entries /// and suggest trying again. @@ -218,7 +214,7 @@ impl Tool for ContentsTool { // No line ranges specified, so check file size to see if it's too big. let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?; - if file_size <= MAX_FILE_SIZE_TO_READ { + if file_size <= outline::AUTO_OUTLINE_SIZE { let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?; action_log.update(cx, |log, cx| { @@ -229,7 +225,7 @@ impl Tool for ContentsTool { } else { // File is too big, so return its outline and a suggestion to // read again with a line number range specified. - let outline = file_outline(project, file_path, action_log, None, cx).await?; + let outline = outline::file_outline(project, file_path, action_log, None, cx).await?; Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline.")) } diff --git a/crates/assistant_tools/src/read_file_tool.rs b/crates/assistant_tools/src/read_file_tool.rs index 901f790382..38308df882 100644 --- a/crates/assistant_tools/src/read_file_tool.rs +++ b/crates/assistant_tools/src/read_file_tool.rs @@ -1,5 +1,6 @@ -use crate::{code_symbols_tool::file_outline, schema::json_schema_for}; +use crate::schema::json_schema_for; use anyhow::{Result, anyhow}; +use assistant_tool::outline; use assistant_tool::{ActionLog, Tool, ToolResult}; use gpui::{AnyWindowHandle, App, Entity, Task}; @@ -14,10 +15,6 @@ use ui::IconName; use util::markdown::MarkdownInlineCode; /// If the model requests to read a file whose size exceeds this, then -/// the tool will return an error along with the model's symbol outline, -/// and suggest trying again using line ranges from the outline. -const MAX_FILE_SIZE_TO_READ: usize = 16384; - #[derive(Debug, Serialize, Deserialize, JsonSchema)] pub struct ReadFileToolInput { /// The relative path of the file to read. @@ -144,7 +141,7 @@ impl Tool for ReadFileTool { // No line ranges specified, so check file size to see if it's too big. let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?; - if file_size <= MAX_FILE_SIZE_TO_READ { + if file_size <= outline::AUTO_OUTLINE_SIZE { // File is small enough, so return its contents. let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?; @@ -154,9 +151,9 @@ impl Tool for ReadFileTool { Ok(result) } else { - // File is too big, so return an error with the outline + // File is too big, so return the outline // and a suggestion to read again with line numbers. - let outline = file_outline(project, file_path, action_log, None, cx).await?; + let outline = outline::file_outline(project, file_path, action_log, None, cx).await?; Ok(formatdoc! {" This file was too big to read all at once. Here is an outline of its symbols: