Use outline (#29687)

## Before

![Screenshot 2025-04-30 at 10 56 36 AM](https://github.com/user-attachments/assets/3a435f4c-ad45-4f26-a847-2d5c9d03648e)

## After

![Screenshot 2025-04-30 at 10 55 27 AM](https://github.com/user-attachments/assets/cc3a8144-b6fe-4a15-8a47-b2487ce4f66e)

Release Notes:

- Context picker and `@`-mentions now work with very large files.
2025-04-30 14:00:00 -04:00 · 2025-04-30 14:00:00 -04:00 · 50f705e779
commit 50f705e779
parent 8173534ad5
8 changed files with 301 additions and 142 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -690,6 +690,7 @@ dependencies = [
 "pretty_assertions",
 "project",
 "rand 0.8.5",
+ "regex",
 "serde",
 "serde_json",
 "settings",
--- a/crates/agent/src/context.rs
+++ b/crates/agent/src/context.rs
@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher};
 use std::path::PathBuf;
 use std::{ops::Range, path::Path, sync::Arc};

+use assistant_tool::outline;
 use collections::HashSet;
 use futures::future;
 use futures::{FutureExt, future::Shared};
 use gpui::{App, AppContext as _, Entity, SharedString, Task};
-use language::Buffer;
+use language::{Buffer, ParseStatus};
 use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
 use project::{Project, ProjectEntryId, ProjectPath, Worktree};
 use prompt_store::{PromptStore, UserPromptId};
@ -152,6 +153,7 @@ pub struct FileContext {
    pub handle: FileContextHandle,
    pub full_path: Arc<Path>,
    pub text: SharedString,
+    pub is_outline: bool,
 }

 impl FileContextHandle {
@ -177,14 +179,51 @@ impl FileContextHandle {
            log::error!("file context missing path");
            return Task::ready(None);
        };
-        let full_path = file.full_path(cx);
+        let full_path: Arc<Path> = file.full_path(cx).into();
        let rope = buffer_ref.as_rope().clone();
        let buffer = self.buffer.clone();
-        cx.background_spawn(async move {
+
+        cx.spawn(async move |cx| {
+            // For large files, use outline instead of full content
+            if rope.len() > outline::AUTO_OUTLINE_SIZE {
+                // Wait until the buffer has been fully parsed, so we can read its outline
+                if let Ok(mut parse_status) =
+                    buffer.read_with(cx, |buffer, _| buffer.parse_status())
+                {
+                    while *parse_status.borrow() != ParseStatus::Idle {
+                        parse_status.changed().await.log_err();
+                    }
+
+                    if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) {
+                        if let Some(outline) = snapshot.outline(None) {
+                            let items = outline
+                                .items
+                                .into_iter()
+                                .map(|item| item.to_point(&snapshot));
+
+                            if let Ok(outline_text) =
+                                outline::render_outline(items, None, 0, usize::MAX).await
+                            {
+                                let context = AgentContext::File(FileContext {
+                                    handle: self,
+                                    full_path,
+                                    text: outline_text.into(),
+                                    is_outline: true,
+                                });
+                                return Some((context, vec![buffer]));
+                            }
+                        }
+                    }
+                }
+            }
+
+            // Fallback to full content if we couldn't build an outline
+            // (or didn't need to because the file was small enough)
            let context = AgentContext::File(FileContext {
                handle: self,
-                full_path: full_path.into(),
+                full_path,
                text: rope.to_string().into(),
+                is_outline: false,
            });
            Some((context, vec![buffer]))
        })
@ -996,3 +1035,115 @@ impl Hash for AgentContextKey {
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use gpui::TestAppContext;
+    use project::{FakeFs, Project};
+    use serde_json::json;
+    use settings::SettingsStore;
+    use util::path;
+
+    fn init_test_settings(cx: &mut TestAppContext) {
+        cx.update(|cx| {
+            let settings_store = SettingsStore::test(cx);
+            cx.set_global(settings_store);
+            language::init(cx);
+            Project::init_settings(cx);
+        });
+    }
+
+    // Helper to create a test project with test files
+    async fn create_test_project(
+        cx: &mut TestAppContext,
+        files: serde_json::Value,
+    ) -> Entity<Project> {
+        let fs = FakeFs::new(cx.background_executor.clone());
+        fs.insert_tree(path!("/test"), files).await;
+        Project::test(fs, [path!("/test").as_ref()], cx).await
+    }
+
+    #[gpui::test]
+    async fn test_large_file_uses_outline(cx: &mut TestAppContext) {
+        init_test_settings(cx);
+
+        // Create a large file that exceeds AUTO_OUTLINE_SIZE
+        const LINE: &str = "Line with some text\n";
+        let large_content = LINE.repeat(2 * (outline::AUTO_OUTLINE_SIZE / LINE.len()));
+        let content_len = large_content.len();
+
+        assert!(content_len > outline::AUTO_OUTLINE_SIZE);
+
+        let file_context = file_context_for(large_content, cx).await;
+
+        assert!(
+            file_context.is_outline,
+            "Large file should use outline format"
+        );
+
+        assert!(
+            file_context.text.len() < content_len,
+            "Outline should be smaller than original content"
+        );
+    }
+
+    #[gpui::test]
+    async fn test_small_file_uses_full_content(cx: &mut TestAppContext) {
+        init_test_settings(cx);
+
+        let small_content = "This is a small file.\n";
+        let content_len = small_content.len();
+
+        assert!(content_len < outline::AUTO_OUTLINE_SIZE);
+
+        let file_context = file_context_for(small_content.to_string(), cx).await;
+
+        assert!(
+            !file_context.is_outline,
+            "Small files should not get an outline"
+        );
+
+        assert_eq!(file_context.text, small_content);
+    }
+
+    async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext {
+        // Create a test project with the file
+        let project = create_test_project(
+            cx,
+            json!({
+                "file.txt": content,
+            }),
+        )
+        .await;
+
+        // Open the buffer
+        let buffer_path = project
+            .read_with(cx, |project, cx| project.find_project_path("file.txt", cx))
+            .unwrap();
+
+        let buffer = project
+            .update(cx, |project, cx| project.open_buffer(buffer_path, cx))
+            .await
+            .unwrap();
+
+        let context_handle = AgentContextHandle::File(FileContextHandle {
+            buffer: buffer.clone(),
+            context_id: ContextId::zero(),
+        });
+
+        cx.update(|cx| load_context(vec![context_handle], &project, &None, cx))
+            .await
+            .loaded_context
+            .contexts
+            .into_iter()
+            .find_map(|ctx| {
+                if let AgentContext::File(file_ctx) = ctx {
+                    Some(file_ctx)
+                } else {
+                    None
+                }
+            })
+            .expect("Should have found a file context")
+    }
+}
--- a/crates/assistant_tool/Cargo.toml
+++ b/crates/assistant_tool/Cargo.toml
@ -24,6 +24,7 @@ language.workspace = true
 language_model.workspace = true
 parking_lot.workspace = true
 project.workspace = true
+regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 text.workspace = true
--- a/crates/assistant_tool/src/assistant_tool.rs
+++ b/crates/assistant_tool/src/assistant_tool.rs
@ -1,4 +1,5 @@
 mod action_log;
+pub mod outline;
 mod tool_registry;
 mod tool_schema;
 mod tool_working_set;
--- a/crates/assistant_tool/src/outline.rs
+++ b/crates/assistant_tool/src/outline.rs
@ -0,0 +1,132 @@
+use crate::ActionLog;
+use anyhow::{Result, anyhow};
+use gpui::{AsyncApp, Entity};
+use language::{OutlineItem, ParseStatus};
+use project::Project;
+use regex::Regex;
+use std::fmt::Write;
+use text::Point;
+
+/// For files over this size, rather than reading them (or including them in context),
+/// we automatically provide the file's symbol outline, with line numbers.
+pub const AUTO_OUTLINE_SIZE: usize = 16384;
+
+pub async fn file_outline(
+    project: Entity<Project>,
+    path: String,
+    action_log: Entity<ActionLog>,
+    regex: Option<Regex>,
+    cx: &mut AsyncApp,
+) -> anyhow::Result<String> {
+    let buffer = {
+        let project_path = project.read_with(cx, |project, cx| {
+            project
+                .find_project_path(&path, cx)
+                .ok_or_else(|| anyhow!("Path {path} not found in project"))
+        })??;
+
+        project
+            .update(cx, |project, cx| project.open_buffer(project_path, cx))?
+            .await?
+    };
+
+    action_log.update(cx, |action_log, cx| {
+        action_log.track_buffer(buffer.clone(), cx);
+    })?;
+
+    // Wait until the buffer has been fully parsed, so that we can read its outline.
+    let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
+    while *parse_status.borrow() != ParseStatus::Idle {
+        parse_status.changed().await?;
+    }
+
+    let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
+    let Some(outline) = snapshot.outline(None) else {
+        return Err(anyhow!("No outline information available for this file."));
+    };
+
+    render_outline(
+        outline
+            .items
+            .into_iter()
+            .map(|item| item.to_point(&snapshot)),
+        regex,
+        0,
+        usize::MAX,
+    )
+    .await
+}
+
+pub async fn render_outline(
+    items: impl IntoIterator<Item = OutlineItem<Point>>,
+    regex: Option<Regex>,
+    offset: usize,
+    results_per_page: usize,
+) -> Result<String> {
+    let mut items = items.into_iter().skip(offset);
+
+    let entries = items
+        .by_ref()
+        .filter(|item| {
+            regex
+                .as_ref()
+                .is_none_or(|regex| regex.is_match(&item.text))
+        })
+        .take(results_per_page)
+        .collect::<Vec<_>>();
+    let has_more = items.next().is_some();
+
+    let mut output = String::new();
+    let entries_rendered = render_entries(&mut output, entries);
+
+    // Calculate pagination information
+    let page_start = offset + 1;
+    let page_end = offset + entries_rendered;
+    let total_symbols = if has_more {
+        format!("more than {}", page_end)
+    } else {
+        page_end.to_string()
+    };
+
+    // Add pagination information
+    if has_more {
+        writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
+        )
+    } else {
+        writeln!(
+            &mut output,
+            "\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
+        )
+    }
+    .ok();
+
+    Ok(output)
+}
+
+fn render_entries(
+    output: &mut String,
+    items: impl IntoIterator<Item = OutlineItem<Point>>,
+) -> usize {
+    let mut entries_rendered = 0;
+
+    for item in items {
+        // Indent by one space per depth level ("" for level 0, " " for level 1, etc.)
+        for _ in 0..item.depth {
+            output.push(' ');
+        }
+        output.push_str(&item.text);
+
+        // Add position information - convert to 1-based line numbers for display
+        let start_line = item.range.start.row + 1;
+        let end_line = item.range.end.row + 1;
+
+        if start_line == end_line {
+            writeln!(output, " [L{}]", start_line).ok();
+        } else {
+            writeln!(output, " [L{}-{}]", start_line, end_line).ok();
+        }
+        entries_rendered += 1;
+    }
+
+    entries_rendered
+}
--- a/crates/assistant_tools/src/code_symbols_tool.rs
+++ b/crates/assistant_tools/src/code_symbols_tool.rs
@ -4,10 +4,10 @@ use std::sync::Arc;

 use crate::schema::json_schema_for;
 use anyhow::{Result, anyhow};
+use assistant_tool::outline;
 use assistant_tool::{ActionLog, Tool, ToolResult};
 use collections::IndexMap;
 use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task};
-use language::{OutlineItem, ParseStatus, Point};
 use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
 use project::{Project, Symbol};
 use regex::{Regex, RegexBuilder};
@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool {
        };

        cx.spawn(async move |cx| match input.path {
-            Some(path) => file_outline(project, path, action_log, regex, cx).await,
+            Some(path) => outline::file_outline(project, path, action_log, regex, cx).await,
            None => project_symbols(project, regex, input.offset, cx).await,
        })
        .into()
    }
 }

-pub async fn file_outline(
-    project: Entity<Project>,
-    path: String,
-    action_log: Entity<ActionLog>,
-    regex: Option<Regex>,
-    cx: &mut AsyncApp,
-) -> anyhow::Result<String> {
-    let buffer = {
-        let project_path = project.read_with(cx, |project, cx| {
-            project
-                .find_project_path(&path, cx)
-                .ok_or_else(|| anyhow!("Path {path} not found in project"))
-        })??;
-
-        project
-            .update(cx, |project, cx| project.open_buffer(project_path, cx))?
-            .await?
-    };
-
-    action_log.update(cx, |action_log, cx| {
-        action_log.track_buffer(buffer.clone(), cx);
-    })?;
-
-    // Wait until the buffer has been fully parsed, so that we can read its outline.
-    let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
-    while *parse_status.borrow() != ParseStatus::Idle {
-        parse_status.changed().await?;
-    }
-
-    let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
-    let Some(outline) = snapshot.outline(None) else {
-        return Err(anyhow!("No outline information available for this file."));
-    };
-
-    render_outline(
-        outline
-            .items
-            .into_iter()
-            .map(|item| item.to_point(&snapshot)),
-        regex,
-        0,
-        usize::MAX,
-    )
-    .await
-}
-
 async fn project_symbols(
    project: Entity<Project>,
    regex: Option<Regex>,
@ -291,77 +245,3 @@ async fn project_symbols(
        output
    })
 }
-
-async fn render_outline(
-    items: impl IntoIterator<Item = OutlineItem<Point>>,
-    regex: Option<Regex>,
-    offset: usize,
-    results_per_page: usize,
-) -> Result<String> {
-    let mut items = items.into_iter().skip(offset);
-
-    let entries = items
-        .by_ref()
-        .filter(|item| {
-            regex
-                .as_ref()
-                .is_none_or(|regex| regex.is_match(&item.text))
-        })
-        .take(results_per_page)
-        .collect::<Vec<_>>();
-    let has_more = items.next().is_some();
-
-    let mut output = String::new();
-    let entries_rendered = render_entries(&mut output, entries);
-
-    // Calculate pagination information
-    let page_start = offset + 1;
-    let page_end = offset + entries_rendered;
-    let total_symbols = if has_more {
-        format!("more than {}", page_end)
-    } else {
-        page_end.to_string()
-    };
-
-    // Add pagination information
-    if has_more {
-        writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
-        )
-    } else {
-        writeln!(
-            &mut output,
-            "\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
-        )
-    }
-    .ok();
-
-    Ok(output)
-}
-
-fn render_entries(
-    output: &mut String,
-    items: impl IntoIterator<Item = OutlineItem<Point>>,
-) -> usize {
-    let mut entries_rendered = 0;
-
-    for item in items {
-        // Indent based on depth ("" for level 0, "  " for level 1, etc.)
-        for _ in 0..item.depth {
-            output.push(' ');
-        }
-        output.push_str(&item.text);
-
-        // Add position information - convert to 1-based line numbers for display
-        let start_line = item.range.start.row + 1;
-        let end_line = item.range.end.row + 1;
-
-        if start_line == end_line {
-            writeln!(output, " [L{}]", start_line).ok();
-        } else {
-            writeln!(output, " [L{}-{}]", start_line, end_line).ok();
-        }
-        entries_rendered += 1;
-    }
-
-    entries_rendered
-}
--- a/crates/assistant_tools/src/contents_tool.rs
+++ b/crates/assistant_tools/src/contents_tool.rs
@ -1,8 +1,8 @@
 use std::sync::Arc;

-use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use crate::schema::json_schema_for;
 use anyhow::{Result, anyhow};
-use assistant_tool::{ActionLog, Tool, ToolResult};
+use assistant_tool::{ActionLog, Tool, ToolResult, outline};
 use gpui::{AnyWindowHandle, App, Entity, Task};
 use itertools::Itertools;
 use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
@ -14,10 +14,6 @@ use ui::IconName;
 use util::markdown::MarkdownInlineCode;

 /// If the model requests to read a file whose size exceeds this, then
-/// the tool will return the file's symbol outline instead of its contents,
-/// and suggest trying again using line ranges from the outline.
-const MAX_FILE_SIZE_TO_READ: usize = 16384;
-
 /// If the model requests to list the entries in a directory with more
 /// entries than this, then the tool will return a subset of the entries
 /// and suggest trying again.
@ -218,7 +214,7 @@ impl Tool for ContentsTool {
                    // No line ranges specified, so check file size to see if it's too big.
                    let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;

-                    if file_size <= MAX_FILE_SIZE_TO_READ {
+                    if file_size <= outline::AUTO_OUTLINE_SIZE {
                        let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;

                        action_log.update(cx, |log, cx| {
@ -229,7 +225,7 @@ impl Tool for ContentsTool {
                    } else {
                        // File is too big, so return its outline and a suggestion to
                        // read again with a line number range specified.
-                        let outline = file_outline(project, file_path, action_log, None, cx).await?;
+                        let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;

                        Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
                    }
--- a/crates/assistant_tools/src/read_file_tool.rs
+++ b/crates/assistant_tools/src/read_file_tool.rs
@ -1,5 +1,6 @@
-use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
+use crate::schema::json_schema_for;
 use anyhow::{Result, anyhow};
+use assistant_tool::outline;
 use assistant_tool::{ActionLog, Tool, ToolResult};
 use gpui::{AnyWindowHandle, App, Entity, Task};

@ -14,10 +15,6 @@ use ui::IconName;
 use util::markdown::MarkdownInlineCode;

 /// If the model requests to read a file whose size exceeds this, then
-/// the tool will return an error along with the model's symbol outline,
-/// and suggest trying again using line ranges from the outline.
-const MAX_FILE_SIZE_TO_READ: usize = 16384;
-
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
 pub struct ReadFileToolInput {
    /// The relative path of the file to read.
@ -144,7 +141,7 @@ impl Tool for ReadFileTool {
                // No line ranges specified, so check file size to see if it's too big.
                let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;

-                if file_size <= MAX_FILE_SIZE_TO_READ {
+                if file_size <= outline::AUTO_OUTLINE_SIZE {
                    // File is small enough, so return its contents.
                    let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;

@ -154,9 +151,9 @@ impl Tool for ReadFileTool {

                    Ok(result)
                } else {
-                    // File is too big, so return an error with the outline
+                    // File is too big, so return the outline
                    // and a suggestion to read again with line numbers.
-                    let outline = file_outline(project, file_path, action_log, None, cx).await?;
+                    let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
                    Ok(formatdoc! {"
                        This file was too big to read all at once. Here is an outline of its symbols: