Use outline (#29687)

## Before

![Screenshot 2025-04-30 at 10 56
36 AM](https://github.com/user-attachments/assets/3a435f4c-ad45-4f26-a847-2d5c9d03648e)

## After

![Screenshot 2025-04-30 at 10 55
27 AM](https://github.com/user-attachments/assets/cc3a8144-b6fe-4a15-8a47-b2487ce4f66e)

Release Notes:

- Context picker and `@`-mentions now work with very large files.
This commit is contained in:
Richard Feldman 2025-04-30 14:00:00 -04:00 committed by GitHub
parent 8173534ad5
commit 50f705e779
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 301 additions and 142 deletions

1
Cargo.lock generated
View File

@ -690,6 +690,7 @@ dependencies = [
"pretty_assertions",
"project",
"rand 0.8.5",
"regex",
"serde",
"serde_json",
"settings",

View File

@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::{ops::Range, path::Path, sync::Arc};
use assistant_tool::outline;
use collections::HashSet;
use futures::future;
use futures::{FutureExt, future::Shared};
use gpui::{App, AppContext as _, Entity, SharedString, Task};
use language::Buffer;
use language::{Buffer, ParseStatus};
use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
use project::{Project, ProjectEntryId, ProjectPath, Worktree};
use prompt_store::{PromptStore, UserPromptId};
@ -152,6 +153,7 @@ pub struct FileContext {
pub handle: FileContextHandle,
pub full_path: Arc<Path>,
pub text: SharedString,
pub is_outline: bool,
}
impl FileContextHandle {
@ -177,14 +179,51 @@ impl FileContextHandle {
log::error!("file context missing path");
return Task::ready(None);
};
let full_path = file.full_path(cx);
let full_path: Arc<Path> = file.full_path(cx).into();
let rope = buffer_ref.as_rope().clone();
let buffer = self.buffer.clone();
cx.background_spawn(async move {
cx.spawn(async move |cx| {
// For large files, use outline instead of full content
if rope.len() > outline::AUTO_OUTLINE_SIZE {
// Wait until the buffer has been fully parsed, so we can read its outline
if let Ok(mut parse_status) =
buffer.read_with(cx, |buffer, _| buffer.parse_status())
{
while *parse_status.borrow() != ParseStatus::Idle {
parse_status.changed().await.log_err();
}
if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) {
if let Some(outline) = snapshot.outline(None) {
let items = outline
.items
.into_iter()
.map(|item| item.to_point(&snapshot));
if let Ok(outline_text) =
outline::render_outline(items, None, 0, usize::MAX).await
{
let context = AgentContext::File(FileContext {
handle: self,
full_path,
text: outline_text.into(),
is_outline: true,
});
return Some((context, vec![buffer]));
}
}
}
}
}
// Fallback to full content if we couldn't build an outline
// (or didn't need to because the file was small enough)
let context = AgentContext::File(FileContext {
handle: self,
full_path: full_path.into(),
full_path,
text: rope.to_string().into(),
is_outline: false,
});
Some((context, vec![buffer]))
})
@ -996,3 +1035,115 @@ impl Hash for AgentContextKey {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use gpui::TestAppContext;
use project::{FakeFs, Project};
use serde_json::json;
use settings::SettingsStore;
use util::path;
fn init_test_settings(cx: &mut TestAppContext) {
cx.update(|cx| {
let settings_store = SettingsStore::test(cx);
cx.set_global(settings_store);
language::init(cx);
Project::init_settings(cx);
});
}
// Helper to create a test project with test files
async fn create_test_project(
cx: &mut TestAppContext,
files: serde_json::Value,
) -> Entity<Project> {
let fs = FakeFs::new(cx.background_executor.clone());
fs.insert_tree(path!("/test"), files).await;
Project::test(fs, [path!("/test").as_ref()], cx).await
}
#[gpui::test]
async fn test_large_file_uses_outline(cx: &mut TestAppContext) {
init_test_settings(cx);
// Create a large file that exceeds AUTO_OUTLINE_SIZE
const LINE: &str = "Line with some text\n";
let large_content = LINE.repeat(2 * (outline::AUTO_OUTLINE_SIZE / LINE.len()));
let content_len = large_content.len();
assert!(content_len > outline::AUTO_OUTLINE_SIZE);
let file_context = file_context_for(large_content, cx).await;
assert!(
file_context.is_outline,
"Large file should use outline format"
);
assert!(
file_context.text.len() < content_len,
"Outline should be smaller than original content"
);
}
#[gpui::test]
async fn test_small_file_uses_full_content(cx: &mut TestAppContext) {
init_test_settings(cx);
let small_content = "This is a small file.\n";
let content_len = small_content.len();
assert!(content_len < outline::AUTO_OUTLINE_SIZE);
let file_context = file_context_for(small_content.to_string(), cx).await;
assert!(
!file_context.is_outline,
"Small files should not get an outline"
);
assert_eq!(file_context.text, small_content);
}
async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext {
// Create a test project with the file
let project = create_test_project(
cx,
json!({
"file.txt": content,
}),
)
.await;
// Open the buffer
let buffer_path = project
.read_with(cx, |project, cx| project.find_project_path("file.txt", cx))
.unwrap();
let buffer = project
.update(cx, |project, cx| project.open_buffer(buffer_path, cx))
.await
.unwrap();
let context_handle = AgentContextHandle::File(FileContextHandle {
buffer: buffer.clone(),
context_id: ContextId::zero(),
});
cx.update(|cx| load_context(vec![context_handle], &project, &None, cx))
.await
.loaded_context
.contexts
.into_iter()
.find_map(|ctx| {
if let AgentContext::File(file_ctx) = ctx {
Some(file_ctx)
} else {
None
}
})
.expect("Should have found a file context")
}
}

View File

@ -24,6 +24,7 @@ language.workspace = true
language_model.workspace = true
parking_lot.workspace = true
project.workspace = true
regex.workspace = true
serde.workspace = true
serde_json.workspace = true
text.workspace = true

View File

@ -1,4 +1,5 @@
mod action_log;
pub mod outline;
mod tool_registry;
mod tool_schema;
mod tool_working_set;

View File

@ -0,0 +1,132 @@
use crate::ActionLog;
use anyhow::{Result, anyhow};
use gpui::{AsyncApp, Entity};
use language::{OutlineItem, ParseStatus};
use project::Project;
use regex::Regex;
use std::fmt::Write;
use text::Point;
/// For files over this size, instead of reading them (or including them in context),
/// we automatically provide the file's symbol outline instead, with line numbers.
pub const AUTO_OUTLINE_SIZE: usize = 16384;
pub async fn file_outline(
project: Entity<Project>,
path: String,
action_log: Entity<ActionLog>,
regex: Option<Regex>,
cx: &mut AsyncApp,
) -> anyhow::Result<String> {
let buffer = {
let project_path = project.read_with(cx, |project, cx| {
project
.find_project_path(&path, cx)
.ok_or_else(|| anyhow!("Path {path} not found in project"))
})??;
project
.update(cx, |project, cx| project.open_buffer(project_path, cx))?
.await?
};
action_log.update(cx, |action_log, cx| {
action_log.track_buffer(buffer.clone(), cx);
})?;
// Wait until the buffer has been fully parsed, so that we can read its outline.
let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
while *parse_status.borrow() != ParseStatus::Idle {
parse_status.changed().await?;
}
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
let Some(outline) = snapshot.outline(None) else {
return Err(anyhow!("No outline information available for this file."));
};
render_outline(
outline
.items
.into_iter()
.map(|item| item.to_point(&snapshot)),
regex,
0,
usize::MAX,
)
.await
}
pub async fn render_outline(
items: impl IntoIterator<Item = OutlineItem<Point>>,
regex: Option<Regex>,
offset: usize,
results_per_page: usize,
) -> Result<String> {
let mut items = items.into_iter().skip(offset);
let entries = items
.by_ref()
.filter(|item| {
regex
.as_ref()
.is_none_or(|regex| regex.is_match(&item.text))
})
.take(results_per_page)
.collect::<Vec<_>>();
let has_more = items.next().is_some();
let mut output = String::new();
let entries_rendered = render_entries(&mut output, entries);
// Calculate pagination information
let page_start = offset + 1;
let page_end = offset + entries_rendered;
let total_symbols = if has_more {
format!("more than {}", page_end)
} else {
page_end.to_string()
};
// Add pagination information
if has_more {
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
)
} else {
writeln!(
&mut output,
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
)
}
.ok();
Ok(output)
}
fn render_entries(
output: &mut String,
items: impl IntoIterator<Item = OutlineItem<Point>>,
) -> usize {
let mut entries_rendered = 0;
for item in items {
// Indent based on depth ("" for level 0, " " for level 1, etc.)
for _ in 0..item.depth {
output.push(' ');
}
output.push_str(&item.text);
// Add position information - convert to 1-based line numbers for display
let start_line = item.range.start.row + 1;
let end_line = item.range.end.row + 1;
if start_line == end_line {
writeln!(output, " [L{}]", start_line).ok();
} else {
writeln!(output, " [L{}-{}]", start_line, end_line).ok();
}
entries_rendered += 1;
}
entries_rendered
}

View File

@ -4,10 +4,10 @@ use std::sync::Arc;
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult};
use collections::IndexMap;
use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task};
use language::{OutlineItem, ParseStatus, Point};
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use project::{Project, Symbol};
use regex::{Regex, RegexBuilder};
@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool {
};
cx.spawn(async move |cx| match input.path {
Some(path) => file_outline(project, path, action_log, regex, cx).await,
Some(path) => outline::file_outline(project, path, action_log, regex, cx).await,
None => project_symbols(project, regex, input.offset, cx).await,
})
.into()
}
}
pub async fn file_outline(
project: Entity<Project>,
path: String,
action_log: Entity<ActionLog>,
regex: Option<Regex>,
cx: &mut AsyncApp,
) -> anyhow::Result<String> {
let buffer = {
let project_path = project.read_with(cx, |project, cx| {
project
.find_project_path(&path, cx)
.ok_or_else(|| anyhow!("Path {path} not found in project"))
})??;
project
.update(cx, |project, cx| project.open_buffer(project_path, cx))?
.await?
};
action_log.update(cx, |action_log, cx| {
action_log.track_buffer(buffer.clone(), cx);
})?;
// Wait until the buffer has been fully parsed, so that we can read its outline.
let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
while *parse_status.borrow() != ParseStatus::Idle {
parse_status.changed().await?;
}
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
let Some(outline) = snapshot.outline(None) else {
return Err(anyhow!("No outline information available for this file."));
};
render_outline(
outline
.items
.into_iter()
.map(|item| item.to_point(&snapshot)),
regex,
0,
usize::MAX,
)
.await
}
async fn project_symbols(
project: Entity<Project>,
regex: Option<Regex>,
@ -291,77 +245,3 @@ async fn project_symbols(
output
})
}
async fn render_outline(
items: impl IntoIterator<Item = OutlineItem<Point>>,
regex: Option<Regex>,
offset: usize,
results_per_page: usize,
) -> Result<String> {
let mut items = items.into_iter().skip(offset);
let entries = items
.by_ref()
.filter(|item| {
regex
.as_ref()
.is_none_or(|regex| regex.is_match(&item.text))
})
.take(results_per_page)
.collect::<Vec<_>>();
let has_more = items.next().is_some();
let mut output = String::new();
let entries_rendered = render_entries(&mut output, entries);
// Calculate pagination information
let page_start = offset + 1;
let page_end = offset + entries_rendered;
let total_symbols = if has_more {
format!("more than {}", page_end)
} else {
page_end.to_string()
};
// Add pagination information
if has_more {
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
)
} else {
writeln!(
&mut output,
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
)
}
.ok();
Ok(output)
}
fn render_entries(
output: &mut String,
items: impl IntoIterator<Item = OutlineItem<Point>>,
) -> usize {
let mut entries_rendered = 0;
for item in items {
// Indent based on depth ("" for level 0, " " for level 1, etc.)
for _ in 0..item.depth {
output.push(' ');
}
output.push_str(&item.text);
// Add position information - convert to 1-based line numbers for display
let start_line = item.range.start.row + 1;
let end_line = item.range.end.row + 1;
if start_line == end_line {
writeln!(output, " [L{}]", start_line).ok();
} else {
writeln!(output, " [L{}-{}]", start_line, end_line).ok();
}
entries_rendered += 1;
}
entries_rendered
}

View File

@ -1,8 +1,8 @@
use std::sync::Arc;
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, Tool, ToolResult};
use assistant_tool::{ActionLog, Tool, ToolResult, outline};
use gpui::{AnyWindowHandle, App, Entity, Task};
use itertools::Itertools;
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
@ -14,10 +14,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then
/// the tool will return the file's symbol outline instead of its contents,
/// and suggest trying again using line ranges from the outline.
const MAX_FILE_SIZE_TO_READ: usize = 16384;
/// If the model requests to list the entries in a directory with more
/// entries than this, then the tool will return a subset of the entries
/// and suggest trying again.
@ -218,7 +214,7 @@ impl Tool for ContentsTool {
// No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
if file_size <= MAX_FILE_SIZE_TO_READ {
if file_size <= outline::AUTO_OUTLINE_SIZE {
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
action_log.update(cx, |log, cx| {
@ -229,7 +225,7 @@ impl Tool for ContentsTool {
} else {
// File is too big, so return its outline and a suggestion to
// read again with a line number range specified.
let outline = file_outline(project, file_path, action_log, None, cx).await?;
let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
}

View File

@ -1,5 +1,6 @@
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
use crate::schema::json_schema_for;
use anyhow::{Result, anyhow};
use assistant_tool::outline;
use assistant_tool::{ActionLog, Tool, ToolResult};
use gpui::{AnyWindowHandle, App, Entity, Task};
@ -14,10 +15,6 @@ use ui::IconName;
use util::markdown::MarkdownInlineCode;
/// If the model requests to read a file whose size exceeds this, then
/// the tool will return an error along with the model's symbol outline,
/// and suggest trying again using line ranges from the outline.
const MAX_FILE_SIZE_TO_READ: usize = 16384;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct ReadFileToolInput {
/// The relative path of the file to read.
@ -144,7 +141,7 @@ impl Tool for ReadFileTool {
// No line ranges specified, so check file size to see if it's too big.
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
if file_size <= MAX_FILE_SIZE_TO_READ {
if file_size <= outline::AUTO_OUTLINE_SIZE {
// File is small enough, so return its contents.
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
@ -154,9 +151,9 @@ impl Tool for ReadFileTool {
Ok(result)
} else {
// File is too big, so return an error with the outline
// File is too big, so return the outline
// and a suggestion to read again with line numbers.
let outline = file_outline(project, file_path, action_log, None, cx).await?;
let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
Ok(formatdoc! {"
This file was too big to read all at once. Here is an outline of its symbols: