Use outline (#29687)
## Before  ## After  Release Notes: - Context picker and `@`-mentions now work with very large files.
This commit is contained in:
parent
8173534ad5
commit
50f705e779
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -690,6 +690,7 @@ dependencies = [
|
||||
"pretty_assertions",
|
||||
"project",
|
||||
"rand 0.8.5",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"settings",
|
||||
|
@ -3,11 +3,12 @@ use std::hash::{Hash, Hasher};
|
||||
use std::path::PathBuf;
|
||||
use std::{ops::Range, path::Path, sync::Arc};
|
||||
|
||||
use assistant_tool::outline;
|
||||
use collections::HashSet;
|
||||
use futures::future;
|
||||
use futures::{FutureExt, future::Shared};
|
||||
use gpui::{App, AppContext as _, Entity, SharedString, Task};
|
||||
use language::Buffer;
|
||||
use language::{Buffer, ParseStatus};
|
||||
use language_model::{LanguageModelImage, LanguageModelRequestMessage, MessageContent};
|
||||
use project::{Project, ProjectEntryId, ProjectPath, Worktree};
|
||||
use prompt_store::{PromptStore, UserPromptId};
|
||||
@ -152,6 +153,7 @@ pub struct FileContext {
|
||||
pub handle: FileContextHandle,
|
||||
pub full_path: Arc<Path>,
|
||||
pub text: SharedString,
|
||||
pub is_outline: bool,
|
||||
}
|
||||
|
||||
impl FileContextHandle {
|
||||
@ -177,14 +179,51 @@ impl FileContextHandle {
|
||||
log::error!("file context missing path");
|
||||
return Task::ready(None);
|
||||
};
|
||||
let full_path = file.full_path(cx);
|
||||
let full_path: Arc<Path> = file.full_path(cx).into();
|
||||
let rope = buffer_ref.as_rope().clone();
|
||||
let buffer = self.buffer.clone();
|
||||
cx.background_spawn(async move {
|
||||
|
||||
cx.spawn(async move |cx| {
|
||||
// For large files, use outline instead of full content
|
||||
if rope.len() > outline::AUTO_OUTLINE_SIZE {
|
||||
// Wait until the buffer has been fully parsed, so we can read its outline
|
||||
if let Ok(mut parse_status) =
|
||||
buffer.read_with(cx, |buffer, _| buffer.parse_status())
|
||||
{
|
||||
while *parse_status.borrow() != ParseStatus::Idle {
|
||||
parse_status.changed().await.log_err();
|
||||
}
|
||||
|
||||
if let Ok(snapshot) = buffer.read_with(cx, |buffer, _| buffer.snapshot()) {
|
||||
if let Some(outline) = snapshot.outline(None) {
|
||||
let items = outline
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| item.to_point(&snapshot));
|
||||
|
||||
if let Ok(outline_text) =
|
||||
outline::render_outline(items, None, 0, usize::MAX).await
|
||||
{
|
||||
let context = AgentContext::File(FileContext {
|
||||
handle: self,
|
||||
full_path,
|
||||
text: outline_text.into(),
|
||||
is_outline: true,
|
||||
});
|
||||
return Some((context, vec![buffer]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to full content if we couldn't build an outline
|
||||
// (or didn't need to because the file was small enough)
|
||||
let context = AgentContext::File(FileContext {
|
||||
handle: self,
|
||||
full_path: full_path.into(),
|
||||
full_path,
|
||||
text: rope.to_string().into(),
|
||||
is_outline: false,
|
||||
});
|
||||
Some((context, vec![buffer]))
|
||||
})
|
||||
@ -996,3 +1035,115 @@ impl Hash for AgentContextKey {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::TestAppContext;
    use project::{FakeFs, Project};
    use serde_json::json;
    use settings::SettingsStore;
    use util::path;

    /// A file larger than `outline::AUTO_OUTLINE_SIZE` must be loaded as an
    /// outline, and that outline must be shorter than the file itself.
    #[gpui::test]
    async fn test_large_file_uses_outline(cx: &mut TestAppContext) {
        init_test_settings(cx);

        // Repeat a fixed line often enough to end up at roughly twice the threshold.
        const LINE: &str = "Line with some text\n";
        let repetitions = 2 * (outline::AUTO_OUTLINE_SIZE / LINE.len());
        let large_content = LINE.repeat(repetitions);
        let content_len = large_content.len();

        // Sanity check on the fixture before exercising the code under test.
        assert!(content_len > outline::AUTO_OUTLINE_SIZE);

        let loaded = file_context_for(large_content, cx).await;

        assert!(loaded.is_outline, "Large file should use outline format");
        assert!(
            loaded.text.len() < content_len,
            "Outline should be smaller than original content"
        );
    }

    /// A file below `outline::AUTO_OUTLINE_SIZE` must be loaded verbatim.
    #[gpui::test]
    async fn test_small_file_uses_full_content(cx: &mut TestAppContext) {
        init_test_settings(cx);

        let small_content = "This is a small file.\n";
        let content_len = small_content.len();

        // Sanity check on the fixture before exercising the code under test.
        assert!(content_len < outline::AUTO_OUTLINE_SIZE);

        let loaded = file_context_for(small_content.to_string(), cx).await;

        assert!(!loaded.is_outline, "Small files should not get an outline");
        assert_eq!(loaded.text, small_content);
    }

    /// Installs a test `SettingsStore` as a global and initializes the
    /// language and project settings.
    fn init_test_settings(cx: &mut TestAppContext) {
        cx.update(|cx| {
            let store = SettingsStore::test(cx);
            cx.set_global(store);
            language::init(cx);
            Project::init_settings(cx);
        });
    }

    /// Builds a test project rooted at `/test` on a fake file system that has
    /// been populated with `files`.
    async fn create_test_project(
        cx: &mut TestAppContext,
        files: serde_json::Value,
    ) -> Entity<Project> {
        let fake_fs = FakeFs::new(cx.background_executor.clone());
        fake_fs.insert_tree(path!("/test"), files).await;
        Project::test(fake_fs, [path!("/test").as_ref()], cx).await
    }

    /// Writes `content` to `file.txt` in a fresh test project, loads that file
    /// through `load_context`, and returns the resulting `FileContext`.
    ///
    /// Panics if the path cannot be resolved, the buffer cannot be opened, or
    /// no file context comes back.
    async fn file_context_for(content: String, cx: &mut TestAppContext) -> FileContext {
        // Project containing just the one file under test.
        let tree = json!({
            "file.txt": content,
        });
        let project = create_test_project(cx, tree).await;

        // Resolve the file to a project path and open it as a buffer.
        let buffer_path = project
            .read_with(cx, |project, cx| project.find_project_path("file.txt", cx))
            .unwrap();
        let open_task = project.update(cx, |project, cx| project.open_buffer(buffer_path, cx));
        let buffer = open_task.await.unwrap();

        let file_handle = FileContextHandle {
            buffer: buffer.clone(),
            context_id: ContextId::zero(),
        };
        let handles = vec![AgentContextHandle::File(file_handle)];

        // Load the context and pick out the first file entry.
        let load_result = cx
            .update(|cx| load_context(handles, &project, &None, cx))
            .await;

        load_result
            .loaded_context
            .contexts
            .into_iter()
            .find_map(|ctx| match ctx {
                AgentContext::File(file_ctx) => Some(file_ctx),
                _ => None,
            })
            .expect("Should have found a file context")
    }
}
|
||||
|
@ -24,6 +24,7 @@ language.workspace = true
|
||||
language_model.workspace = true
|
||||
parking_lot.workspace = true
|
||||
project.workspace = true
|
||||
regex.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
text.workspace = true
|
||||
|
@ -1,4 +1,5 @@
|
||||
mod action_log;
|
||||
pub mod outline;
|
||||
mod tool_registry;
|
||||
mod tool_schema;
|
||||
mod tool_working_set;
|
||||
|
132
crates/assistant_tool/src/outline.rs
Normal file
132
crates/assistant_tool/src/outline.rs
Normal file
@ -0,0 +1,132 @@
|
||||
use crate::ActionLog;
|
||||
use anyhow::{Result, anyhow};
|
||||
use gpui::{AsyncApp, Entity};
|
||||
use language::{OutlineItem, ParseStatus};
|
||||
use project::Project;
|
||||
use regex::Regex;
|
||||
use std::fmt::Write;
|
||||
use text::Point;
|
||||
|
||||
/// For files over this size, instead of reading them (or including them in context),
|
||||
/// we automatically provide the file's symbol outline instead, with line numbers.
|
||||
pub const AUTO_OUTLINE_SIZE: usize = 16384;
|
||||
|
||||
pub async fn file_outline(
|
||||
project: Entity<Project>,
|
||||
path: String,
|
||||
action_log: Entity<ActionLog>,
|
||||
regex: Option<Regex>,
|
||||
cx: &mut AsyncApp,
|
||||
) -> anyhow::Result<String> {
|
||||
let buffer = {
|
||||
let project_path = project.read_with(cx, |project, cx| {
|
||||
project
|
||||
.find_project_path(&path, cx)
|
||||
.ok_or_else(|| anyhow!("Path {path} not found in project"))
|
||||
})??;
|
||||
|
||||
project
|
||||
.update(cx, |project, cx| project.open_buffer(project_path, cx))?
|
||||
.await?
|
||||
};
|
||||
|
||||
action_log.update(cx, |action_log, cx| {
|
||||
action_log.track_buffer(buffer.clone(), cx);
|
||||
})?;
|
||||
|
||||
// Wait until the buffer has been fully parsed, so that we can read its outline.
|
||||
let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
|
||||
while *parse_status.borrow() != ParseStatus::Idle {
|
||||
parse_status.changed().await?;
|
||||
}
|
||||
|
||||
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
|
||||
let Some(outline) = snapshot.outline(None) else {
|
||||
return Err(anyhow!("No outline information available for this file."));
|
||||
};
|
||||
|
||||
render_outline(
|
||||
outline
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| item.to_point(&snapshot)),
|
||||
regex,
|
||||
0,
|
||||
usize::MAX,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn render_outline(
|
||||
items: impl IntoIterator<Item = OutlineItem<Point>>,
|
||||
regex: Option<Regex>,
|
||||
offset: usize,
|
||||
results_per_page: usize,
|
||||
) -> Result<String> {
|
||||
let mut items = items.into_iter().skip(offset);
|
||||
|
||||
let entries = items
|
||||
.by_ref()
|
||||
.filter(|item| {
|
||||
regex
|
||||
.as_ref()
|
||||
.is_none_or(|regex| regex.is_match(&item.text))
|
||||
})
|
||||
.take(results_per_page)
|
||||
.collect::<Vec<_>>();
|
||||
let has_more = items.next().is_some();
|
||||
|
||||
let mut output = String::new();
|
||||
let entries_rendered = render_entries(&mut output, entries);
|
||||
|
||||
// Calculate pagination information
|
||||
let page_start = offset + 1;
|
||||
let page_end = offset + entries_rendered;
|
||||
let total_symbols = if has_more {
|
||||
format!("more than {}", page_end)
|
||||
} else {
|
||||
page_end.to_string()
|
||||
};
|
||||
|
||||
// Add pagination information
|
||||
if has_more {
|
||||
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
|
||||
)
|
||||
} else {
|
||||
writeln!(
|
||||
&mut output,
|
||||
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
|
||||
)
|
||||
}
|
||||
.ok();
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn render_entries(
|
||||
output: &mut String,
|
||||
items: impl IntoIterator<Item = OutlineItem<Point>>,
|
||||
) -> usize {
|
||||
let mut entries_rendered = 0;
|
||||
|
||||
for item in items {
|
||||
// Indent based on depth ("" for level 0, " " for level 1, etc.)
|
||||
for _ in 0..item.depth {
|
||||
output.push(' ');
|
||||
}
|
||||
output.push_str(&item.text);
|
||||
|
||||
// Add position information - convert to 1-based line numbers for display
|
||||
let start_line = item.range.start.row + 1;
|
||||
let end_line = item.range.end.row + 1;
|
||||
|
||||
if start_line == end_line {
|
||||
writeln!(output, " [L{}]", start_line).ok();
|
||||
} else {
|
||||
writeln!(output, " [L{}-{}]", start_line, end_line).ok();
|
||||
}
|
||||
entries_rendered += 1;
|
||||
}
|
||||
|
||||
entries_rendered
|
||||
}
|
@ -4,10 +4,10 @@ use std::sync::Arc;
|
||||
|
||||
use crate::schema::json_schema_for;
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_tool::outline;
|
||||
use assistant_tool::{ActionLog, Tool, ToolResult};
|
||||
use collections::IndexMap;
|
||||
use gpui::{AnyWindowHandle, App, AsyncApp, Entity, Task};
|
||||
use language::{OutlineItem, ParseStatus, Point};
|
||||
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
||||
use project::{Project, Symbol};
|
||||
use regex::{Regex, RegexBuilder};
|
||||
@ -148,59 +148,13 @@ impl Tool for CodeSymbolsTool {
|
||||
};
|
||||
|
||||
cx.spawn(async move |cx| match input.path {
|
||||
Some(path) => file_outline(project, path, action_log, regex, cx).await,
|
||||
Some(path) => outline::file_outline(project, path, action_log, regex, cx).await,
|
||||
None => project_symbols(project, regex, input.offset, cx).await,
|
||||
})
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn file_outline(
|
||||
project: Entity<Project>,
|
||||
path: String,
|
||||
action_log: Entity<ActionLog>,
|
||||
regex: Option<Regex>,
|
||||
cx: &mut AsyncApp,
|
||||
) -> anyhow::Result<String> {
|
||||
let buffer = {
|
||||
let project_path = project.read_with(cx, |project, cx| {
|
||||
project
|
||||
.find_project_path(&path, cx)
|
||||
.ok_or_else(|| anyhow!("Path {path} not found in project"))
|
||||
})??;
|
||||
|
||||
project
|
||||
.update(cx, |project, cx| project.open_buffer(project_path, cx))?
|
||||
.await?
|
||||
};
|
||||
|
||||
action_log.update(cx, |action_log, cx| {
|
||||
action_log.track_buffer(buffer.clone(), cx);
|
||||
})?;
|
||||
|
||||
// Wait until the buffer has been fully parsed, so that we can read its outline.
|
||||
let mut parse_status = buffer.read_with(cx, |buffer, _| buffer.parse_status())?;
|
||||
while *parse_status.borrow() != ParseStatus::Idle {
|
||||
parse_status.changed().await?;
|
||||
}
|
||||
|
||||
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
|
||||
let Some(outline) = snapshot.outline(None) else {
|
||||
return Err(anyhow!("No outline information available for this file."));
|
||||
};
|
||||
|
||||
render_outline(
|
||||
outline
|
||||
.items
|
||||
.into_iter()
|
||||
.map(|item| item.to_point(&snapshot)),
|
||||
regex,
|
||||
0,
|
||||
usize::MAX,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn project_symbols(
|
||||
project: Entity<Project>,
|
||||
regex: Option<Regex>,
|
||||
@ -291,77 +245,3 @@ async fn project_symbols(
|
||||
output
|
||||
})
|
||||
}
|
||||
|
||||
async fn render_outline(
|
||||
items: impl IntoIterator<Item = OutlineItem<Point>>,
|
||||
regex: Option<Regex>,
|
||||
offset: usize,
|
||||
results_per_page: usize,
|
||||
) -> Result<String> {
|
||||
let mut items = items.into_iter().skip(offset);
|
||||
|
||||
let entries = items
|
||||
.by_ref()
|
||||
.filter(|item| {
|
||||
regex
|
||||
.as_ref()
|
||||
.is_none_or(|regex| regex.is_match(&item.text))
|
||||
})
|
||||
.take(results_per_page)
|
||||
.collect::<Vec<_>>();
|
||||
let has_more = items.next().is_some();
|
||||
|
||||
let mut output = String::new();
|
||||
let entries_rendered = render_entries(&mut output, entries);
|
||||
|
||||
// Calculate pagination information
|
||||
let page_start = offset + 1;
|
||||
let page_end = offset + entries_rendered;
|
||||
let total_symbols = if has_more {
|
||||
format!("more than {}", page_end)
|
||||
} else {
|
||||
page_end.to_string()
|
||||
};
|
||||
|
||||
// Add pagination information
|
||||
if has_more {
|
||||
writeln!(&mut output, "\nShowing symbols {page_start}-{page_end} (there were more symbols found; use offset: {page_end} to see next page)",
|
||||
)
|
||||
} else {
|
||||
writeln!(
|
||||
&mut output,
|
||||
"\nShowing symbols {page_start}-{page_end} (total symbols: {total_symbols})",
|
||||
)
|
||||
}
|
||||
.ok();
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
fn render_entries(
|
||||
output: &mut String,
|
||||
items: impl IntoIterator<Item = OutlineItem<Point>>,
|
||||
) -> usize {
|
||||
let mut entries_rendered = 0;
|
||||
|
||||
for item in items {
|
||||
// Indent based on depth ("" for level 0, " " for level 1, etc.)
|
||||
for _ in 0..item.depth {
|
||||
output.push(' ');
|
||||
}
|
||||
output.push_str(&item.text);
|
||||
|
||||
// Add position information - convert to 1-based line numbers for display
|
||||
let start_line = item.range.start.row + 1;
|
||||
let end_line = item.range.end.row + 1;
|
||||
|
||||
if start_line == end_line {
|
||||
writeln!(output, " [L{}]", start_line).ok();
|
||||
} else {
|
||||
writeln!(output, " [L{}-{}]", start_line, end_line).ok();
|
||||
}
|
||||
entries_rendered += 1;
|
||||
}
|
||||
|
||||
entries_rendered
|
||||
}
|
||||
|
@ -1,8 +1,8 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
|
||||
use crate::schema::json_schema_for;
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_tool::{ActionLog, Tool, ToolResult};
|
||||
use assistant_tool::{ActionLog, Tool, ToolResult, outline};
|
||||
use gpui::{AnyWindowHandle, App, Entity, Task};
|
||||
use itertools::Itertools;
|
||||
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
|
||||
@ -14,10 +14,6 @@ use ui::IconName;
|
||||
use util::markdown::MarkdownInlineCode;
|
||||
|
||||
/// If the model requests to read a file whose size exceeds this, then
|
||||
/// the tool will return the file's symbol outline instead of its contents,
|
||||
/// and suggest trying again using line ranges from the outline.
|
||||
const MAX_FILE_SIZE_TO_READ: usize = 16384;
|
||||
|
||||
/// If the model requests to list the entries in a directory with more
|
||||
/// entries than this, then the tool will return a subset of the entries
|
||||
/// and suggest trying again.
|
||||
@ -218,7 +214,7 @@ impl Tool for ContentsTool {
|
||||
// No line ranges specified, so check file size to see if it's too big.
|
||||
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
|
||||
|
||||
if file_size <= MAX_FILE_SIZE_TO_READ {
|
||||
if file_size <= outline::AUTO_OUTLINE_SIZE {
|
||||
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
|
||||
|
||||
action_log.update(cx, |log, cx| {
|
||||
@ -229,7 +225,7 @@ impl Tool for ContentsTool {
|
||||
} else {
|
||||
// File is too big, so return its outline and a suggestion to
|
||||
// read again with a line number range specified.
|
||||
let outline = file_outline(project, file_path, action_log, None, cx).await?;
|
||||
let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
|
||||
|
||||
Ok(format!("This file was too big to read all at once. Here is an outline of its symbols:\n\n{outline}\n\nUsing the line numbers in this outline, you can call this tool again while specifying the start and end fields to see the implementations of symbols in the outline."))
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::{code_symbols_tool::file_outline, schema::json_schema_for};
|
||||
use crate::schema::json_schema_for;
|
||||
use anyhow::{Result, anyhow};
|
||||
use assistant_tool::outline;
|
||||
use assistant_tool::{ActionLog, Tool, ToolResult};
|
||||
use gpui::{AnyWindowHandle, App, Entity, Task};
|
||||
|
||||
@ -14,10 +15,6 @@ use ui::IconName;
|
||||
use util::markdown::MarkdownInlineCode;
|
||||
|
||||
/// If the model requests to read a file whose size exceeds this, then
|
||||
/// the tool will return an error along with the model's symbol outline,
|
||||
/// and suggest trying again using line ranges from the outline.
|
||||
const MAX_FILE_SIZE_TO_READ: usize = 16384;
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct ReadFileToolInput {
|
||||
/// The relative path of the file to read.
|
||||
@ -144,7 +141,7 @@ impl Tool for ReadFileTool {
|
||||
// No line ranges specified, so check file size to see if it's too big.
|
||||
let file_size = buffer.read_with(cx, |buffer, _cx| buffer.text().len())?;
|
||||
|
||||
if file_size <= MAX_FILE_SIZE_TO_READ {
|
||||
if file_size <= outline::AUTO_OUTLINE_SIZE {
|
||||
// File is small enough, so return its contents.
|
||||
let result = buffer.read_with(cx, |buffer, _cx| buffer.text())?;
|
||||
|
||||
@ -154,9 +151,9 @@ impl Tool for ReadFileTool {
|
||||
|
||||
Ok(result)
|
||||
} else {
|
||||
// File is too big, so return an error with the outline
|
||||
// File is too big, so return the outline
|
||||
// and a suggestion to read again with line numbers.
|
||||
let outline = file_outline(project, file_path, action_log, None, cx).await?;
|
||||
let outline = outline::file_outline(project, file_path, action_log, None, cx).await?;
|
||||
Ok(formatdoc! {"
|
||||
This file was too big to read all at once. Here is an outline of its symbols:
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user