From 06e9f0e3099f34d0873515a63224e00b1feda920 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 18 Mar 2025 11:44:41 -0400 Subject: [PATCH] Paginate regex and path search tools (#26997) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Screenshot 2025-03-18 at 10 50 17 AM Release Notes: - N/A --- .../assistant_tools/src/path_search_tool.rs | 33 ++++++++- .../src/path_search_tool/description.md | 4 +- crates/assistant_tools/src/read_file_tool.rs | 16 +++-- crates/assistant_tools/src/regex_search.rs | 72 +++++++++++++++---- .../src/regex_search_tool/description.md | 2 + 5 files changed, 101 insertions(+), 26 deletions(-) diff --git a/crates/assistant_tools/src/path_search_tool.rs b/crates/assistant_tools/src/path_search_tool.rs index 5e8080c8cf..c4e9250892 100644 --- a/crates/assistant_tools/src/path_search_tool.rs +++ b/crates/assistant_tools/src/path_search_tool.rs @@ -23,8 +23,15 @@ pub struct PathSearchToolInput { /// You can get back the first two paths by providing a glob of "*thing*.txt" /// pub glob: String, + + /// Optional starting position for paginated results (0-based). + /// When not provided, starts from the beginning. + #[serde(default)] + pub offset: Option<usize>, } + +const RESULTS_PER_PAGE: usize = 50; + pub struct PathSearchTool; impl Tool for PathSearchTool { @@ -49,8 +56,8 @@ impl Tool for PathSearchTool { _action_log: Entity<ActionLog>, cx: &mut App, ) -> Task<Result<String>> { - let glob = match serde_json::from_value::<PathSearchToolInput>(input) { - Ok(input) => input.glob, + let (offset, glob) = match serde_json::from_value::<PathSearchToolInput>(input) { + Ok(input) => (input.offset.unwrap_or(0), input.glob), Err(err) => return Task::ready(Err(anyhow!(err))), }; let path_matcher = match PathMatcher::new(&[glob.clone()]) { @@ -87,7 +94,27 @@ impl Tool for PathSearchTool { } else { // Sort to group entries in the same directory together. 
matches.sort(); - Ok(matches.join("\n")) + + let total_matches = matches.len(); + let response = if total_matches > offset + RESULTS_PER_PAGE { + let paginated_matches: Vec<_> = matches + .into_iter() + .skip(offset) + .take(RESULTS_PER_PAGE) + .collect(); + + format!( + "Found {} total matches. Showing results {}-{} (provide 'offset' parameter for more results):\n\n{}", + total_matches, + offset + 1, + offset + paginated_matches.len(), + paginated_matches.join("\n") + ) + } else { + matches.join("\n") + }; + + Ok(response) } }) } diff --git a/crates/assistant_tools/src/path_search_tool/description.md b/crates/assistant_tools/src/path_search_tool/description.md index 6541024714..129aaa7c8e 100644 --- a/crates/assistant_tools/src/path_search_tool/description.md +++ b/crates/assistant_tools/src/path_search_tool/description.md @@ -1 +1,3 @@ -Returns all the paths in the project which match the given glob. +Returns paths in the project which match the given glob. + +Results are paginated with 50 matches per page. Use the optional 'offset' parameter to request subsequent pages. 
diff --git a/crates/assistant_tools/src/read_file_tool.rs b/crates/assistant_tools/src/read_file_tool.rs index 231f21e9ba..59c6b55071 100644 --- a/crates/assistant_tools/src/read_file_tool.rs +++ b/crates/assistant_tools/src/read_file_tool.rs @@ -28,13 +28,13 @@ pub struct ReadFileToolInput { /// pub path: Arc<Path>, - /// Optional line number to start reading from (0-based index) + /// Optional line number to start reading on (1-based index) #[serde(default)] pub start_line: Option<usize>, - /// Optional number of lines to read + /// Optional line number to end reading on (1-based index) #[serde(default)] - pub line_count: Option<usize>, + pub end_line: Option<usize>, } pub struct ReadFileTool; @@ -83,10 +83,12 @@ impl Tool for ReadFileTool { .map_or(false, |file| file.disk_state().exists()) { let text = buffer.text(); - let string = if input.start_line.is_some() || input.line_count.is_some() { - let lines = text.split('\n').skip(input.start_line.unwrap_or(0)); - if let Some(line_count) = input.line_count { - Itertools::intersperse(lines.take(line_count), "\n").collect() + let string = if input.start_line.is_some() || input.end_line.is_some() { + let start = input.start_line.unwrap_or(1); + let lines = text.split('\n').skip(start - 1); + if let Some(end) = input.end_line { + let count = end.saturating_sub(start); + Itertools::intersperse(lines.take(count), "\n").collect() } else { Itertools::intersperse(lines, "\n").collect() } diff --git a/crates/assistant_tools/src/regex_search.rs b/crates/assistant_tools/src/regex_search.rs index a6eca996a7..54f680d1f2 100644 --- a/crates/assistant_tools/src/regex_search.rs +++ b/crates/assistant_tools/src/regex_search.rs @@ -4,7 +4,10 @@ use futures::StreamExt; use gpui::{App, Entity, Task}; use language::OffsetRangeExt; use language_model::LanguageModelRequestMessage; -use project::{search::SearchQuery, Project}; +use project::{ + search::{SearchQuery, SearchResult}, + Project, +}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use 
std::{cmp, fmt::Write, sync::Arc}; @@ -15,8 +18,15 @@ pub struct RegexSearchToolInput { /// A regex pattern to search for in the entire project. Note that the regex /// will be parsed by the Rust `regex` crate. pub regex: String, + + /// Optional starting position for paginated results (0-based). + /// When not provided, starts from the beginning. + #[serde(default)] + pub offset: Option<usize>, } + +const RESULTS_PER_PAGE: usize = 20; + pub struct RegexSearchTool; impl Tool for RegexSearchTool { @@ -43,13 +53,13 @@ impl Tool for RegexSearchTool { ) -> Task<Result<String>> { const CONTEXT_LINES: u32 = 2; - let input = match serde_json::from_value::<RegexSearchToolInput>(input) { - Ok(input) => input, + let (offset, regex) = match serde_json::from_value::<RegexSearchToolInput>(input) { + Ok(input) => (input.offset.unwrap_or(0), input.regex), Err(err) => return Task::ready(Err(anyhow!(err))), }; let query = match SearchQuery::regex( - &input.regex, + &regex, false, false, false, @@ -62,20 +72,23 @@ impl Tool for RegexSearchTool { }; let results = project.update(cx, |project, cx| project.search(query, cx)); + cx.spawn(|cx| async move { futures::pin_mut!(results); let mut output = String::new(); - while let Some(project::search::SearchResult::Buffer { buffer, ranges }) = - results.next().await - { + let mut skips_remaining = offset; + let mut matches_found = 0; + let mut has_more_matches = false; + + while let Some(SearchResult::Buffer { buffer, ranges }) = results.next().await { if ranges.is_empty() { continue; } - buffer.read_with(&cx, |buffer, cx| { + buffer.read_with(&cx, |buffer, cx| -> Result<(), anyhow::Error> { if let Some(path) = buffer.file().map(|file| file.full_path(cx)) { - writeln!(output, "### Found matches in {}:\n", path.display()).unwrap(); + let mut file_header_written = false; let mut ranges = ranges .into_iter() .map(|range| { @@ -93,6 +106,17 @@ impl Tool for RegexSearchTool { .peekable(); while let Some(mut range) = ranges.next() { + if skips_remaining > 0 { + skips_remaining -= 1; + continue; + } + + // We'd 
already found a full page of matches, and we just found one more. + if matches_found >= RESULTS_PER_PAGE { + has_more_matches = true; + return Ok(()); + } + while let Some(next_range) = ranges.peek() { if range.end.row >= next_range.start.row { range.end = next_range.end; @@ -102,18 +126,36 @@ impl Tool for RegexSearchTool { } } - writeln!(output, "```").unwrap(); + if !file_header_written { + writeln!(output, "\n## Matches in {}", path.display())?; + file_header_written = true; + } + + let start_line = range.start.row + 1; + let end_line = range.end.row + 1; + writeln!(output, "\n### Lines {start_line}-{end_line}\n```")?; output.extend(buffer.text_for_range(range)); - writeln!(output, "\n```\n").unwrap(); + output.push_str("\n```\n"); + + matches_found += 1; } } - })?; + + Ok(()) + })??; } - if output.is_empty() { + if matches_found == 0 { Ok("No matches found".to_string()) - } else { - Ok(output) + } else if has_more_matches { + Ok(format!( + "Showing matches {}-{} (there were more matches found; use offset: {} to see next page):\n{output}", + offset + 1, + offset + matches_found, + offset + RESULTS_PER_PAGE, + )) + } else { + Ok(format!("Found {matches_found} matches:\n{output}")) } }) } diff --git a/crates/assistant_tools/src/regex_search_tool/description.md b/crates/assistant_tools/src/regex_search_tool/description.md index 9b9b0f64b6..68928d6de1 100644 --- a/crates/assistant_tools/src/regex_search_tool/description.md +++ b/crates/assistant_tools/src/regex_search_tool/description.md @@ -1,3 +1,5 @@ Searches the entire project for the given regular expression. Returns a list of paths that matched the query. For each path, it returns a list of excerpts of the matched text. + +Results are paginated with 20 matches per page. Use the optional 'offset' parameter to request subsequent pages.