diff --git a/Cargo.lock b/Cargo.lock index 4349156d61..69ff223a01 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4993,6 +4993,7 @@ dependencies = [ "language_model", "language_models", "languages", + "markdown", "node_runtime", "pathdiff", "paths", diff --git a/assets/prompts/assistant_system_prompt.hbs b/assets/prompts/assistant_system_prompt.hbs index bdd2a211e4..b0b514359b 100644 --- a/assets/prompts/assistant_system_prompt.hbs +++ b/assets/prompts/assistant_system_prompt.hbs @@ -39,18 +39,78 @@ If appropriate, use tool calls to explore the current project, which contains th ## Code Block Formatting -Whenever you mention a code block, you MUST use ONLY use the following format when the code in the block comes from a file -in the project: - +Whenever you mention a code block, you MUST use ONLY use the following format: ```path/to/Something.blah#L123-456 (code goes here) ``` - The `#L123-456` means the line number range 123 through 456, and the path/to/Something.blah -is a path in the project. (If this code block does not come from a file in the project, then you may instead use -the normal markdown style of three backticks followed by language name. However, you MUST use this format if -the code in the block comes from a file in the project.) - +is a path in the project. (If there is no valid path in the project, then you can use +/dev/null/path.extension for its path.) This is the ONLY valid way to format code blocks, because the Markdown parser +does not understand the more common ```language syntax, or bare ``` blocks. It only +understands this path-based syntax, and if the path is missing, then it will error and you will have to do it over again. +Just to be really clear about this, if you ever find yourself writing three backticks followed by a language name, STOP! +You have made a mistake. You can only ever put paths after triple backticks! + +Based on all the information I've gathered, here's a summary of how this system works: +1. The README file is loaded into the system. +2. The system finds the first two headers, including everything in between. In this case, that would be: +```path/to/README.md#L8-12 +# First Header +This is the info under the first header. +## Sub-header +``` +3. Then the system finds the last header in the README: +```path/to/README.md#L27-29 +## Last Header +This is the last header in the README. +``` +4. Finally, it passes this information on to the next process. + + +In Markdown, hash marks signify headings. For example: +```/dev/null/example.md#L1-3 +# Level 1 heading +## Level 2 heading +### Level 3 heading +``` + +Here are examples of ways you must never render code blocks: + +In Markdown, hash marks signify headings. For example: +``` +# Level 1 heading +## Level 2 heading +### Level 3 heading +``` + +This example is unacceptable because it does not include the path. + +In Markdown, hash marks signify headings. For example: +```markdown +# Level 1 heading +## Level 2 heading +### Level 3 heading +``` + +This example is unacceptable because it has the language instead of the path. + +In Markdown, hash marks signify headings. For example: + # Level 1 heading + ## Level 2 heading + ### Level 3 heading + +This example is unacceptable because it uses indentation to mark the code block +instead of backticks with a path. + +In Markdown, hash marks signify headings. For example: +```markdown +/dev/null/example.md#L1-3 +# Level 1 heading +## Level 2 heading +### Level 3 heading +``` + +This example is unacceptable because the path is in the wrong place. The path must be directly after the opening backticks. ## Fixing Diagnostics 1. Make 1-2 attempts at fixing diagnostics, then defer to the user. diff --git a/crates/agent/src/active_thread.rs b/crates/agent/src/active_thread.rs index 7d11216931..c465308a50 100644 --- a/crates/agent/src/active_thread.rs +++ b/crates/agent/src/active_thread.rs @@ -23,7 +23,7 @@ use gpui::{ Task, TextStyle, TextStyleRefinement, Transformation, UnderlineStyle, WeakEntity, WindowHandle, linear_color_stop, linear_gradient, list, percentage, pulsating_between, }; -use language::{Buffer, LanguageRegistry}; +use language::{Buffer, Language, LanguageRegistry}; use language_model::{ LanguageModelRequestMessage, LanguageModelToolUseId, MessageContent, RequestUsage, Role, StopReason, @@ -33,6 +33,7 @@ use markdown::{HeadingLevelStyles, Markdown, MarkdownElement, MarkdownStyle, Par use project::{ProjectEntryId, ProjectItem as _}; use rope::Point; use settings::{Settings as _, update_settings_file}; +use std::ffi::OsStr; use std::path::Path; use std::rc::Rc; use std::sync::Arc; @@ -346,130 +347,130 @@ fn render_markdown_code_block( .child(Label::new("untitled").size(LabelSize::Small)) .into_any_element(), ), - CodeBlockKind::FencedLang(raw_language_name) => Some( - h_flex() - .gap_1() - .children( - parsed_markdown - .languages_by_name - .get(raw_language_name) - .and_then(|language| { - language - .config() - .matcher - .path_suffixes - .iter() - .find_map(|extension| { - file_icons::FileIcons::get_icon(Path::new(extension), cx) - }) - .map(Icon::from_path) - .map(|icon| icon.color(Color::Muted).size(IconSize::Small)) - }), - ) - .child( - Label::new( - parsed_markdown - .languages_by_name - .get(raw_language_name) - .map(|language| language.name().into()) - .clone() - .unwrap_or_else(|| raw_language_name.clone()), - ) - .size(LabelSize::Small), - ) - .into_any_element(), - ), + CodeBlockKind::FencedLang(raw_language_name) => Some(render_code_language( + parsed_markdown.languages_by_name.get(raw_language_name), + raw_language_name.clone(), + cx, + )), CodeBlockKind::FencedSrc(path_range) => path_range.path.file_name().map(|file_name| { - let content = if let Some(parent) = path_range.path.parent() { - h_flex() - .ml_1() - .gap_1() - .child( - Label::new(file_name.to_string_lossy().to_string()).size(LabelSize::Small), - ) - .child( - Label::new(parent.to_string_lossy().to_string()) - .color(Color::Muted) - .size(LabelSize::Small), - ) - .into_any_element() - } else { - Label::new(path_range.path.to_string_lossy().to_string()) - .size(LabelSize::Small) - .ml_1() - .into_any_element() - }; + // We tell the model to use /dev/null for the path instead of using ```language + // because otherwise it consistently fails to use code citations. + if path_range.path.starts_with("/dev/null") { + let ext = path_range + .path + .extension() + .and_then(OsStr::to_str) + .map(|str| SharedString::new(str.to_string())) + .unwrap_or_default(); - h_flex() - .id(("code-block-header-label", ix)) - .w_full() - .max_w_full() - .px_1() - .gap_0p5() - .cursor_pointer() - .rounded_sm() - .hover(|item| item.bg(cx.theme().colors().element_hover.opacity(0.5))) - .tooltip(Tooltip::text("Jump to File")) - .child( - h_flex() - .gap_0p5() - .children( - file_icons::FileIcons::get_icon(&path_range.path, cx) - .map(Icon::from_path) - .map(|icon| icon.color(Color::Muted).size(IconSize::XSmall)), - ) - .child(content) - .child( - Icon::new(IconName::ArrowUpRight) - .size(IconSize::XSmall) - .color(Color::Ignored), - ), + render_code_language( + parsed_markdown + .languages_by_path + .get(&path_range.path) + .or_else(|| parsed_markdown.languages_by_name.get(&ext)), + ext, + cx, ) - .on_click({ - let path_range = path_range.clone(); - move |_, window, cx| { - workspace - .update(cx, { - |workspace, cx| { - let Some(project_path) = workspace - .project() - .read(cx) - .find_project_path(&path_range.path, cx) - else { - return; - }; - let Some(target) = path_range.range.as_ref().map(|range| { - Point::new( - // Line number is 1-based - range.start.line.saturating_sub(1), - range.start.col.unwrap_or(0), - ) - }) else { - return; - }; - let open_task = - workspace.open_path(project_path, None, true, window, cx); - window - .spawn(cx, async move |cx| { - let item = open_task.await?; - if let Some(active_editor) = item.downcast::() { - active_editor - .update_in(cx, |editor, window, cx| { - editor.go_to_singleton_buffer_point( - target, window, cx, - ); - }) - .ok(); - } - anyhow::Ok(()) - }) - .detach_and_log_err(cx); - } - }) - .ok(); - } - }) - .into_any_element() + } else { + let content = if let Some(parent) = path_range.path.parent() { + h_flex() + .ml_1() + .gap_1() + .child( + Label::new(file_name.to_string_lossy().to_string()) + .size(LabelSize::Small), + ) + .child( + Label::new(parent.to_string_lossy().to_string()) + .color(Color::Muted) + .size(LabelSize::Small), + ) + .into_any_element() + } else { + Label::new(path_range.path.to_string_lossy().to_string()) + .size(LabelSize::Small) + .ml_1() + .into_any_element() + }; + + h_flex() + .id(("code-block-header-label", ix)) + .w_full() + .max_w_full() + .px_1() + .gap_0p5() + .cursor_pointer() + .rounded_sm() + .hover(|item| item.bg(cx.theme().colors().element_hover.opacity(0.5))) + .tooltip(Tooltip::text("Jump to File")) + .child( + h_flex() + .gap_0p5() + .children( + file_icons::FileIcons::get_icon(&path_range.path, cx) + .map(Icon::from_path) + .map(|icon| icon.color(Color::Muted).size(IconSize::XSmall)), + ) + .child(content) + .child( + Icon::new(IconName::ArrowUpRight) + .size(IconSize::XSmall) + .color(Color::Ignored), + ), + ) + .on_click({ + let path_range = path_range.clone(); + move |_, window, cx| { + workspace + .update(cx, { + |workspace, cx| { + let Some(project_path) = workspace + .project() + .read(cx) + .find_project_path(&path_range.path, cx) + else { + return; + }; + let Some(target) = path_range.range.as_ref().map(|range| { + Point::new( + // Line number is 1-based + range.start.line.saturating_sub(1), + range.start.col.unwrap_or(0), + ) + }) else { + return; + }; + let open_task = workspace.open_path( + project_path, + None, + true, + window, + cx, + ); + window + .spawn(cx, async move |cx| { + let item = open_task.await?; + if let Some(active_editor) = + item.downcast::() + { + active_editor + .update_in(cx, |editor, window, cx| { + editor.go_to_singleton_buffer_point( + target, window, cx, + ); + }) + .ok(); + } + anyhow::Ok(()) + }) + .detach_and_log_err(cx); + } + }) + .ok(); + } + }) + .into_any_element() + } }), }; @@ -604,6 +605,32 @@ fn render_markdown_code_block( ) } +fn render_code_language( + language: Option<&Arc>, + name_fallback: SharedString, + cx: &App, +) -> AnyElement { + let icon_path = language.and_then(|language| { + language + .config() + .matcher + .path_suffixes + .iter() + .find_map(|extension| file_icons::FileIcons::get_icon(Path::new(extension), cx)) + .map(Icon::from_path) + }); + + let language_label = language + .map(|language| language.name().into()) + .unwrap_or(name_fallback); + + h_flex() + .gap_1() + .children(icon_path.map(|icon| icon.color(Color::Muted).size(IconSize::Small))) + .child(Label::new(language_label).size(LabelSize::Small)) + .into_any_element() +} + fn open_markdown_link( text: SharedString, workspace: WeakEntity, diff --git a/crates/assistant_tools/src/edit_file_tool.rs b/crates/assistant_tools/src/edit_file_tool.rs index fc88db2351..9a766cf7a2 100644 --- a/crates/assistant_tools/src/edit_file_tool.rs +++ b/crates/assistant_tools/src/edit_file_tool.rs @@ -174,6 +174,7 @@ impl Tool for EditFileTool { "The `old_string` and `new_string` are identical, so no changes would be made." )); } + let old_string = input.old_string.clone(); let result = cx .background_spawn(async move { @@ -213,6 +214,21 @@ impl Tool for EditFileTool { input.path.display() ) } else { + let old_string_with_buffer = format!( + "old_string:\n\n{}\n\n-------file-------\n\n{}", + &old_string, + buffer.text() + ); + let path = { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let mut hasher = DefaultHasher::new(); + old_string_with_buffer.hash(&mut hasher); + + PathBuf::from(format!("failed_tool_{}.txt", hasher.finish())) + }; + std::fs::write(path, old_string_with_buffer).unwrap(); anyhow!("Failed to match the provided `old_string`") } })?; diff --git a/crates/eval/Cargo.toml b/crates/eval/Cargo.toml index 29ce6d2531..8c8d3a7830 100644 --- a/crates/eval/Cargo.toml +++ b/crates/eval/Cargo.toml @@ -44,6 +44,7 @@ language_extension.workspace = true language_model.workspace = true language_models.workspace = true languages = { workspace = true, features = ["load-grammars"] } +markdown.workspace = true node_runtime.workspace = true pathdiff.workspace = true paths.workspace = true diff --git a/crates/eval/src/example.rs b/crates/eval/src/example.rs index 328fb25df8..497836b0fc 100644 --- a/crates/eval/src/example.rs +++ b/crates/eval/src/example.rs @@ -10,13 +10,13 @@ use crate::{ ToolMetrics, assertions::{AssertionsReport, RanAssertion, RanAssertionResult}, }; -use agent::{ContextLoadResult, ThreadEvent}; +use agent::{ContextLoadResult, Thread, ThreadEvent}; use anyhow::{Result, anyhow}; use async_trait::async_trait; use buffer_diff::DiffHunkStatus; use collections::HashMap; use futures::{FutureExt as _, StreamExt, channel::mpsc, select_biased}; -use gpui::{AppContext, AsyncApp, Entity}; +use gpui::{App, AppContext, AsyncApp, Entity}; use language_model::{LanguageModel, Role, StopReason}; pub const THREAD_EVENT_TIMEOUT: Duration = Duration::from_secs(60 * 2); @@ -314,7 +314,7 @@ impl ExampleContext { for message in thread.messages().skip(message_count_before) { messages.push(Message { _role: message.role, - _text: message.to_string(), + text: message.to_string(), tool_use: thread .tool_uses_for_message(message.id, cx) .into_iter() @@ -362,6 +362,90 @@ impl ExampleContext { }) .unwrap() } + + pub fn agent_thread(&self) -> Entity { + self.agent_thread.clone() + } +} + +impl AppContext for ExampleContext { + type Result = anyhow::Result; + + fn new( + &mut self, + build_entity: impl FnOnce(&mut gpui::Context) -> T, + ) -> Self::Result> { + self.app.new(build_entity) + } + + fn reserve_entity(&mut self) -> Self::Result> { + self.app.reserve_entity() + } + + fn insert_entity( + &mut self, + reservation: gpui::Reservation, + build_entity: impl FnOnce(&mut gpui::Context) -> T, + ) -> Self::Result> { + self.app.insert_entity(reservation, build_entity) + } + + fn update_entity( + &mut self, + handle: &Entity, + update: impl FnOnce(&mut T, &mut gpui::Context) -> R, + ) -> Self::Result + where + T: 'static, + { + self.app.update_entity(handle, update) + } + + fn read_entity( + &self, + handle: &Entity, + read: impl FnOnce(&T, &App) -> R, + ) -> Self::Result + where + T: 'static, + { + self.app.read_entity(handle, read) + } + + fn update_window(&mut self, window: gpui::AnyWindowHandle, f: F) -> Result + where + F: FnOnce(gpui::AnyView, &mut gpui::Window, &mut App) -> T, + { + self.app.update_window(window, f) + } + + fn read_window( + &self, + window: &gpui::WindowHandle, + read: impl FnOnce(Entity, &App) -> R, + ) -> Result + where + T: 'static, + { + self.app.read_window(window, read) + } + + fn background_spawn( + &self, + future: impl std::future::Future + Send + 'static, + ) -> gpui::Task + where + R: Send + 'static, + { + self.app.background_spawn(future) + } + + fn read_global(&self, callback: impl FnOnce(&G, &App) -> R) -> Self::Result + where + G: gpui::Global, + { + self.app.read_global(callback) + } } #[derive(Debug)] @@ -391,12 +475,16 @@ impl Response { pub fn tool_uses(&self) -> impl Iterator { self.messages.iter().flat_map(|msg| &msg.tool_use) } + + pub fn texts(&self) -> impl Iterator { + self.messages.iter().map(|message| message.text.clone()) + } } #[derive(Debug)] pub struct Message { _role: Role, - _text: String, + text: String, tool_use: Vec, } diff --git a/crates/eval/src/examples/code_block_citations.rs b/crates/eval/src/examples/code_block_citations.rs new file mode 100644 index 0000000000..595d430841 --- /dev/null +++ b/crates/eval/src/examples/code_block_citations.rs @@ -0,0 +1,191 @@ +use anyhow::Result; +use async_trait::async_trait; +use markdown::PathWithRange; + +use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion, LanguageServer}; + +pub struct CodeBlockCitations; + +const FENCE: &str = "```"; + +#[async_trait(?Send)] +impl Example for CodeBlockCitations { + fn meta(&self) -> ExampleMetadata { + ExampleMetadata { + name: "code_block_citations".to_string(), + url: "https://github.com/zed-industries/zed.git".to_string(), + revision: "f69aeb6311dde3c0b8979c293d019d66498d54f2".to_string(), + language_server: Some(LanguageServer { + file_extension: "rs".to_string(), + allow_preexisting_diagnostics: false, + }), + max_assertions: None, + } + } + + async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> { + const FILENAME: &str = "assistant_tool.rs"; + cx.push_user_message(format!( + r#" + Show me the method bodies of all the methods of the `Tool` trait in {FILENAME}. + + Please show each method in a separate code snippet. + "# + )); + + // Verify that the messages all have the correct formatting. + let texts: Vec = cx.run_to_end().await?.texts().collect(); + let closing_fence = format!("\n{FENCE}"); + + for text in texts.iter() { + let mut text = text.as_str(); + + while let Some(index) = text.find(FENCE) { + // Advance text past the opening backticks. + text = &text[index + FENCE.len()..]; + + // Find the closing backticks. + let content_len = text.find(&closing_fence); + + // Verify the citation format - e.g. ```path/to/foo.txt#L123-456 + if let Some(citation_len) = text.find('\n') { + let citation = &text[..citation_len]; + + if let Ok(()) = + cx.assert(citation.contains("/"), format!("Slash in {citation:?}",)) + { + let path_range = PathWithRange::new(citation); + let path = cx + .agent_thread() + .update(cx, |thread, cx| { + thread + .project() + .read(cx) + .find_project_path(path_range.path, cx) + }) + .ok() + .flatten(); + + if let Ok(path) = cx.assert_some(path, format!("Valid path: {citation:?}")) + { + let buffer_text = { + let buffer = match cx.agent_thread().update(cx, |thread, cx| { + thread + .project() + .update(cx, |project, cx| project.open_buffer(path, cx)) + }) { + Ok(buffer_task) => buffer_task.await.ok(), + Err(err) => { + cx.assert( + false, + format!("Expected Ok(buffer), not {err:?}"), + ) + .ok(); + break; + } + }; + + let Ok(buffer_text) = cx.assert_some( + buffer.and_then(|buffer| { + buffer.read_with(cx, |buffer, _| buffer.text()).ok() + }), + "Reading buffer text succeeded", + ) else { + continue; + }; + buffer_text + }; + + if let Some(content_len) = content_len { + // + 1 because there's a newline character after the citation. + let content = + &text[(citation.len() + 1)..content_len - (citation.len() + 1)]; + + cx.assert( + buffer_text.contains(&content), + "Code block content was found in file", + ) + .ok(); + + if let Some(range) = path_range.range { + let start_line_index = range.start.line.saturating_sub(1); + let line_count = + range.end.line.saturating_sub(start_line_index); + let mut snippet = buffer_text + .lines() + .skip(start_line_index as usize) + .take(line_count as usize) + .collect::>() + .join("\n"); + + if let Some(start_col) = range.start.col { + snippet = snippet[start_col as usize..].to_string(); + } + + if let Some(end_col) = range.end.col { + let last_line = snippet.lines().last().unwrap(); + snippet = snippet + [..snippet.len() - last_line.len() + end_col as usize] + .to_string(); + } + + cx.assert_eq( + snippet.as_str(), + content, + "Code block snippet was at specified line/col", + ) + .ok(); + } + } + } + } + } else { + cx.assert( + false, + format!("Opening {FENCE} did not have a newline anywhere after it."), + ) + .ok(); + } + + if let Some(content_len) = content_len { + // Advance past the closing backticks + text = &text[content_len + FENCE.len()..]; + } else { + // There were no closing backticks associated with these opening backticks. + cx.assert( + false, + "Code block opening had matching closing backticks.".to_string(), + ) + .ok(); + + // There are no more code blocks to parse, so we're done. + break; + } + } + } + + Ok(()) + } + + fn thread_assertions(&self) -> Vec { + vec![ + JudgeAssertion { + id: "trait method bodies are shown".to_string(), + description: + "All method bodies of the Tool trait are shown." + .to_string(), + }, + JudgeAssertion { + id: "code blocks used".to_string(), + description: + "All code snippets are rendered inside markdown code blocks (as opposed to any other formatting besides code blocks)." + .to_string(), + }, + JudgeAssertion { + id: "code blocks use backticks".to_string(), + description: + format!("All markdown code blocks use backtick fences ({FENCE}) rather than indentation.") + } + ] + } +} diff --git a/crates/eval/src/examples/mod.rs b/crates/eval/src/examples/mod.rs index 83e44b6bfd..7e451e4ff6 100644 --- a/crates/eval/src/examples/mod.rs +++ b/crates/eval/src/examples/mod.rs @@ -12,12 +12,14 @@ use util::serde::default_true; use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion}; mod add_arg_to_trait_method; +mod code_block_citations; mod file_search; pub fn all(examples_dir: &Path) -> Vec> { let mut threads: Vec> = vec![ Rc::new(file_search::FileSearchExample), Rc::new(add_arg_to_trait_method::AddArgToTraitMethod), + Rc::new(code_block_citations::CodeBlockCitations), ]; for example_path in list_declarative_examples(examples_dir).unwrap() { diff --git a/crates/markdown/src/markdown.rs b/crates/markdown/src/markdown.rs index 134a19a65b..1bc6f99207 100644 --- a/crates/markdown/src/markdown.rs +++ b/crates/markdown/src/markdown.rs @@ -1,6 +1,8 @@ pub mod parser; mod path_range; +pub use path_range::{LineCol, PathWithRange}; + use std::borrow::Cow; use std::collections::HashSet; use std::iter; diff --git a/crates/markdown/src/path_range.rs b/crates/markdown/src/path_range.rs index 2bb757b389..0fb7f4cd23 100644 --- a/crates/markdown/src/path_range.rs +++ b/crates/markdown/src/path_range.rs @@ -32,6 +32,20 @@ impl LineCol { } impl PathWithRange { + // Note: We could try out this as an alternative, and see how it does on evals. + // + // The closest to a standard way of including a filename is this: + // ```rust filename="path/to/file.rs#42:43" + // ``` + // + // or, alternatively, + // ```rust filename="path/to/file.rs" lines="42:43" + // ``` + // + // Examples where it's used this way: + // - https://mdxjs.com/guides/syntax-highlighting/#syntax-highlighting-with-the-meta-field + // - https://docusaurus.io/docs/markdown-features/code-blocks + // - https://spec.commonmark.org/0.31.2/#example-143 pub fn new(str: impl AsRef) -> Self { let str = str.as_ref(); // Sometimes the model will include a language at the start,