diff --git a/crates/assistant_tools/src/assistant_tools.rs b/crates/assistant_tools/src/assistant_tools.rs index 29c67e28d6..832183908a 100644 --- a/crates/assistant_tools/src/assistant_tools.rs +++ b/crates/assistant_tools/src/assistant_tools.rs @@ -77,7 +77,6 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) { registry.register_tool(TerminalTool); registry.register_tool(BatchTool); registry.register_tool(CreateDirectoryTool); - registry.register_tool(CreateFileTool); registry.register_tool(CopyPathTool); registry.register_tool(DeletePathTool); registry.register_tool(SymbolInfoTool); @@ -125,12 +124,14 @@ pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) { fn register_edit_file_tool(cx: &mut App) { let registry = ToolRegistry::global(cx); + registry.unregister_tool(CreateFileTool); registry.unregister_tool(EditFileTool); registry.unregister_tool(StreamingEditFileTool); if AssistantSettings::get_global(cx).stream_edits(cx) { registry.register_tool(StreamingEditFileTool); } else { + registry.register_tool(CreateFileTool); registry.register_tool(EditFileTool); } } diff --git a/crates/assistant_tools/src/edit_agent.rs b/crates/assistant_tools/src/edit_agent.rs index 3ab1d6ffc1..8925f9b7fc 100644 --- a/crates/assistant_tools/src/edit_agent.rs +++ b/crates/assistant_tools/src/edit_agent.rs @@ -10,6 +10,7 @@ use edit_parser::{EditParser, EditParserEvent, EditParserMetrics}; use futures::{ Stream, StreamExt, channel::mpsc::{self, UnboundedReceiver}, + pin_mut, stream::BoxStream, }; use gpui::{AppContext, AsyncApp, Entity, SharedString, Task}; @@ -23,19 +24,29 @@ use std::{cmp, iter, mem, ops::Range, path::PathBuf, sync::Arc, task::Poll}; use streaming_diff::{CharOperation, StreamingDiff}; #[derive(Serialize)] -pub struct EditAgentTemplate { +struct CreateFilePromptTemplate { path: Option<PathBuf>, edit_description: String, } -impl Template for EditAgentTemplate { - const TEMPLATE_NAME: &'static str = "edit_agent.hbs"; +impl Template for CreateFilePromptTemplate { + const TEMPLATE_NAME: &'static str = "create_file_prompt.hbs"; +} + +#[derive(Serialize)] +struct EditFilePromptTemplate { + path: Option<PathBuf>, + edit_description: String, +} + +impl Template for EditFilePromptTemplate { + const TEMPLATE_NAME: &'static str = "edit_file_prompt.hbs"; } #[derive(Clone, Debug, PartialEq, Eq)] pub enum EditAgentOutputEvent { Edited, - HallucinatedOldText(SharedString), + OldTextNotFound(SharedString), } #[derive(Clone, Debug)] @@ -64,6 +75,82 @@ impl EditAgent { } } + pub fn overwrite( + &self, + buffer: Entity<Buffer>, + edit_description: String, + previous_messages: Vec<LanguageModelRequestMessage>, + cx: &mut AsyncApp, + ) -> ( + Task<Result<EditAgentOutput>>, + mpsc::UnboundedReceiver<EditAgentOutputEvent>, + ) { + let this = self.clone(); + let (events_tx, events_rx) = mpsc::unbounded(); + let output = cx.spawn(async move |cx| { + let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?; + let path = cx.update(|cx| snapshot.resolve_file_path(cx, true))?; + let prompt = CreateFilePromptTemplate { + path, + edit_description, + } + .render(&this.templates)?; + let new_chunks = this.request(previous_messages, prompt, cx).await?; + + let (output, mut inner_events) = this.replace_text_with_chunks(buffer, new_chunks, cx); + while let Some(event) = inner_events.next().await { + events_tx.unbounded_send(event).ok(); + } + output.await + }); + (output, events_rx) + } + + fn replace_text_with_chunks( + &self, + buffer: Entity<Buffer>, + edit_chunks: impl 'static + Send + Stream<Item = Result<String>>, + cx: &mut AsyncApp, + ) -> ( + Task<Result<EditAgentOutput>>, + mpsc::UnboundedReceiver<EditAgentOutputEvent>, + ) { + let (output_events_tx, output_events_rx) = mpsc::unbounded(); +
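+ // Overwrite flow: clear the buffer, then append the model's output chunk by chunk as it streams in, emitting an `Edited` event and recording each change in the action log.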
let this = self.clone(); + let task = cx.spawn(async move |cx| { + // Ensure the buffer is tracked by the action log. + this.action_log + .update(cx, |log, cx| log.track_buffer(buffer.clone(), cx))?; + + cx.update(|cx| { + buffer.update(cx, |buffer, cx| buffer.set_text("", cx)); + this.action_log + .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx)); + })?; + + let mut raw_edits = String::new(); + pin_mut!(edit_chunks); + while let Some(chunk) = edit_chunks.next().await { + let chunk = chunk?; + raw_edits.push_str(&chunk); + cx.update(|cx| { + buffer.update(cx, |buffer, cx| buffer.append(chunk, cx)); + this.action_log + .update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx)); + })?; + output_events_tx + .unbounded_send(EditAgentOutputEvent::Edited) + .ok(); + } + + Ok(EditAgentOutput { + _raw_edits: raw_edits, + _parser_metrics: EditParserMetrics::default(), + }) + }); + (task, output_events_rx) + } + pub fn edit( &self, buffer: Entity<Buffer>, @@ -78,10 +165,15 @@ impl EditAgent { let (events_tx, events_rx) = mpsc::unbounded(); let output = cx.spawn(async move |cx| { let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?; - let edit_chunks = this - .request_edits(snapshot, edit_description, previous_messages, cx) - .await?; - let (output, mut inner_events) = this.apply_edits(buffer, edit_chunks, cx); + let path = cx.update(|cx| snapshot.resolve_file_path(cx, true))?; + let prompt = EditFilePromptTemplate { + path, + edit_description, + } + .render(&this.templates)?; + let edit_chunks = this.request(previous_messages, prompt, cx).await?; + + let (output, mut inner_events) = this.apply_edit_chunks(buffer, edit_chunks, cx); while let Some(event) = inner_events.next().await { events_tx.unbounded_send(event).ok(); } @@ -90,7 +182,7 @@ impl EditAgent { (output, events_rx) } - fn apply_edits( + fn apply_edit_chunks( &self, buffer: Entity<Buffer>, edit_chunks: impl 'static + Send + Stream<Item = Result<String>>, @@ -138,7 +230,7 @@ impl EditAgent { let Some(old_range) = old_range else { // We couldn't find the old text in the buffer. Report the error.
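+ // The edit is skipped (`continue` below), so one unmatched hunk doesn't abort the remaining edits.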
output_events - .unbounded_send(EditAgentOutputEvent::HallucinatedOldText(old_text_query)) + .unbounded_send(EditAgentOutputEvent::OldTextNotFound(old_text_query)) .ok(); continue; }; @@ -232,7 +324,7 @@ impl EditAgent { ) { let (tx, rx) = mpsc::unbounded(); let output = cx.background_spawn(async move { - futures::pin_mut!(chunks); + pin_mut!(chunks); let mut parser = EditParser::new(); let mut raw_edits = String::new(); @@ -336,20 +428,12 @@ impl EditAgent { }) } - async fn request_edits( + async fn request( &self, - snapshot: BufferSnapshot, - edit_description: String, mut messages: Vec, + prompt: String, cx: &mut AsyncApp, ) -> Result>> { - let path = cx.update(|cx| snapshot.resolve_file_path(cx, true))?; - let prompt = EditAgentTemplate { - path, - edit_description, - } - .render(&self.templates)?; - let mut message_content = Vec::new(); if let Some(last_message) = messages.last_mut() { if last_message.role == Role::Assistant { @@ -611,7 +695,8 @@ mod tests { &mut rng, cx, ); - let (apply, _events) = agent.apply_edits(buffer.clone(), raw_edits, &mut cx.to_async()); + let (apply, _events) = + agent.apply_edit_chunks(buffer.clone(), raw_edits, &mut cx.to_async()); apply.await.unwrap(); pretty_assertions::assert_eq!( buffer.read_with(cx, |buffer, _| buffer.snapshot().text()), @@ -648,7 +733,8 @@ mod tests { &mut rng, cx, ); - let (apply, _events) = agent.apply_edits(buffer.clone(), raw_edits, &mut cx.to_async()); + let (apply, _events) = + agent.apply_edit_chunks(buffer.clone(), raw_edits, &mut cx.to_async()); apply.await.unwrap(); assert_eq!( buffer.read_with(cx, |buffer, _| buffer.snapshot().text()), @@ -679,7 +765,8 @@ mod tests { &mut rng, cx, ); - let (apply, _events) = agent.apply_edits(buffer.clone(), raw_edits, &mut cx.to_async()); + let (apply, _events) = + agent.apply_edit_chunks(buffer.clone(), raw_edits, &mut cx.to_async()); apply.await.unwrap(); assert_eq!( buffer.read_with(cx, |buffer, _| buffer.snapshot().text()), @@ -692,7 +779,7 @@ mod tests { let agent = init_test(cx).await; let buffer = cx.new(|cx| Buffer::local("abc\ndef\nghi", cx)); let (chunks_tx, chunks_rx) = mpsc::unbounded(); - let (apply, mut events) = agent.apply_edits( + let (apply, mut events) = agent.apply_edit_chunks( buffer.clone(), chunks_rx.map(|chunk: &str| Ok(chunk.to_string())), &mut cx.to_async(), @@ -744,7 +831,7 @@ mod tests { cx.run_until_parked(); assert_eq!( drain_events(&mut events), - vec![EditAgentOutputEvent::HallucinatedOldText( + vec![EditAgentOutputEvent::OldTextNotFound( "hallucinated old".into() )] ); diff --git a/crates/assistant_tools/src/edit_agent/evals.rs b/crates/assistant_tools/src/edit_agent/evals.rs index d22f6bda57..dfdc60fd2d 100644 --- a/crates/assistant_tools/src/edit_agent/evals.rs +++ b/crates/assistant_tools/src/edit_agent/evals.rs @@ -4,10 +4,11 @@ use crate::{ streaming_edit_file_tool::StreamingEditFileToolInput, }; use Role::*; -use anyhow::{Context, anyhow}; +use anyhow::anyhow; use client::{Client, UserStore}; use collections::HashMap; use fs::FakeFs; +use futures::{FutureExt, future::LocalBoxFuture}; use gpui::{AppContext, TestAppContext}; use indoc::indoc; use language_model::{ @@ -71,14 +72,15 @@ fn eval_extract_handle_command_output() { StreamingEditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), + create_or_overwrite: false, }, )], ), ], input_path: input_file_path.into(), - input_content: input_file_content.into(), + input_content: Some(input_file_content.into()), edit_description: edit_description.into(), - 
assertion: EvalAssertion::AssertEqual(output_file_content.into()), + assertion: EvalAssertion::assert_eq(output_file_content), }, ); } @@ -126,14 +128,15 @@ fn eval_delete_run_git_blame() { StreamingEditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), + create_or_overwrite: false, }, )], ), ], input_path: input_file_path.into(), - input_content: input_file_content.into(), + input_content: Some(input_file_content.into()), edit_description: edit_description.into(), - assertion: EvalAssertion::AssertEqual(output_file_content.into()), + assertion: EvalAssertion::assert_eq(output_file_content), }, ); } @@ -240,14 +243,15 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() { StreamingEditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), + create_or_overwrite: false, }, )], ), ], input_path: input_file_path.into(), - input_content: input_file_content.into(), + input_content: Some(input_file_content.into()), edit_description: edit_description.into(), - assertion: EvalAssertion::JudgeDiff(indoc! {" + assertion: EvalAssertion::judge_diff(indoc! {" - The compile_parser_to_wasm method has been changed to use wasi-sdk - ureq is used to download the SDK for current platform and architecture "}), @@ -315,14 +319,15 @@ fn eval_disable_cursor_blinking() { StreamingEditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), + create_or_overwrite: false, }, )], ), ], input_path: input_file_path.into(), - input_content: input_file_content.into(), + input_content: Some(input_file_content.into()), edit_description: edit_description.into(), - assertion: EvalAssertion::AssertEqual(output_file_content.into()), + assertion: EvalAssertion::assert_eq(output_file_content), }, ); } @@ -504,14 +509,15 @@ fn eval_from_pixels_constructor() { StreamingEditFileToolInput { display_description: edit_description.into(), path: input_file_path.into(), + create_or_overwrite: false, }, )], ), ], input_path: input_file_path.into(), - input_content: input_file_content.into(), + input_content: Some(input_file_content.into()), edit_description: edit_description.into(), - assertion: EvalAssertion::JudgeDiff(indoc! {" + assertion: EvalAssertion::judge_diff(indoc!
{" - The diff contains a new `from_pixels` constructor - The diff contains new tests for the `from_pixels` constructor "}), @@ -519,6 +525,104 @@ fn eval_from_pixels_constructor() { ); } +#[test] +#[cfg_attr(not(feature = "eval"), ignore)] +fn eval_zode() { + let input_file_path = "root/zode.py"; + let edit_description = "Create the main Zode CLI script"; + eval( + 200, + 1., + EvalInput { + conversation: vec![ + message(User, [text(include_str!("evals/fixtures/zode/prompt.md"))]), + message( + Assistant, + [ + tool_use( + "tool_1", + "read_file", + ReadFileToolInput { + path: "root/eval/react.py".into(), + start_line: None, + end_line: None, + }, + ), + tool_use( + "tool_2", + "read_file", + ReadFileToolInput { + path: "root/eval/react_test.py".into(), + start_line: None, + end_line: None, + }, + ), + ], + ), + message( + User, + [ + tool_result( + "tool_1", + "read_file", + include_str!("evals/fixtures/zode/react.py"), + ), + tool_result( + "tool_2", + "read_file", + include_str!("evals/fixtures/zode/react_test.py"), + ), + ], + ), + message( + Assistant, + [ + text( + "Now that I understand what we need to build, I'll create the main Python script:", + ), + tool_use( + "tool_3", + "edit_file", + StreamingEditFileToolInput { + display_description: edit_description.into(), + path: input_file_path.into(), + create_or_overwrite: true, + }, + ), + ], + ), + ], + input_path: input_file_path.into(), + input_content: None, + edit_description: edit_description.into(), + assertion: EvalAssertion::new(async move |sample, _, _cx| { + let invalid_starts = [' ', '`', '\n']; + let mut message = String::new(); + for start in invalid_starts { + if sample.text.starts_with(start) { + message.push_str(&format!("The sample starts with a {:?}\n", start)); + break; + } + } + // Remove trailing newline. 
+ message.pop(); + + if message.is_empty() { + Ok(EvalAssertionOutcome { + score: 100, + message: None, + }) + } else { + Ok(EvalAssertionOutcome { + score: 0, + message: Some(message), + }) + } + }), + }, + ); +} + fn message( role: Role, contents: impl IntoIterator<Item = MessageContent>, @@ -574,11 +678,135 @@ fn tool_result( struct EvalInput { conversation: Vec<LanguageModelRequestMessage>, input_path: PathBuf, - input_content: String, + input_content: Option<String>, edit_description: String, assertion: EvalAssertion, } +#[derive(Clone)] +struct EvalSample { + text: String, + edit_output: EditAgentOutput, + diff: String, +} + +trait AssertionFn: 'static + Send + Sync { + fn assert<'a>( + &'a self, + sample: &'a EvalSample, + judge_model: Arc<dyn LanguageModel>, + cx: &'a mut TestAppContext, + ) -> LocalBoxFuture<'a, Result<EvalAssertionOutcome>>; +} + +impl<F> AssertionFn for F +where + F: 'static + + Send + + Sync + + AsyncFn( + &EvalSample, + Arc<dyn LanguageModel>, + &mut TestAppContext, + ) -> Result<EvalAssertionOutcome>, +{ + fn assert<'a>( + &'a self, + sample: &'a EvalSample, + judge_model: Arc<dyn LanguageModel>, + cx: &'a mut TestAppContext, + ) -> LocalBoxFuture<'a, Result<EvalAssertionOutcome>> { + (self)(sample, judge_model, cx).boxed_local() + } +} + +#[derive(Clone)] +struct EvalAssertion(Arc<dyn AssertionFn>); + +impl EvalAssertion { + fn new<F>(f: F) -> Self + where + F: 'static + + Send + + Sync + + AsyncFn( + &EvalSample, + Arc<dyn LanguageModel>, + &mut TestAppContext, + ) -> Result<EvalAssertionOutcome>, + { + EvalAssertion(Arc::new(f)) + } + + fn assert_eq(expected: impl Into<String>) -> Self { + let expected = expected.into(); + Self::new(async move |sample, _judge, _cx| { + Ok(EvalAssertionOutcome { + score: if strip_empty_lines(&sample.text) == strip_empty_lines(&expected) { + 100 + } else { + 0 + }, + message: None, + }) + }) + } + + fn judge_diff(assertions: &'static str) -> Self { + Self::new(async move |sample, judge, cx| { + let prompt = DiffJudgeTemplate { + diff: sample.diff.clone(), + assertions, + } + .render(&Templates::new()) + .unwrap(); + + let request = LanguageModelRequest { + messages: vec![LanguageModelRequestMessage { + role: Role::User, + content: vec![prompt.into()], + cache: false, + }], + ..Default::default() + }; + let mut response = judge + .stream_completion_text(request, &cx.to_async()) + .await?; + let mut output = String::new(); + while let Some(chunk) = response.stream.next().await { + let chunk = chunk?; + output.push_str(&chunk); + } + + // Parse the score from the response + let re = regex::Regex::new(r"(\d+)").unwrap(); + if let Some(captures) = re.captures(&output) { + if let Some(score_match) = captures.get(1) { + let score = score_match.as_str().parse().unwrap_or(0); + return Ok(EvalAssertionOutcome { + score, + message: Some(output), + }); + } + } + + Err(anyhow!( + "No score found in response.
Raw output: {}", + output + )) + }) + } + + async fn run( + &self, + input: &EvalSample, + judge_model: Arc<dyn LanguageModel>, + cx: &mut TestAppContext, + ) -> Result<EvalAssertionOutcome> { + self.0.assert(input, judge_model, cx).await + } +} + fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) { let mut evaluated_count = 0; report_progress(evaluated_count, iterations); @@ -606,12 +834,12 @@ fn eval(iterations: usize, expected_pass_ratio: f32, mut eval: EvalInput) { while let Ok(output) = rx.recv() { match output { Ok(output) => { - cumulative_parser_metrics += output.edit_output._parser_metrics.clone(); + cumulative_parser_metrics += output.sample.edit_output._parser_metrics.clone(); eval_outputs.push(output.clone()); if output.assertion.score < 80 { failed_count += 1; failed_evals - .entry(output.buffer_text.clone()) + .entry(output.sample.text.clone()) .or_insert(Vec::new()) .push(output); } @@ -671,10 +899,8 @@ fn run_eval(eval: EvalInput, tx: mpsc::Sender<Result<EvalOutput>>) { #[derive(Clone)] struct EvalOutput { - assertion: EvalAssertionResult, - buffer_text: String, - edit_output: EditAgentOutput, - diff: String, + sample: EvalSample, + assertion: EvalAssertionOutcome, } impl Display for EvalOutput { @@ -684,14 +910,14 @@ impl Display for EvalOutput { writeln!(f, "Message: {}", message)?; } - writeln!(f, "Diff:\n{}", self.diff)?; + writeln!(f, "Diff:\n{}", self.sample.diff)?; writeln!( f, "Parser Metrics:\n{:#?}", - self.edit_output._parser_metrics + self.sample.edit_output._parser_metrics )?; - writeln!(f, "Raw Edits:\n{}", self.edit_output._raw_edits)?; + writeln!(f, "Raw Edits:\n{}", self.sample.edit_output._raw_edits)?; Ok(()) } } @@ -777,96 +1003,45 @@ impl EditAgentTest { .update(cx, |project, cx| project.open_buffer(path, cx)) .await .unwrap(); - buffer.update(cx, |buffer, cx| { - buffer.set_text(eval.input_content.clone(), cx) - }); - let (edit_output, _events) = self.agent.edit( - buffer.clone(), - eval.edit_description, - eval.conversation, - &mut cx.to_async(), - ); - let edit_output = edit_output.await?; + let edit_output = if let Some(input_content) = eval.input_content.as_deref() { + buffer.update(cx, |buffer, cx| buffer.set_text(input_content, cx)); + let (edit_output, _) = self.agent.edit( + buffer.clone(), + eval.edit_description, + eval.conversation, + &mut cx.to_async(), + ); + edit_output.await? + } else { + let (edit_output, _) = self.agent.overwrite( + buffer.clone(), + eval.edit_description, + eval.conversation, + &mut cx.to_async(), + ); + edit_output.await?
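+ // (No input_content: the buffer starts empty and the eval exercises the new overwrite path.)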
+ }; + let buffer_text = buffer.read_with(cx, |buffer, _| buffer.text()); - let actual_diff = language::unified_diff(&eval.input_content, &buffer_text); - let assertion = match eval.assertion { - EvalAssertion::AssertEqual(expected_output) => EvalAssertionResult { - score: if strip_empty_lines(&buffer_text) == strip_empty_lines(&expected_output) { - 100 - } else { - 0 - }, - message: None, - }, - EvalAssertion::JudgeDiff(assertions) => self - .judge_diff(&actual_diff, assertions, &cx.to_async()) - .await - .context("failed comparing diffs")?, - }; - - Ok(EvalOutput { - assertion, - diff: actual_diff, - buffer_text, + let sample = EvalSample { edit_output, - }) - } - - async fn judge_diff( - &self, - diff: &str, - assertions: &'static str, - cx: &AsyncApp, - ) -> Result<EvalAssertionResult> { - let prompt = DiffJudgeTemplate { - diff: diff.to_string(), - assertions, - } - .render(&self.agent.templates) - .unwrap(); - - let request = LanguageModelRequest { - messages: vec![LanguageModelRequestMessage { - role: Role::User, - content: vec![prompt.into()], - cache: false, - }], - ..Default::default() + diff: language::unified_diff( + eval.input_content.as_deref().unwrap_or_default(), + &buffer_text, + ), + text: buffer_text, }; - let mut response = self.judge_model.stream_completion_text(request, cx).await?; - let mut output = String::new(); - while let Some(chunk) = response.stream.next().await { - let chunk = chunk?; - output.push_str(&chunk); - } + let assertion = eval + .assertion + .run(&sample, self.judge_model.clone(), cx) + .await?; - // Parse the score from the response - let re = regex::Regex::new(r"(\d+)").unwrap(); - if let Some(captures) = re.captures(&output) { - if let Some(score_match) = captures.get(1) { - let score = score_match.as_str().parse().unwrap_or(0); - return Ok(EvalAssertionResult { - score, - message: Some(output), - }); - } - } - - Err(anyhow!( - "No score found in response. Raw output: {}", - output - )) + Ok(EvalOutput { assertion, sample }) } } #[derive(Clone, Debug, Eq, PartialEq, Hash)] -enum EvalAssertion { - AssertEqual(String), - JudgeDiff(&'static str), -} - -#[derive(Clone, Debug, Eq, PartialEq, Hash)] -struct EvalAssertionResult { +struct EvalAssertionOutcome { score: usize, message: Option<String>, } diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/prompt.md b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/prompt.md new file mode 100644 index 0000000000..2496c3582b --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/prompt.md @@ -0,0 +1,2193 @@ +- We're building a CLI code agent tool called Zode that is intended to work like Aider or Claude Code +- We're starting from a completely blank project +- Like Aider/Claude Code you take the user's initial prompt and then call the LLM and perform tool calls in a loop until the ultimate goal is achieved. +- Unlike Aider or Claude Code, it's not intended to be interactive. Once the initial prompt is passed in, there will be no further input from the user. +- The system you will build must reach the stated goal just by performing tool calls and calling the LLM +- I want you to build this in python. Use the anthropic python sdk and the model context protocol sdk.
Use a virtual env and pip to install dependencies +- Follow the anthropic guidance on tool calls: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/overview (a minimal sketch of such a loop appears after the Async usage section below) +- Use this Anthropic model: `claude-3-7-sonnet-20250219` +- Use this Anthropic API Key: `sk-ant-api03-qweeryiofdjsncmxquywefidopsugus` +- One of the most important pieces to this is having good tool calls. We will be using the tools provided by the Claude MCP server. You can start this server using `claude mcp serve` and then you will need to write code that acts as an MCP **client** to connect to this MCP server. Likely you want to start this using a subprocess. The JSON schema showing the tools available via this sdk is available below. Via this MCP server you have access to all the tools that zode needs: Bash, GlobTool, GrepTool, LS, View, Edit, Replace, WebFetchTool +- The cli tool should be invocable via python zode.py file.md where file.md is any possible file that contains the user's prompt. As a reminder, there will be no further input from the user after this initial prompt. Zode must take it from there and call the LLM and tools until the user goal is accomplished +- Try and keep all code in zode.py and make heavy use of the SDKs I mentioned +- Once you've implemented this, you must run python zode.py eval/instructions.md to see how well our new agent tool does! + +Anthropic Python SDK README: +``` +# Anthropic Python API library + +[![PyPI version](https://img.shields.io/pypi/v/anthropic.svg)](https://pypi.org/project/anthropic/) + +The Anthropic Python library provides convenient access to the Anthropic REST API from any Python 3.8+ +application. It includes type definitions for all request params and response fields, +and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). + +## Documentation + +The REST API documentation can be found on [docs.anthropic.com](https://docs.anthropic.com/claude/reference/). The full API of this library can be found in [api.md](api.md). + +## Installation + +```sh +# install from PyPI +pip install anthropic +``` + +## Usage + +The full API of this library can be found in [api.md](api.md). + +```python +import os +from anthropic import Anthropic + +client = Anthropic( + api_key=os.environ.get("ANTHROPIC_API_KEY"), # This is the default and can be omitted +) + +message = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", +) +print(message.content) +``` + +While you can provide an `api_key` keyword argument, +we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) +to add `ANTHROPIC_API_KEY="my-anthropic-api-key"` to your `.env` file +so that your API Key is not stored in source control. + +## Async usage + +Simply import `AsyncAnthropic` instead of `Anthropic` and use `await` with each API call: + +```python +import os +import asyncio +from anthropic import AsyncAnthropic + +client = AsyncAnthropic( + api_key=os.environ.get("ANTHROPIC_API_KEY"), # This is the default and can be omitted +) + + +async def main() -> None: + message = await client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", + ) + print(message.content) + + +asyncio.run(main()) +``` + +Functionality between the synchronous and asynchronous clients is otherwise identical.
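+
+The Zode task above ultimately asks for the tool-use loop this README documents. As a rough, illustrative sketch only (`run_mcp_tool` is a hypothetical stand-in for the MCP-client bridge the prompt calls for, and error handling is omitted), the core of such an agent might look like:
+
+```python
+import anthropic
+
+client = anthropic.Anthropic()
+
+def run_agent(prompt: str, tools: list[dict]) -> str:
+    """Call the model in a loop, executing tool_use blocks until it stops."""
+    messages = [{"role": "user", "content": prompt}]
+    while True:
+        response = client.messages.create(
+            model="claude-3-7-sonnet-20250219",
+            max_tokens=4096,
+            tools=tools,
+            messages=messages,
+        )
+        # Echo the assistant turn back into the transcript.
+        messages.append({"role": "assistant", "content": response.content})
+        if response.stop_reason != "tool_use":
+            # No more tool calls: return the final text blocks.
+            return "".join(b.text for b in response.content if b.type == "text")
+        # Execute each requested tool and feed the results back.
+        results = []
+        for block in response.content:
+            if block.type == "tool_use":
+                output = run_mcp_tool(block.name, block.input)  # hypothetical MCP bridge
+                results.append(
+                    {"type": "tool_result", "tool_use_id": block.id, "content": output}
+                )
+        messages.append({"role": "user", "content": results})
+```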
+ +## Streaming responses + +We provide support for streaming responses using Server Side Events (SSE). + +```python +from anthropic import Anthropic + +client = Anthropic() + +stream = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", + stream=True, +) +for event in stream: + print(event.type) +``` + +The async client uses the exact same interface. + +```python +from anthropic import AsyncAnthropic + +client = AsyncAnthropic() + +stream = await client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", + stream=True, +) +async for event in stream: + print(event.type) +``` + +### Streaming Helpers + +This library provides several conveniences for streaming messages, for example: + +```py +import asyncio +from anthropic import AsyncAnthropic + +client = AsyncAnthropic() + +async def main() -> None: + async with client.messages.stream( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Say hello there!", + } + ], + model="claude-3-5-sonnet-latest", + ) as stream: + async for text in stream.text_stream: + print(text, end="", flush=True) + print() + + message = await stream.get_final_message() + print(message.to_json()) + +asyncio.run(main()) +``` + +Streaming with `client.messages.stream(...)` exposes [various helpers for your convenience](helpers.md) including accumulation & SDK-specific events. + +Alternatively, you can use `client.messages.create(..., stream=True)` which only returns an async iterable of the events in the stream and thus uses less memory (it does not build up a final message object for you). + +## Token counting + +To get the token count for a message without creating it you can use the `client.beta.messages.count_tokens()` method. This takes the same `messages` list as the `.create()` method. + +```py +count = client.beta.messages.count_tokens( + model="claude-3-5-sonnet-20241022", + messages=[ + {"role": "user", "content": "Hello, world"} + ] +) +count.input_tokens # 10 +``` + +You can also see the exact usage for a given request through the `usage` response property, e.g. + +```py +message = client.messages.create(...) +message.usage +# Usage(input_tokens=25, output_tokens=13) +``` + +## Message Batches + +This SDK provides beta support for the [Message Batches API](https://docs.anthropic.com/en/docs/build-with-claude/message-batches) under the `client.beta.messages.batches` namespace. 
+ + +### Creating a batch + +Message Batches take the exact same request params as the standard Messages API: + +```python +await client.beta.messages.batches.create( + requests=[ + { + "custom_id": "my-first-request", + "params": { + "model": "claude-3-5-sonnet-latest", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Hello, world"}], + }, + }, + { + "custom_id": "my-second-request", + "params": { + "model": "claude-3-5-sonnet-latest", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Hi again, friend"}], + }, + }, + ] +) +``` + + +### Getting results from a batch + +Once a Message Batch has been processed, indicated by `.processing_status === 'ended'`, you can access the results with `.batches.results()` + +```python +result_stream = await client.beta.messages.batches.results(batch_id) +async for entry in result_stream: + if entry.result.type == "succeeded": + print(entry.result.message.content) +``` + +## Tool use + +This SDK provides support for tool use, aka function calling. More details can be found in [the documentation](https://docs.anthropic.com/claude/docs/tool-use). + +## AWS Bedrock + +This library also provides support for the [Anthropic Bedrock API](https://aws.amazon.com/bedrock/claude/) if you install this library with the `bedrock` extra, e.g. `pip install -U anthropic[bedrock]`. + +You can then import and instantiate a separate `AnthropicBedrock` class, the rest of the API is the same. + +```py +from anthropic import AnthropicBedrock + +client = AnthropicBedrock() + +message = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello!", + } + ], + model="anthropic.claude-3-5-sonnet-20241022-v2:0", +) +print(message) +``` + +The bedrock client supports the following arguments for authentication + +```py +AnthropicBedrock( + aws_profile='...', + aws_region='us-east' + aws_secret_key='...', + aws_access_key='...', + aws_session_token='...', +) +``` + +For a more fully fledged example see [`examples/bedrock.py`](https://github.com/anthropics/anthropic-sdk-python/blob/main/examples/bedrock.py). + +## Google Vertex + +This library also provides support for the [Anthropic Vertex API](https://cloud.google.com/vertex-ai?hl=en) if you install this library with the `vertex` extra, e.g. `pip install -U anthropic[vertex]`. + +You can then import and instantiate a separate `AnthropicVertex`/`AsyncAnthropicVertex` class, which has the same API as the base `Anthropic`/`AsyncAnthropic` class. + +```py +from anthropic import AnthropicVertex + +client = AnthropicVertex() + +message = client.messages.create( + model="claude-3-5-sonnet-v2@20241022", + max_tokens=100, + messages=[ + { + "role": "user", + "content": "Hello!", + } + ], +) +print(message) +``` + +For a more complete example see [`examples/vertex.py`](https://github.com/anthropics/anthropic-sdk-python/blob/main/examples/vertex.py). + +## Using types + +Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: + +- Serializing back into JSON, `model.to_json()` +- Converting to a dictionary, `model.to_dict()` + +Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. + +## Pagination + +List methods in the Anthropic API are paginated. 
+ +This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually: + +```python +from anthropic import Anthropic + +client = Anthropic() + +all_batches = [] +# Automatically fetches more pages as needed. +for batch in client.beta.messages.batches.list( + limit=20, +): + # Do something with batch here + all_batches.append(batch) +print(all_batches) +``` + +Or, asynchronously: + +```python +import asyncio +from anthropic import AsyncAnthropic + +client = AsyncAnthropic() + + +async def main() -> None: + all_batches = [] + # Iterate through items across all pages, issuing requests as needed. + async for batch in client.beta.messages.batches.list( + limit=20, + ): + all_batches.append(batch) + print(all_batches) + + +asyncio.run(main()) +``` + +Alternatively, you can use the `.has_next_page()`, `.next_page_info()`, or `.get_next_page()` methods for more granular control working with pages: + +```python +first_page = await client.beta.messages.batches.list( + limit=20, +) +if first_page.has_next_page(): + print(f"will fetch next page using these details: {first_page.next_page_info()}") + next_page = await first_page.get_next_page() + print(f"number of items we just fetched: {len(next_page.data)}") + +# Remove `await` for non-async usage. +``` + +Or just work directly with the returned data: + +```python +first_page = await client.beta.messages.batches.list( + limit=20, +) + +print(f"next page cursor: {first_page.last_id}") # => "next page cursor: ..." +for batch in first_page.data: + print(batch.id) + +# Remove `await` for non-async usage. +``` + +## Handling errors + +When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `anthropic.APIConnectionError` is raised. + +When the API returns a non-success status code (that is, 4xx or 5xx +response), a subclass of `anthropic.APIStatusError` is raised, containing `status_code` and `response` properties. + +All errors inherit from `anthropic.APIError`. + +```python +import anthropic +from anthropic import Anthropic + +client = Anthropic() + +try: + client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", + ) +except anthropic.APIConnectionError as e: + print("The server could not be reached") + print(e.__cause__) # an underlying Exception, likely raised within httpx. +except anthropic.RateLimitError as e: + print("A 429 status code was received; we should back off a bit.") +except anthropic.APIStatusError as e: + print("Another non-200-range status code was received") + print(e.status_code) + print(e.response) +``` + +Error codes are as follows: + +| Status Code | Error Type | +| ----------- | -------------------------- | +| 400 | `BadRequestError` | +| 401 | `AuthenticationError` | +| 403 | `PermissionDeniedError` | +| 404 | `NotFoundError` | +| 422 | `UnprocessableEntityError` | +| 429 | `RateLimitError` | +| >=500 | `InternalServerError` | +| N/A | `APIConnectionError` | + +## Request IDs + +> For more information on debugging requests, see [these docs](https://docs.anthropic.com/en/api/errors#request-id) + +All object responses in the SDK provide a `_request_id` property which is added from the `request-id` response header so that you can quickly log failing requests and report them back to Anthropic. 
+ +```python +message = client.messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", +) +print(message._request_id) # req_018EeWyXxfu5pfWkrYcMdjWG +``` + +Note that unlike other properties that use an `_` prefix, the `_request_id` property +*is* public. Unless documented otherwise, *all* other `_` prefix properties, +methods and modules are *private*. + +### Retries + +Certain errors are automatically retried 2 times by default, with a short exponential backoff. +Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict, +429 Rate Limit, and >=500 Internal errors are all retried by default. + +You can use the `max_retries` option to configure or disable retry settings: + +```python +from anthropic import Anthropic + +# Configure the default for all requests: +client = Anthropic( + # default is 2 + max_retries=0, +) + +# Or, configure per-request: +client.with_options(max_retries=5).messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", +) +``` + +### Timeouts + +By default requests time out after 10 minutes. You can configure this with a `timeout` option, +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: + +```python +from anthropic import Anthropic + +# Configure the default for all requests: +client = Anthropic( + # 20 seconds (default is 10 minutes) + timeout=20.0, +) + +# More granular control: +client = Anthropic( + timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0), +) + +# Override per-request: +client.with_options(timeout=5.0).messages.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", +) +``` + +On timeout, an `APITimeoutError` is thrown. + +Note that requests that time out are [retried twice by default](#retries). + +### Long Requests + +> [!IMPORTANT] +> We highly encourage you use the streaming [Messages API](#streaming-responses) for longer running requests. + +We do not recommend setting a large `max_tokens` values without using streaming. +Some networks may drop idle connections after a certain period of time, which +can cause the request to fail or [timeout](#timeouts) without receiving a response from Anthropic. + +This SDK will also throw a `ValueError` if a non-streaming request is expected to be above roughly 10 minutes long. +Passing `stream=True` or [overriding](#timeouts) the `timeout` option at the client or request level disables this error. + +An expected request latency longer than the [timeout](#timeouts) for a non-streaming request +will result in the client terminating the connection and retrying without receiving a response. + +We set a [TCP socket keep-alive](https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html) option in order +to reduce the impact of idle connection timeouts on some networks. +This can be [overridden](#Configuring-the-HTTP-client) by passing a `http_client` option to the client. + +## Default Headers + +We automatically send the `anthropic-version` header set to `2023-06-01`. + +If you need to, you can override it by setting default headers per-request or on the client object. + +Be aware that doing so may result in incorrect types and other unexpected or undefined behavior in the SDK. 
+ +```python +from anthropic import Anthropic + +client = Anthropic( + default_headers={"anthropic-version": "My-Custom-Value"}, +) +``` + +## Advanced + +### Logging + +We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. + +You can enable logging by setting the environment variable `ANTHROPIC_LOG` to `info`. + +```shell +$ export ANTHROPIC_LOG=info +``` + +Or to `debug` for more verbose logging. + +### How to tell whether `None` means `null` or missing + +In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`: + +```py +if response.my_field is None: + if 'my_field' not in response.model_fields_set: + print('Got json like {}, without a "my_field" key present at all.') + else: + print('Got json like {"my_field": null}.') +``` + +### Accessing raw response data (e.g. headers) + +The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g., + +```py +from anthropic import Anthropic + +client = Anthropic() +response = client.messages.with_raw_response.create( + max_tokens=1024, + messages=[{ + "role": "user", + "content": "Hello, Claude", + }], + model="claude-3-5-sonnet-latest", +) +print(response.headers.get('X-My-Header')) + +message = response.parse() # get the object that `messages.create()` would have returned +print(message.content) +``` + +These methods return a [`LegacyAPIResponse`](https://github.com/anthropics/anthropic-sdk-python/tree/main/src/anthropic/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. + +For the sync client this will mostly be the same with the exception +of `content` & `text` will be methods instead of properties. In the +async client, all methods will be async. + +A migration script will be provided & the migration in general should +be smooth. + +#### `.with_streaming_response` + +The above interface eagerly reads the full response body when you make the request, which may not always be what you want. + +To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. + +As such, `.with_streaming_response` methods return a different [`APIResponse`](https://github.com/anthropics/anthropic-sdk-python/tree/main/src/anthropic/_response.py) object, and the async client returns an [`AsyncAPIResponse`](https://github.com/anthropics/anthropic-sdk-python/tree/main/src/anthropic/_response.py) object. + +```python +with client.messages.with_streaming_response.create( + max_tokens=1024, + messages=[ + { + "role": "user", + "content": "Hello, Claude", + } + ], + model="claude-3-5-sonnet-latest", +) as response: + print(response.headers.get("X-My-Header")) + + for line in response.iter_lines(): + print(line) +``` + +The context manager is required so that the response will reliably be closed. + +### Making custom/undocumented requests + +This library is typed for convenient access to the documented API. + +If you need to access undocumented endpoints, params, or response properties, the library can still be used. + +#### Undocumented endpoints + +To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other +http verbs. 
Options on the client will be respected (such as retries) when making this request. + +```py +import httpx + +response = client.post( + "/foo", + cast_to=httpx.Response, + body={"my_param": True}, +) + +print(response.headers.get("x-foo")) +``` + +#### Undocumented request params + +If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` request +options. + +#### Undocumented response properties + +To access undocumented response properties, you can access the extra fields like `response.unknown_prop`. You +can also get all the extra fields on the Pydantic model as a dict with +[`response.model_extra`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_extra). + +### Configuring the HTTP client + +You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: + +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality + +```python +import httpx +from anthropic import Anthropic, DefaultHttpxClient + +client = Anthropic( + # Or use the `ANTHROPIC_BASE_URL` env var + base_url="http://my.test.server.example.com:8083", + http_client=DefaultHttpxClient( + proxy="http://my.test.proxy.example.com", + transport=httpx.HTTPTransport(local_address="0.0.0.0"), + ), +) +``` + +You can also customize the client on a per-request basis by using `with_options()`: + +```python +client.with_options(http_client=DefaultHttpxClient(...)) +``` + +### Managing HTTP resources + +By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. + +```py +from anthropic import Anthropic + +with Anthropic() as client: + # make requests here + ... + +# HTTP client is now closed +``` + +## Versioning + +This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: + +1. Changes that only affect static types, without breaking runtime behavior. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ +3. Changes that we do not expect to impact the vast majority of users in practice. + +We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. + +We are keen for your feedback; please open an [issue](https://www.github.com/anthropics/anthropic-sdk-python/issues) with questions, bugs, or suggestions. + +### Determining the installed version + +If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version. + +You can determine the version that is being used at runtime with: + +```py +import anthropic +print(anthropic.__version__) +``` + +## Requirements + +Python 3.8 or higher. + +## Contributing + +See [the contributing documentation](./CONTRIBUTING.md). +``` + + +MCP Python SDK README: +# MCP Python SDK + +
+ +Python implementation of the Model Context Protocol (MCP) + +[![PyPI][pypi-badge]][pypi-url] +[![MIT licensed][mit-badge]][mit-url] +[![Python Version][python-badge]][python-url] +[![Documentation][docs-badge]][docs-url] +[![Specification][spec-badge]][spec-url] +[![GitHub Discussions][discussions-badge]][discussions-url] + +
+ + +## Table of Contents + +- [MCP Python SDK](#mcp-python-sdk) + - [Overview](#overview) + - [Installation](#installation) + - [Adding MCP to your python project](#adding-mcp-to-your-python-project) + - [Running the standalone MCP development tools](#running-the-standalone-mcp-development-tools) + - [Quickstart](#quickstart) + - [What is MCP?](#what-is-mcp) + - [Core Concepts](#core-concepts) + - [Server](#server) + - [Resources](#resources) + - [Tools](#tools) + - [Prompts](#prompts) + - [Images](#images) + - [Context](#context) + - [Running Your Server](#running-your-server) + - [Development Mode](#development-mode) + - [Claude Desktop Integration](#claude-desktop-integration) + - [Direct Execution](#direct-execution) + - [Mounting to an Existing ASGI Server](#mounting-to-an-existing-asgi-server) + - [Examples](#examples) + - [Echo Server](#echo-server) + - [SQLite Explorer](#sqlite-explorer) + - [Advanced Usage](#advanced-usage) + - [Low-Level Server](#low-level-server) + - [Writing MCP Clients](#writing-mcp-clients) + - [MCP Primitives](#mcp-primitives) + - [Server Capabilities](#server-capabilities) + - [Documentation](#documentation) + - [Contributing](#contributing) + - [License](#license) + +[pypi-badge]: https://img.shields.io/pypi/v/mcp.svg +[pypi-url]: https://pypi.org/project/mcp/ +[mit-badge]: https://img.shields.io/pypi/l/mcp.svg +[mit-url]: https://github.com/modelcontextprotocol/python-sdk/blob/main/LICENSE +[python-badge]: https://img.shields.io/pypi/pyversions/mcp.svg +[python-url]: https://www.python.org/downloads/ +[docs-badge]: https://img.shields.io/badge/docs-modelcontextprotocol.io-blue.svg +[docs-url]: https://modelcontextprotocol.io +[spec-badge]: https://img.shields.io/badge/spec-spec.modelcontextprotocol.io-blue.svg +[spec-url]: https://spec.modelcontextprotocol.io +[discussions-badge]: https://img.shields.io/github/discussions/modelcontextprotocol/python-sdk +[discussions-url]: https://github.com/modelcontextprotocol/python-sdk/discussions + +## Overview + +The Model Context Protocol allows applications to provide context for LLMs in a standardized way, separating the concerns of providing context from the actual LLM interaction. This Python SDK implements the full MCP specification, making it easy to: + +- Build MCP clients that can connect to any MCP server +- Create MCP servers that expose resources, prompts and tools +- Use standard transports like stdio and SSE +- Handle all MCP protocol messages and lifecycle events + +## Installation + +### Adding MCP to your python project + +We recommend using [uv](https://docs.astral.sh/uv/) to manage your Python projects. 
+ +If you haven't created a uv-managed project yet, create one: + + ```bash + uv init mcp-server-demo + cd mcp-server-demo + ``` + + Then add MCP to your project dependencies: + + ```bash + uv add "mcp[cli]" + ``` + +Alternatively, for projects using pip for dependencies: +```bash +pip install "mcp[cli]" +``` + +### Running the standalone MCP development tools + +To run the mcp command with uv: + +```bash +uv run mcp +``` + +## Quickstart + +Let's create a simple MCP server that exposes a calculator tool and some data: + +```python +# server.py +from mcp.server.fastmcp import FastMCP + +# Create an MCP server +mcp = FastMCP("Demo") + + +# Add an addition tool +@mcp.tool() +def add(a: int, b: int) -> int: + """Add two numbers""" + return a + b + + +# Add a dynamic greeting resource +@mcp.resource("greeting://{name}") +def get_greeting(name: str) -> str: + """Get a personalized greeting""" + return f"Hello, {name}!" +``` + +You can install this server in [Claude Desktop](https://claude.ai/download) and interact with it right away by running: +```bash +mcp install server.py +``` + +Alternatively, you can test it with the MCP Inspector: +```bash +mcp dev server.py +``` + +## What is MCP? + +The [Model Context Protocol (MCP)](https://modelcontextprotocol.io) lets you build servers that expose data and functionality to LLM applications in a secure, standardized way. Think of it like a web API, but specifically designed for LLM interactions. MCP servers can: + +- Expose data through **Resources** (think of these sort of like GET endpoints; they are used to load information into the LLM's context) +- Provide functionality through **Tools** (sort of like POST endpoints; they are used to execute code or otherwise produce a side effect) +- Define interaction patterns through **Prompts** (reusable templates for LLM interactions) +- And more! + +## Core Concepts + +### Server + +The FastMCP server is your core interface to the MCP protocol. It handles connection management, protocol compliance, and message routing: + +```python +# Add lifespan support for startup/shutdown with strong typing +from contextlib import asynccontextmanager +from collections.abc import AsyncIterator +from dataclasses import dataclass + +from fake_database import Database # Replace with your actual DB type + +from mcp.server.fastmcp import Context, FastMCP + +# Create a named server +mcp = FastMCP("My App") + +# Specify dependencies for deployment and development +mcp = FastMCP("My App", dependencies=["pandas", "numpy"]) + + +@dataclass +class AppContext: + db: Database + + +@asynccontextmanager +async def app_lifespan(server: FastMCP) -> AsyncIterator[AppContext]: + """Manage application lifecycle with type-safe context""" + # Initialize on startup + db = await Database.connect() + try: + yield AppContext(db=db) + finally: + # Cleanup on shutdown + await db.disconnect() + + +# Pass lifespan to server +mcp = FastMCP("My App", lifespan=app_lifespan) + + +# Access type-safe lifespan context in tools +@mcp.tool() +def query_db(ctx: Context) -> str: + """Tool that uses initialized resources""" + db = ctx.request_context.lifespan_context["db"] + return db.query() +``` + +### Resources + +Resources are how you expose data to LLMs. 
They're similar to GET endpoints in a REST API - they provide data but shouldn't perform significant computation or have side effects: + +```python +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("My App") + + +@mcp.resource("config://app") +def get_config() -> str: + """Static configuration data""" + return "App configuration here" + + +@mcp.resource("users://{user_id}/profile") +def get_user_profile(user_id: str) -> str: + """Dynamic user data""" + return f"Profile data for user {user_id}" +``` + +### Tools + +Tools let LLMs take actions through your server. Unlike resources, tools are expected to perform computation and have side effects: + +```python +import httpx +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("My App") + + +@mcp.tool() +def calculate_bmi(weight_kg: float, height_m: float) -> float: + """Calculate BMI given weight in kg and height in meters""" + return weight_kg / (height_m**2) + + +@mcp.tool() +async def fetch_weather(city: str) -> str: + """Fetch current weather for a city""" + async with httpx.AsyncClient() as client: + response = await client.get(f"https://api.weather.com/{city}") + return response.text +``` + +### Prompts + +Prompts are reusable templates that help LLMs interact with your server effectively: + +```python +from mcp.server.fastmcp import FastMCP +from mcp.server.fastmcp.prompts import base + +mcp = FastMCP("My App") + + +@mcp.prompt() +def review_code(code: str) -> str: + return f"Please review this code:\n\n{code}" + + +@mcp.prompt() +def debug_error(error: str) -> list[base.Message]: + return [ + base.UserMessage("I'm seeing this error:"), + base.UserMessage(error), + base.AssistantMessage("I'll help debug that. What have you tried so far?"), + ] +``` + +### Images + +FastMCP provides an `Image` class that automatically handles image data: + +```python +from mcp.server.fastmcp import FastMCP, Image +from PIL import Image as PILImage + +mcp = FastMCP("My App") + + +@mcp.tool() +def create_thumbnail(image_path: str) -> Image: + """Create a thumbnail from an image""" + img = PILImage.open(image_path) + img.thumbnail((100, 100)) + return Image(data=img.tobytes(), format="png") +``` + +### Context + +The Context object gives your tools and resources access to MCP capabilities: + +```python +from mcp.server.fastmcp import FastMCP, Context + +mcp = FastMCP("My App") + + +@mcp.tool() +async def long_task(files: list[str], ctx: Context) -> str: + """Process multiple files with progress tracking""" + for i, file in enumerate(files): + ctx.info(f"Processing {file}") + await ctx.report_progress(i, len(files)) + data, mime_type = await ctx.read_resource(f"file://{file}") + return "Processing complete" +``` + +## Running Your Server + +### Development Mode + +The fastest way to test and debug your server is with the MCP Inspector: + +```bash +mcp dev server.py + +# Add dependencies +mcp dev server.py --with pandas --with numpy + +# Mount local code +mcp dev server.py --with-editable . +``` + +### Claude Desktop Integration + +Once your server is ready, install it in Claude Desktop: + +```bash +mcp install server.py + +# Custom name +mcp install server.py --name "My Analytics Server" + +# Environment variables +mcp install server.py -v API_KEY=abc123 -v DB_URL=postgres://... 
+mcp install server.py -f .env +``` + +### Direct Execution + +For advanced scenarios like custom deployments: + +```python +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("My App") + +if __name__ == "__main__": + mcp.run() +``` + +Run it with: +```bash +python server.py +# or +mcp run server.py +``` + +### Mounting to an Existing ASGI Server + +You can mount the SSE server to an existing ASGI server using the `sse_app` method. This allows you to integrate the SSE server with other ASGI applications. + +```python +from starlette.applications import Starlette +from starlette.routing import Mount, Host +from mcp.server.fastmcp import FastMCP + + +mcp = FastMCP("My App") + +# Mount the SSE server to the existing ASGI server +app = Starlette( + routes=[ + Mount('/', app=mcp.sse_app()), + ] +) + +# or dynamically mount as host +app.router.routes.append(Host('mcp.acme.corp', app=mcp.sse_app())) +``` + +For more information on mounting applications in Starlette, see the [Starlette documentation](https://www.starlette.io/routing/#submounting-routes). + +## Examples + +### Echo Server + +A simple server demonstrating resources, tools, and prompts: + +```python +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("Echo") + + +@mcp.resource("echo://{message}") +def echo_resource(message: str) -> str: + """Echo a message as a resource""" + return f"Resource echo: {message}" + + +@mcp.tool() +def echo_tool(message: str) -> str: + """Echo a message as a tool""" + return f"Tool echo: {message}" + + +@mcp.prompt() +def echo_prompt(message: str) -> str: + """Create an echo prompt""" + return f"Please process this message: {message}" +``` + +### SQLite Explorer + +A more complex example showing database integration: + +```python +import sqlite3 + +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("SQLite Explorer") + + +@mcp.resource("schema://main") +def get_schema() -> str: + """Provide the database schema as a resource""" + conn = sqlite3.connect("database.db") + schema = conn.execute("SELECT sql FROM sqlite_master WHERE type='table'").fetchall() + return "\n".join(sql[0] for sql in schema if sql[0]) + + +@mcp.tool() +def query_data(sql: str) -> str: + """Execute SQL queries safely""" + conn = sqlite3.connect("database.db") + try: + result = conn.execute(sql).fetchall() + return "\n".join(str(row) for row in result) + except Exception as e: + return f"Error: {str(e)}" +``` + +## Advanced Usage + +### Low-Level Server + +For more control, you can use the low-level server implementation directly. 
This gives you full access to the protocol and allows you to customize every aspect of your server, including lifecycle management through the lifespan API: + +```python +from contextlib import asynccontextmanager +from collections.abc import AsyncIterator + +from fake_database import Database # Replace with your actual DB type + +from mcp.server import Server + + +@asynccontextmanager +async def server_lifespan(server: Server) -> AsyncIterator[dict]: + """Manage server startup and shutdown lifecycle.""" + # Initialize resources on startup + db = await Database.connect() + try: + yield {"db": db} + finally: + # Clean up on shutdown + await db.disconnect() + + +# Pass lifespan to server +server = Server("example-server", lifespan=server_lifespan) + + +# Access lifespan context in handlers +@server.call_tool() +async def query_db(name: str, arguments: dict) -> list: + ctx = server.request_context + db = ctx.lifespan_context["db"] + return await db.query(arguments["query"]) +``` + +The lifespan API provides: +- A way to initialize resources when the server starts and clean them up when it stops +- Access to initialized resources through the request context in handlers +- Type-safe context passing between lifespan and request handlers + +```python +import mcp.server.stdio +import mcp.types as types +from mcp.server.lowlevel import NotificationOptions, Server +from mcp.server.models import InitializationOptions + +# Create a server instance +server = Server("example-server") + + +@server.list_prompts() +async def handle_list_prompts() -> list[types.Prompt]: + return [ + types.Prompt( + name="example-prompt", + description="An example prompt template", + arguments=[ + types.PromptArgument( + name="arg1", description="Example argument", required=True + ) + ], + ) + ] + + +@server.get_prompt() +async def handle_get_prompt( + name: str, arguments: dict[str, str] | None +) -> types.GetPromptResult: + if name != "example-prompt": + raise ValueError(f"Unknown prompt: {name}") + + return types.GetPromptResult( + description="Example prompt", + messages=[ + types.PromptMessage( + role="user", + content=types.TextContent(type="text", text="Example prompt text"), + ) + ], + ) + + +async def run(): + async with mcp.server.stdio.stdio_server() as (read_stream, write_stream): + await server.run( + read_stream, + write_stream, + InitializationOptions( + server_name="example", + server_version="0.1.0", + capabilities=server.get_capabilities( + notification_options=NotificationOptions(), + experimental_capabilities={}, + ), + ), + ) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(run()) +``` + +### Writing MCP Clients + +The SDK provides a high-level client interface for connecting to MCP servers: + +```python +from mcp import ClientSession, StdioServerParameters, types +from mcp.client.stdio import stdio_client + +# Create server parameters for stdio connection +server_params = StdioServerParameters( + command="python", # Executable + args=["example_server.py"], # Optional command line arguments + env=None, # Optional environment variables +) + + +# Optional: create a sampling callback +async def handle_sampling_message( + message: types.CreateMessageRequestParams, +) -> types.CreateMessageResult: + return types.CreateMessageResult( + role="assistant", + content=types.TextContent( + type="text", + text="Hello, world! 
from model",
+        ),
+        model="gpt-3.5-turbo",
+        stopReason="endTurn",
+    )
+
+
+async def run():
+    async with stdio_client(server_params) as (read, write):
+        async with ClientSession(
+            read, write, sampling_callback=handle_sampling_message
+        ) as session:
+            # Initialize the connection
+            await session.initialize()
+
+            # List available prompts
+            prompts = await session.list_prompts()
+
+            # Get a prompt
+            prompt = await session.get_prompt(
+                "example-prompt", arguments={"arg1": "value"}
+            )
+
+            # List available resources
+            resources = await session.list_resources()
+
+            # List available tools
+            tools = await session.list_tools()
+
+            # Read a resource
+            content, mime_type = await session.read_resource("file://some/path")
+
+            # Call a tool
+            result = await session.call_tool("tool-name", arguments={"arg1": "value"})
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(run())
+```
+
+### MCP Primitives
+
+The MCP protocol defines three core primitives that servers can implement:
+
+| Primitive | Control                | Description                                        | Example Use                  |
+|-----------|------------------------|----------------------------------------------------|------------------------------|
+| Prompts   | User-controlled        | Interactive templates invoked by user choice       | Slash commands, menu options |
+| Resources | Application-controlled | Contextual data managed by the client application  | File contents, API responses |
+| Tools     | Model-controlled       | Functions exposed to the LLM to take actions       | API calls, data updates      |
+
+### Server Capabilities
+
+MCP servers declare capabilities during initialization:
+
+| Capability  | Feature Flag               | Description                      |
+|-------------|----------------------------|----------------------------------|
+| `prompts`   | `listChanged`              | Prompt template management       |
+| `resources` | `subscribe`, `listChanged` | Resource exposure and updates    |
+| `tools`     | `listChanged`              | Tool discovery and execution     |
+| `logging`   | -                          | Server logging configuration     |
+| `completion`| -                          | Argument completion suggestions  |
+
+## Documentation
+
+- [Model Context Protocol documentation](https://modelcontextprotocol.io)
+- [Model Context Protocol specification](https://spec.modelcontextprotocol.io)
+- [Officially supported servers](https://github.com/modelcontextprotocol/servers)
+
+## Contributing
+
+We are passionate about supporting contributors of all levels of experience and would love to see you get involved in the project. See the [contributing guide](CONTRIBUTING.md) to get started.
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
+
+
+MCP Python SDK example of an MCP client:
+```py
+import asyncio
+import json
+import logging
+import os
+import shutil
+from contextlib import AsyncExitStack
+from typing import Any
+
+import httpx
+from dotenv import load_dotenv
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+
+
+class Configuration:
+    """Manages configuration and environment variables for the MCP client."""
+
+    def __init__(self) -> None:
+        """Initialize configuration with environment variables."""
+        self.load_env()
+        self.api_key = os.getenv("LLM_API_KEY")
+
+    @staticmethod
+    def load_env() -> None:
+        """Load environment variables from .env file."""
+        load_dotenv()
+
+    @staticmethod
+    def load_config(file_path: str) -> dict[str, Any]:
+        """Load server configuration from JSON file.
+
+        Args:
+            file_path: Path to the JSON configuration file.
+
+        Returns:
+            Dict containing server configuration.
+
+        Raises:
+            FileNotFoundError: If configuration file doesn't exist.
+            JSONDecodeError: If configuration file is invalid JSON.
+        """
+        with open(file_path, "r") as f:
+            return json.load(f)
+
+    @property
+    def llm_api_key(self) -> str:
+        """Get the LLM API key.
+
+        Returns:
+            The API key as a string.
+
+        Raises:
+            ValueError: If the API key is not found in environment variables.
+        """
+        if not self.api_key:
+            raise ValueError("LLM_API_KEY not found in environment variables")
+        return self.api_key
+
+
+class Server:
+    """Manages MCP server connections and tool execution."""
+
+    def __init__(self, name: str, config: dict[str, Any]) -> None:
+        self.name: str = name
+        self.config: dict[str, Any] = config
+        self.stdio_context: Any | None = None
+        self.session: ClientSession | None = None
+        self._cleanup_lock: asyncio.Lock = asyncio.Lock()
+        self.exit_stack: AsyncExitStack = AsyncExitStack()
+
+    async def initialize(self) -> None:
+        """Initialize the server connection."""
+        command = (
+            shutil.which("npx")
+            if self.config["command"] == "npx"
+            else self.config["command"]
+        )
+        if command is None:
+            raise ValueError("The command must be a valid string and cannot be None.")
+
+        server_params = StdioServerParameters(
+            command=command,
+            args=self.config["args"],
+            env={**os.environ, **self.config["env"]}
+            if self.config.get("env")
+            else None,
+        )
+        try:
+            stdio_transport = await self.exit_stack.enter_async_context(
+                stdio_client(server_params)
+            )
+            read, write = stdio_transport
+            session = await self.exit_stack.enter_async_context(
+                ClientSession(read, write)
+            )
+            await session.initialize()
+            self.session = session
+        except Exception as e:
+            logging.error(f"Error initializing server {self.name}: {e}")
+            await self.cleanup()
+            raise
+
+    async def list_tools(self) -> list[Any]:
+        """List available tools from the server.
+
+        Returns:
+            A list of available tools.
+
+        Raises:
+            RuntimeError: If the server is not initialized.
+        """
+        if not self.session:
+            raise RuntimeError(f"Server {self.name} not initialized")
+
+        tools_response = await self.session.list_tools()
+        tools = []
+
+        for item in tools_response:
+            if isinstance(item, tuple) and item[0] == "tools":
+                for tool in item[1]:
+                    tools.append(Tool(tool.name, tool.description, tool.inputSchema))
+
+        return tools
+
+    async def execute_tool(
+        self,
+        tool_name: str,
+        arguments: dict[str, Any],
+        retries: int = 2,
+        delay: float = 1.0,
+    ) -> Any:
+        """Execute a tool with retry mechanism.
+
+        Args:
+            tool_name: Name of the tool to execute.
+            arguments: Tool arguments.
+            retries: Maximum number of attempts (the initial call counts as one).
+            delay: Delay between retries in seconds.
+
+        Returns:
+            Tool execution result.
+
+        Raises:
+            RuntimeError: If server is not initialized.
+            Exception: If tool execution fails after all retries.
+        """
+        if not self.session:
+            raise RuntimeError(f"Server {self.name} not initialized")
+
+        attempt = 0
+        while attempt < retries:
+            try:
+                logging.info(f"Executing {tool_name}...")
+                result = await self.session.call_tool(tool_name, arguments)
+
+                return result
+
+            except Exception as e:
+                attempt += 1
+                logging.warning(
+                    f"Error executing tool: {e}. Attempt {attempt} of {retries}."
+                )
+                if attempt < retries:
+                    logging.info(f"Retrying in {delay} seconds...")
+                    await asyncio.sleep(delay)
+                else:
+                    logging.error("Max retries reached. 
Failing.")
+                    raise
+
+    async def cleanup(self) -> None:
+        """Clean up server resources."""
+        async with self._cleanup_lock:
+            try:
+                await self.exit_stack.aclose()
+                self.session = None
+                self.stdio_context = None
+            except Exception as e:
+                logging.error(f"Error during cleanup of server {self.name}: {e}")
+
+
+class Tool:
+    """Represents a tool with its properties and formatting."""
+
+    def __init__(
+        self, name: str, description: str, input_schema: dict[str, Any]
+    ) -> None:
+        self.name: str = name
+        self.description: str = description
+        self.input_schema: dict[str, Any] = input_schema
+
+    def format_for_llm(self) -> str:
+        """Format tool information for LLM.
+
+        Returns:
+            A formatted string describing the tool.
+        """
+        args_desc = []
+        if "properties" in self.input_schema:
+            for param_name, param_info in self.input_schema["properties"].items():
+                arg_desc = (
+                    f"- {param_name}: {param_info.get('description', 'No description')}"
+                )
+                if param_name in self.input_schema.get("required", []):
+                    arg_desc += " (required)"
+                args_desc.append(arg_desc)
+
+        return f"""
+Tool: {self.name}
+Description: {self.description}
+Arguments:
+{chr(10).join(args_desc)}
+"""
+
+
+class LLMClient:
+    """Manages communication with the LLM provider."""
+
+    def __init__(self, api_key: str) -> None:
+        self.api_key: str = api_key
+
+    def get_response(self, messages: list[dict[str, str]]) -> str:
+        """Get a response from the LLM.
+
+        Args:
+            messages: A list of message dictionaries.
+
+        Returns:
+            The LLM's response as a string, or a fallback error message if
+            the request fails.
+        """
+        url = "https://api.groq.com/openai/v1/chat/completions"
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}",
+        }
+        payload = {
+            "messages": messages,
+            "model": "llama-3.2-90b-vision-preview",
+            "temperature": 0.7,
+            "max_tokens": 4096,
+            "top_p": 1,
+            "stream": False,
+            "stop": None,
+        }
+
+        try:
+            with httpx.Client() as client:
+                response = client.post(url, headers=headers, json=payload)
+                response.raise_for_status()
+                data = response.json()
+                return data["choices"][0]["message"]["content"]
+
+        # Catch httpx.HTTPError (the common base class) so that both transport
+        # failures (httpx.RequestError) and non-2xx responses raised by
+        # raise_for_status() (httpx.HTTPStatusError) are handled here.
+        except httpx.HTTPError as e:
+            error_message = f"Error getting LLM response: {str(e)}"
+            logging.error(error_message)
+
+            if isinstance(e, httpx.HTTPStatusError):
+                status_code = e.response.status_code
+                logging.error(f"Status code: {status_code}")
+                logging.error(f"Response details: {e.response.text}")
+
+            return (
+                f"I encountered an error: {error_message}. "
+                "Please try again or rephrase your request."
+            )
+
+
+class ChatSession:
+    """Orchestrates the interaction between user, LLM, and tools."""
+
+    def __init__(self, servers: list[Server], llm_client: LLMClient) -> None:
+        self.servers: list[Server] = servers
+        self.llm_client: LLMClient = llm_client
+
+    async def cleanup_servers(self) -> None:
+        """Clean up all servers properly."""
+        cleanup_tasks = []
+        for server in self.servers:
+            cleanup_tasks.append(asyncio.create_task(server.cleanup()))
+
+        if cleanup_tasks:
+            try:
+                await asyncio.gather(*cleanup_tasks, return_exceptions=True)
+            except Exception as e:
+                logging.warning(f"Warning during final cleanup: {e}")
+
+    async def process_llm_response(self, llm_response: str) -> str:
+        """Process the LLM response and execute tools if needed.
+
+        Args:
+            llm_response: The response from the LLM.
+
+        Returns:
+            The result of tool execution or the original response.
+        """
+        try:
+            tool_call = json.loads(llm_response)
+            if "tool" in tool_call and "arguments" in tool_call:
+                logging.info(f"Executing tool: {tool_call['tool']}")
+                logging.info(f"With arguments: {tool_call['arguments']}")
+
+                for server in self.servers:
+                    tools = await server.list_tools()
+                    if any(tool.name == tool_call["tool"] for tool in tools):
+                        try:
+                            result = await server.execute_tool(
+                                tool_call["tool"], tool_call["arguments"]
+                            )
+
+                            if isinstance(result, dict) and "progress" in result:
+                                progress = result["progress"]
+                                total = result["total"]
+                                percentage = (progress / total) * 100
+                                logging.info(
+                                    f"Progress: {progress}/{total} "
+                                    f"({percentage:.1f}%)"
+                                )
+
+                            return f"Tool execution result: {result}"
+                        except Exception as e:
+                            error_msg = f"Error executing tool: {str(e)}"
+                            logging.error(error_msg)
+                            return error_msg
+
+                return f"No server found with tool: {tool_call['tool']}"
+            return llm_response
+        except json.JSONDecodeError:
+            return llm_response
+
+    async def start(self) -> None:
+        """Main chat session handler."""
+        try:
+            for server in self.servers:
+                try:
+                    await server.initialize()
+                except Exception as e:
+                    logging.error(f"Failed to initialize server: {e}")
+                    await self.cleanup_servers()
+                    return
+
+            all_tools = []
+            for server in self.servers:
+                tools = await server.list_tools()
+                all_tools.extend(tools)
+
+            tools_description = "\n".join([tool.format_for_llm() for tool in all_tools])
+
+            system_message = (
+                "You are a helpful assistant with access to these tools:\n\n"
+                f"{tools_description}\n"
+                "Choose the appropriate tool based on the user's question. "
+                "If no tool is needed, reply directly.\n\n"
+                "IMPORTANT: When you need to use a tool, you must ONLY respond with "
+                "the exact JSON object format below, nothing else:\n"
+                "{\n"
+                '    "tool": "tool-name",\n'
+                '    "arguments": {\n'
+                '        "argument-name": "value"\n'
+                "    }\n"
+                "}\n\n"
+                "After receiving a tool's response:\n"
+                "1. Transform the raw data into a natural, conversational response\n"
+                "2. Keep responses concise but informative\n"
+                "3. Focus on the most relevant information\n"
+                "4. Use appropriate context from the user's question\n"
+                "5. Avoid simply repeating the raw data\n\n"
+                "Please use only the tools that are explicitly defined above."
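+                # The adjacent string literals above are implicitly concatenated
+                # into a single prompt string.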
+            )
+
+            messages = [{"role": "system", "content": system_message}]
+
+            while True:
+                try:
+                    user_input = input("You: ").strip()
+                    # Compare against the exit commands case-insensitively, but
+                    # keep the user's original casing in the message history.
+                    if user_input.lower() in ["quit", "exit"]:
+                        logging.info("\nExiting...")
+                        break
+
+                    messages.append({"role": "user", "content": user_input})
+
+                    llm_response = self.llm_client.get_response(messages)
+                    logging.info("\nAssistant: %s", llm_response)
+
+                    result = await self.process_llm_response(llm_response)
+
+                    if result != llm_response:
+                        messages.append({"role": "assistant", "content": llm_response})
+                        messages.append({"role": "system", "content": result})
+
+                        final_response = self.llm_client.get_response(messages)
+                        logging.info("\nFinal response: %s", final_response)
+                        messages.append(
+                            {"role": "assistant", "content": final_response}
+                        )
+                    else:
+                        messages.append({"role": "assistant", "content": llm_response})
+
+                except KeyboardInterrupt:
+                    logging.info("\nExiting...")
+                    break
+
+        finally:
+            await self.cleanup_servers()
+
+
+async def main() -> None:
+    """Initialize and run the chat session."""
+    config = Configuration()
+    server_config = config.load_config("servers_config.json")
+    servers = [
+        Server(name, srv_config)
+        for name, srv_config in server_config["mcpServers"].items()
+    ]
+    llm_client = LLMClient(config.llm_api_key)
+    chat_session = ChatSession(servers, llm_client)
+    await chat_session.start()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+
+
+
+JSON schema for Claude Code tools available via MCP:
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "result": {
+    "tools": [
+      {
+        "name": "dispatch_agent",
+        "description": "Launch a new task",
+        "inputSchema": {
+          "type": "object",
+          "properties": {
+            "prompt": {
+              "type": "string",
+              "description": "The task for the agent to perform"
+            }
+          },
+          "required": [
+            "prompt"
+          ],
+          "additionalProperties": false,
+          "$schema": "http://json-schema.org/draft-07/schema#"
+        }
+      },
+      {
+        "name": "Bash",
+        "description": "Run shell command",
+        "inputSchema": {
+          "type": "object",
+          "properties": {
+            "command": {
+              "type": "string",
+              "description": "The command to execute"
+            },
+            "timeout": {
+              "type": "number",
+              "description": "Optional timeout in milliseconds (max 600000)"
+            },
+            "description": {
+              "type": "string",
+              "description": " Clear, concise description of what this command does in 5-10 words. 
Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'" + } + }, + "required": [ + "command" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "BatchTool", + "description": "\n- Batch execution tool that runs multiple tool invocations in a single request\n- Tools are executed in parallel when possible, and otherwise serially\n- Takes a list of tool invocations (tool_name and input pairs)\n- Returns the collected results from all invocations\n- Use this tool when you need to run multiple independent tool operations at once -- it is awesome for speeding up your workflow, reducing both context usage and latency\n- Each tool will respect its own permissions and validation rules\n- The tool's outputs are NOT shown to the user; to answer the user's query, you MUST send a message with the results after the tool call completes, otherwise the user will not see the results\n\nAvailable tools:\nTool: dispatch_agent\nArguments: prompt: string \"The task for the agent to perform\"\nUsage: Launch a new agent that has access to the following tools: View, GlobTool, GrepTool, LS, ReadNotebook, WebFetchTool. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries, use the Agent tool to perform the search for you.\n\nWhen to use the Agent tool:\n- If you are searching for a keyword like \"config\" or \"logger\", or for questions like \"which file does X?\", the Agent tool is strongly recommended\n\nWhen NOT to use the Agent tool:\n- If you want to read a specific file path, use the View or GlobTool tool instead of the Agent tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the GlobTool tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the View tool instead of the Agent tool, to find the match more quickly\n\nUsage notes:\n1. Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n2. When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n3. Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you.\n4. The agent's outputs should generally be trusted\n5. IMPORTANT: The agent can not use Bash, Replace, Edit, NotebookEditCell, so can not modify files. If you want to use these tools, use them directly instead of going through the agent.\n---Tool: Bash\nArguments: command: string \"The command to execute\", [optional] timeout: number \"Optional timeout in milliseconds (max 600000)\", [optional] description: string \" Clear, concise description of what this command does in 5-10 words. 
Examples:\nInput: ls\nOutput: Lists files in current directory\n\nInput: git status\nOutput: Shows working tree status\n\nInput: npm install\nOutput: Installs package dependencies\n\nInput: mkdir foo\nOutput: Creates directory 'foo'\"\nUsage: Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures.\n\nBefore executing the command, please follow these steps:\n\n1. Directory Verification:\n - If the command will create new directories or files, first use the LS tool to verify the parent directory exists and is the correct location\n - For example, before running \"mkdir foo/bar\", first use LS to check that \"foo\" exists and is the intended parent directory\n\n2. Security Check:\n - For security and to limit the threat of a prompt injection attack, some commands are limited or banned. If you use a disallowed command, you will receive an error message explaining the restriction. Explain the error to the User.\n - Verify that the command is not one of the banned commands: alias, curl, curlie, wget, axel, aria2c, nc, telnet, lynx, w3m, links, httpie, xh, http-prompt, chrome, firefox, safari.\n\n3. Command Execution:\n - After ensuring proper quoting, execute the command.\n - Capture the output of the command.\n\nUsage notes:\n - The command argument is required.\n - You can specify an optional timeout in milliseconds (up to 600000ms / 10 minutes). If not specified, commands will timeout after 30 minutes.\n - It is very helpful if you write a clear, concise description of what this command does in 5-10 words.\n - If the output exceeds 30000 characters, output will be truncated before being returned to you.\n - VERY IMPORTANT: You MUST avoid using search commands like `find` and `grep`. Instead use GrepTool, GlobTool, or dispatch_agent to search. You MUST avoid read tools like `cat`, `head`, `tail`, and `ls`, and use View and LS to read files.\n - When issuing multiple commands, use the ';' or '&&' operator to separate them. DO NOT use newlines (newlines are ok in quoted strings).\n - Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.\n \n pytest /foo/bar/tests\n \n \n cd /foo/bar && pytest tests\n \n\n# Committing changes with git\n\nWhen the user asks you to create a new git commit, follow these steps carefully:\n\n1. Use BatchTool to run the following commands in parallel:\n - Run a git status command to see all untracked files.\n - Run a git diff command to see both staged and unstaged changes that will be committed.\n - Run a git log command to see recent commit messages, so that you can follow this repository's commit message style.\n\n2. Analyze all staged changes (both previously staged and newly added) and draft a commit message. Wrap your analysis process in tags:\n\n\n- List the files that have been changed or added\n- Summarize the nature of the changes (eg. new feature, enhancement to an existing feature, bug fix, refactoring, test, docs, etc.)\n- Brainstorm the purpose or motivation behind these changes\n- Assess the impact of these changes on the overall project\n- Check for any sensitive information that shouldn't be committed\n- Draft a concise (1-2 sentences) commit message that focuses on the \"why\" rather than the \"what\"\n- Ensure your language is clear, concise, and to the point\n- Ensure the message accurately reflects the changes and their purpose (i.e. 
\"add\" means a wholly new feature, \"update\" means an enhancement to an existing feature, \"fix\" means a bug fix, etc.)\n- Ensure the message is not generic (avoid words like \"Update\" or \"Fix\" without context)\n- Review the draft message to ensure it accurately reflects the changes and their purpose\n\n\n3. Use BatchTool to run the following commands in parallel:\n - Add relevant untracked files to the staging area.\n - Create the commit with a message ending with:\n 🤖 Generated with [Claude Code](https://claude.ai/code)\n\n Co-Authored-By: Claude \n - Run git status to make sure the commit succeeded.\n\n4. If the commit fails due to pre-commit hook changes, retry the commit ONCE to include these automated changes. If it fails again, it usually means a pre-commit hook is preventing the commit. If the commit succeeds but you notice that files were modified by the pre-commit hook, you MUST amend your commit to include them.\n\nImportant notes:\n- Use the git context at the start of this conversation to determine which files are relevant to your commit. Be careful not to stage and commit files (e.g. with `git add .`) that aren't relevant to your commit.\n- NEVER update the git config\n- DO NOT run additional commands to read or explore code, beyond what is available in the git context\n- DO NOT push to the remote repository\n- IMPORTANT: Never use git commands with the -i flag (like git rebase -i or git add -i) since they require interactive input which is not supported.\n- If there are no changes to commit (i.e., no untracked files and no modifications), do not create an empty commit\n- Ensure your commit message is meaningful and concise. It should explain the purpose of the changes, not just describe them.\n- Return an empty response - the user will see the git output directly\n- In order to ensure good formatting, ALWAYS pass the commit message via a HEREDOC, a la this example:\n\ngit commit -m \"$(cat <<'EOF'\n Commit message here.\n\n 🤖 Generated with [Claude Code](https://claude.ai/code)\n\n Co-Authored-By: Claude \n EOF\n )\"\n\n\n# Creating pull requests\nUse the gh command via the Bash tool for ALL GitHub-related tasks including working with issues, pull requests, checks, and releases. If given a Github URL use the gh command to get the information needed.\n\nIMPORTANT: When the user asks you to create a pull request, follow these steps carefully:\n\n1. Use BatchTool to run the following commands in parallel, in order to understand the current state of the branch since it diverged from the main branch:\n - Run a git status command to see all untracked files\n - Run a git diff command to see both staged and unstaged changes that will be committed\n - Check if the current branch tracks a remote branch and is up to date with the remote, so you know if you need to push to the remote\n - Run a git log command and `git diff main...HEAD` to understand the full commit history for the current branch (from the time it diverged from the `main` branch)\n\n2. Analyze all changes that will be included in the pull request, making sure to look at all relevant commits (NOT just the latest commit, but ALL commits that will be included in the pull request!!!), and draft a pull request summary. Wrap your analysis process in tags:\n\n\n- List the commits since diverging from the main branch\n- Summarize the nature of the changes (eg. 
new feature, enhancement to an existing feature, bug fix, refactoring, test, docs, etc.)\n- Brainstorm the purpose or motivation behind these changes\n- Assess the impact of these changes on the overall project\n- Do not use tools to explore code, beyond what is available in the git context\n- Check for any sensitive information that shouldn't be committed\n- Draft a concise (1-2 bullet points) pull request summary that focuses on the \"why\" rather than the \"what\"\n- Ensure the summary accurately reflects all changes since diverging from the main branch\n- Ensure your language is clear, concise, and to the point\n- Ensure the summary accurately reflects the changes and their purpose (ie. \"add\" means a wholly new feature, \"update\" means an enhancement to an existing feature, \"fix\" means a bug fix, etc.)\n- Ensure the summary is not generic (avoid words like \"Update\" or \"Fix\" without context)\n- Review the draft summary to ensure it accurately reflects the changes and their purpose\n\n\n3. Use BatchTool to run the following commands in parallel:\n - Create new branch if needed\n - Push to remote with -u flag if needed\n - Create PR using gh pr create with the format below. Use a HEREDOC to pass the body to ensure correct formatting.\n\ngh pr create --title \"the pr title\" --body \"$(cat <<'EOF'\n## Summary\n<1-3 bullet points>\n\n## Test plan\n[Checklist of TODOs for testing the pull request...]\n\n🤖 Generated with [Claude Code](https://claude.ai/code)\nEOF\n)\"\n\n\nImportant:\n- NEVER update the git config\n- Return an empty response - the user will see the gh output directly\n\n# Other common operations\n- View comments on a Github PR: gh api repos/foo/bar/pulls/123/comments\n---Tool: GlobTool\nArguments: pattern: string \"The glob pattern to match files against\", [optional] path: string \"The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter \"undefined\" or \"null\" - simply omit it for the default behavior. Must be a valid directory path if provided.\"\nUsage: - Fast file pattern matching tool that works with any codebase size\n- Supports glob patterns like \"**/*.js\" or \"src/**/*.ts\"\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files by name patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n\n---Tool: GrepTool\nArguments: pattern: string \"The regular expression pattern to search for in file contents\", [optional] path: string \"The directory to search in. Defaults to the current working directory.\", [optional] include: string \"File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")\"\nUsage: \n- Fast content search tool that works with any codebase size\n- Searches file contents using regular expressions\n- Supports full regex syntax (eg. \"log.*Error\", \"function\\s+\\w+\", etc.)\n- Filter files by pattern with the include parameter (eg. 
\"*.js\", \"*.{ts,tsx}\")\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files containing specific patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n\n---Tool: LS\nArguments: path: string \"The absolute path to the directory to list (must be absolute, not relative)\", [optional] ignore: array \"List of glob patterns to ignore\"\nUsage: Lists files and directories in a given path. The path parameter must be an absolute path, not a relative path. You can optionally provide an array of glob patterns to ignore with the ignore parameter. You should generally prefer the Glob and Grep tools, if you know which directories to search.\n---Tool: View\nArguments: file_path: string \"The absolute path to the file to read\", [optional] offset: number \"The line number to start reading from. Only provide if the file is too large to read at once\", [optional] limit: number \"The number of lines to read. Only provide if the file is too large to read at once.\"\nUsage: Reads a file from the local filesystem. You can access any file directly by using this tool.\nAssume this tool is able to read all files on the machine. If the User provides a path to a file assume that path is valid. It is okay to read a file that does not exist; an error will be returned.\n\nUsage:\n- The file_path parameter must be an absolute path, not a relative path\n- By default, it reads up to 2000 lines starting from the beginning of the file\n- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters\n- Any lines longer than 2000 characters will be truncated\n- Results are returned using cat -n format, with line numbers starting at 1\n- This tool allows Claude Code to VIEW images (eg PNG, JPG, etc). When reading an image file the contents are presented visually as Claude Code is a multimodal LLM.\n- For Jupyter notebooks (.ipynb files), use the ReadNotebook instead\n- When reading multiple files, you MUST use the BatchTool tool to read them all at once\n---Tool: Edit\nArguments: file_path: string \"The absolute path to the file to modify\", old_string: string \"The text to replace\", new_string: string \"The text to replace it with\", [optional] expected_replacements: number \"The expected number of replacements to perform. Defaults to 1 if not specified.\"\nUsage: This is a tool for editing files. For moving or renaming files, you should generally use the Bash tool with the 'mv' command instead. For larger edits, use the Write tool to overwrite files. For Jupyter notebooks (.ipynb files), use the NotebookEditCell instead.\n\nBefore using this tool:\n\n1. Use the View tool to understand the file's contents and context\n\n2. Verify the directory path is correct (only applicable when creating new files):\n - Use the LS tool to verify the parent directory exists and is the correct location\n\nTo make a file edit, provide the following:\n1. file_path: The absolute path to the file to modify (must be absolute, not relative)\n2. old_string: The text to replace (must match the file contents exactly, including all whitespace and indentation)\n3. new_string: The edited text to replace the old_string\n4. expected_replacements: The number of replacements you expect to make. 
Defaults to 1 if not specified.\n\nBy default, the tool will replace ONE occurrence of old_string with new_string in the specified file. If you want to replace multiple occurrences, provide the expected_replacements parameter with the exact number of occurrences you expect.\n\nCRITICAL REQUIREMENTS FOR USING THIS TOOL:\n\n1. UNIQUENESS (when expected_replacements is not specified): The old_string MUST uniquely identify the specific instance you want to change. This means:\n - Include AT LEAST 3-5 lines of context BEFORE the change point\n - Include AT LEAST 3-5 lines of context AFTER the change point\n - Include all whitespace, indentation, and surrounding code exactly as it appears in the file\n\n2. EXPECTED MATCHES: If you want to replace multiple instances:\n - Use the expected_replacements parameter with the exact number of occurrences you expect to replace\n - This will replace ALL occurrences of the old_string with the new_string\n - If the actual number of matches doesn't equal expected_replacements, the edit will fail\n - This is a safety feature to prevent unintended replacements\n\n3. VERIFICATION: Before using this tool:\n - Check how many instances of the target text exist in the file\n - If multiple instances exist, either:\n a) Gather enough context to uniquely identify each one and make separate calls, OR\n b) Use expected_replacements parameter with the exact count of instances you expect to replace\n\nWARNING: If you do not follow these requirements:\n - The tool will fail if old_string matches multiple locations and expected_replacements isn't specified\n - The tool will fail if the number of matches doesn't equal expected_replacements when it's specified\n - The tool will fail if old_string doesn't match exactly (including whitespace)\n - You may change unintended instances if you don't verify the match count\n\nWhen making edits:\n - Ensure the edit results in idiomatic, correct code\n - Do not leave the code in a broken state\n - Always use absolute file paths (starting with /)\n\nIf you want to create a new file, use:\n - A new file path, including dir name if needed\n - An empty old_string\n - The new file's contents as new_string\n\nRemember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.\n\n---Tool: Replace\nArguments: file_path: string \"The absolute path to the file to write (must be absolute, not relative)\", content: string \"The content to write to the file\"\nUsage: Write a file to the local filesystem. Overwrites the existing file if there is one.\n\nBefore using this tool:\n\n1. Use the ReadFile tool to understand the file's contents and context\n\n2. Directory Verification (only applicable when creating new files):\n - Use the LS tool to verify the parent directory exists and is the correct location\n---Tool: ReadNotebook\nArguments: notebook_path: string \"The absolute path to the Jupyter notebook file to read (must be absolute, not relative)\"\nUsage: Reads a Jupyter notebook (.ipynb file) and returns all of the cells with their outputs. Jupyter notebooks are interactive documents that combine code, text, and visualizations, commonly used for data analysis and scientific computing. 
The notebook_path parameter must be an absolute path, not a relative path.\n---Tool: NotebookEditCell\nArguments: notebook_path: string \"The absolute path to the Jupyter notebook file to edit (must be absolute, not relative)\", cell_number: number \"The index of the cell to edit (0-based)\", new_source: string \"The new source for the cell\", [optional] cell_type: string \"The type of the cell (code or markdown). If not specified, it defaults to the current cell type. If using edit_mode=insert, this is required.\", [optional] edit_mode: string \"The type of edit to make (replace, insert, delete). Defaults to replace.\"\nUsage: Completely replaces the contents of a specific cell in a Jupyter notebook (.ipynb file) with new source. Jupyter notebooks are interactive documents that combine code, text, and visualizations, commonly used for data analysis and scientific computing. The notebook_path parameter must be an absolute path, not a relative path. The cell_number is 0-indexed. Use edit_mode=insert to add a new cell at the index specified by cell_number. Use edit_mode=delete to delete the cell at the index specified by cell_number.\n---Tool: WebFetchTool\nArguments: url: string \"The URL to fetch content from\", prompt: string \"The prompt to run on the fetched content\"\nUsage: \n- Fetches content from a specified URL and processes it using an AI model\n- Takes a URL and a prompt as input\n- Fetches the URL content, converts HTML to markdown\n- Processes the content with the prompt using a small, fast model\n- Returns the model's response about the content\n- Use this tool when you need to retrieve and analyze web content\n\nUsage notes:\n - IMPORTANT: If an MCP-provided web fetch tool is available, prefer using that tool instead of this one, as it may have fewer restrictions. 
All MCP-provided tools start with \"mcp__\".\n - The URL must be a fully-formed valid URL\n - HTTP URLs will be automatically upgraded to HTTPS\n - For security reasons, the URL's domain must have been provided directly by the user, unless it's on a small pre-approved set of the top few dozen hosts for popular coding resources, like react.dev.\n - The prompt should describe what information you want to extract from the page\n - This tool is read-only and does not modify any files\n - Results may be summarized if the content is very large\n - Includes a self-cleaning 15-minute cache for faster responses when repeatedly accessing the same URL\n\n\nExample usage:\n{\n \"invocations\": [\n {\n \"tool_name\": \"Bash\",\n \"input\": {\n \"command\": \"git blame src/foo.ts\"\n }\n },\n {\n \"tool_name\": \"GlobTool\",\n \"input\": {\n \"pattern\": \"**/*.ts\"\n }\n },\n {\n \"tool_name\": \"GrepTool\",\n \"input\": {\n \"pattern\": \"function\",\n \"include\": \"*.ts\"\n }\n }\n ]\n}\n", + "inputSchema": { + "type": "object", + "properties": { + "description": { + "type": "string", + "description": "A short (3-5 word) description of the batch operation" + }, + "invocations": { + "type": "array", + "items": { + "type": "object", + "properties": { + "tool_name": { + "type": "string", + "description": "The name of the tool to invoke" + }, + "input": { + "type": "object", + "additionalProperties": {}, + "description": "The input to pass to the tool" + } + }, + "required": [ + "tool_name", + "input" + ], + "additionalProperties": false + }, + "description": "The list of tool invocations to execute" + } + }, + "required": [ + "description", + "invocations" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "GlobTool", + "description": "- Fast file pattern matching tool that works with any codebase size\n- Supports glob patterns like \"**/*.js\" or \"src/**/*.ts\"\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files by name patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n", + "inputSchema": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The glob pattern to match files against" + }, + "path": { + "type": "string", + "description": "The directory to search in. If not specified, the current working directory will be used. IMPORTANT: Omit this field to use the default directory. DO NOT enter \"undefined\" or \"null\" - simply omit it for the default behavior. Must be a valid directory path if provided." + } + }, + "required": [ + "pattern" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "GrepTool", + "description": "\n- Fast content search tool that works with any codebase size\n- Searches file contents using regular expressions\n- Supports full regex syntax (eg. \"log.*Error\", \"function\\s+\\w+\", etc.)\n- Filter files by pattern with the include parameter (eg. 
\"*.js\", \"*.{ts,tsx}\")\n- Returns matching file paths sorted by modification time\n- Use this tool when you need to find files containing specific patterns\n- When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead\n", + "inputSchema": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "The regular expression pattern to search for in file contents" + }, + "path": { + "type": "string", + "description": "The directory to search in. Defaults to the current working directory." + }, + "include": { + "type": "string", + "description": "File pattern to include in the search (e.g. \"*.js\", \"*.{ts,tsx}\")" + } + }, + "required": [ + "pattern" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "LS", + "description": "Lists files and directories in a given path. The path parameter must be an absolute path, not a relative path. You can optionally provide an array of glob patterns to ignore with the ignore parameter. You should generally prefer the Glob and Grep tools, if you know which directories to search.", + "inputSchema": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "The absolute path to the directory to list (must be absolute, not relative)" + }, + "ignore": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of glob patterns to ignore" + } + }, + "required": [ + "path" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "View", + "description": "Read a file from the local filesystem.", + "inputSchema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The absolute path to the file to read" + }, + "offset": { + "type": "number", + "description": "The line number to start reading from. Only provide if the file is too large to read at once" + }, + "limit": { + "type": "number", + "description": "The number of lines to read. Only provide if the file is too large to read at once." + } + }, + "required": [ + "file_path" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "Edit", + "description": "A tool for editing files", + "inputSchema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The absolute path to the file to modify" + }, + "old_string": { + "type": "string", + "description": "The text to replace" + }, + "new_string": { + "type": "string", + "description": "The text to replace it with" + }, + "expected_replacements": { + "type": "number", + "default": 1, + "description": "The expected number of replacements to perform. Defaults to 1 if not specified." 
+ } + }, + "required": [ + "file_path", + "old_string", + "new_string" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "Replace", + "description": "Write a file to the local filesystem.", + "inputSchema": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The absolute path to the file to write (must be absolute, not relative)" + }, + "content": { + "type": "string", + "description": "The content to write to the file" + } + }, + "required": [ + "file_path", + "content" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "ReadNotebook", + "description": "Extract and read source code from all code cells in a Jupyter notebook.", + "inputSchema": { + "type": "object", + "properties": { + "notebook_path": { + "type": "string", + "description": "The absolute path to the Jupyter notebook file to read (must be absolute, not relative)" + } + }, + "required": [ + "notebook_path" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "NotebookEditCell", + "description": "Replace the contents of a specific cell in a Jupyter notebook.", + "inputSchema": { + "type": "object", + "properties": { + "notebook_path": { + "type": "string", + "description": "The absolute path to the Jupyter notebook file to edit (must be absolute, not relative)" + }, + "cell_number": { + "type": "number", + "description": "The index of the cell to edit (0-based)" + }, + "new_source": { + "type": "string", + "description": "The new source for the cell" + }, + "cell_type": { + "type": "string", + "enum": [ + "code", + "markdown" + ], + "description": "The type of the cell (code or markdown). If not specified, it defaults to the current cell type. If using edit_mode=insert, this is required." + }, + "edit_mode": { + "type": "string", + "description": "The type of edit to make (replace, insert, delete). Defaults to replace." 
+ } + }, + "required": [ + "notebook_path", + "cell_number", + "new_source" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + }, + { + "name": "WebFetchTool", + "description": "Claude wants to fetch content from this URL", + "inputSchema": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri", + "description": "The URL to fetch content from" + }, + "prompt": { + "type": "string", + "description": "The prompt to run on the fetched content" + } + }, + "required": [ + "url", + "prompt" + ], + "additionalProperties": false, + "$schema": "http://json-schema.org/draft-07/schema#" + } + } + ] + } +} +``` diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react.py b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react.py new file mode 100644 index 0000000000..03ff02e789 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react.py @@ -0,0 +1,14 @@ +class InputCell: + def __init__(self, initial_value): + self.value = None + + +class ComputeCell: + def __init__(self, inputs, compute_function): + self.value = None + + def add_callback(self, callback): + pass + + def remove_callback(self, callback): + pass diff --git a/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react_test.py b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react_test.py new file mode 100644 index 0000000000..1f917e40b4 --- /dev/null +++ b/crates/assistant_tools/src/edit_agent/evals/fixtures/zode/react_test.py @@ -0,0 +1,271 @@ +# These tests are auto-generated with test data from: +# https://github.com/exercism/problem-specifications/tree/main/exercises/react/canonical-data.json +# File last updated on 2023-07-19 + +from functools import partial +import unittest + +from react import ( + InputCell, + ComputeCell, +) + + +class ReactTest(unittest.TestCase): + def test_input_cells_have_a_value(self): + input = InputCell(10) + self.assertEqual(input.value, 10) + + def test_an_input_cell_s_value_can_be_set(self): + input = InputCell(4) + input.value = 20 + self.assertEqual(input.value, 20) + + def test_compute_cells_calculate_initial_value(self): + input = InputCell(1) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + self.assertEqual(output.value, 2) + + def test_compute_cells_take_inputs_in_the_right_order(self): + one = InputCell(1) + two = InputCell(2) + output = ComputeCell( + [ + one, + two, + ], + lambda inputs: inputs[0] + inputs[1] * 10, + ) + self.assertEqual(output.value, 21) + + def test_compute_cells_update_value_when_dependencies_are_changed(self): + input = InputCell(1) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + input.value = 3 + self.assertEqual(output.value, 4) + + def test_compute_cells_can_depend_on_other_compute_cells(self): + input = InputCell(1) + times_two = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] * 2, + ) + times_thirty = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] * 30, + ) + output = ComputeCell( + [ + times_two, + times_thirty, + ], + lambda inputs: inputs[0] + inputs[1], + ) + self.assertEqual(output.value, 32) + input.value = 3 + self.assertEqual(output.value, 96) + + def test_compute_cells_fire_callbacks(self): + input = InputCell(1) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + cb1_observer = [] + callback1 = self.callback_factory(cb1_observer) + output.add_callback(callback1) + input.value = 3 + 
self.assertEqual(cb1_observer[-1], 4) + + def test_callback_cells_only_fire_on_change(self): + input = InputCell(1) + output = ComputeCell([input], lambda inputs: 111 if inputs[0] < 3 else 222) + cb1_observer = [] + callback1 = self.callback_factory(cb1_observer) + output.add_callback(callback1) + input.value = 2 + self.assertEqual(cb1_observer, []) + input.value = 4 + self.assertEqual(cb1_observer[-1], 222) + + def test_callbacks_do_not_report_already_reported_values(self): + input = InputCell(1) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + cb1_observer = [] + callback1 = self.callback_factory(cb1_observer) + output.add_callback(callback1) + input.value = 2 + self.assertEqual(cb1_observer[-1], 3) + input.value = 3 + self.assertEqual(cb1_observer[-1], 4) + + def test_callbacks_can_fire_from_multiple_cells(self): + input = InputCell(1) + plus_one = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + minus_one = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] - 1, + ) + cb1_observer = [] + cb2_observer = [] + callback1 = self.callback_factory(cb1_observer) + callback2 = self.callback_factory(cb2_observer) + plus_one.add_callback(callback1) + minus_one.add_callback(callback2) + input.value = 10 + self.assertEqual(cb1_observer[-1], 11) + self.assertEqual(cb2_observer[-1], 9) + + def test_callbacks_can_be_added_and_removed(self): + input = InputCell(11) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + cb1_observer = [] + cb2_observer = [] + cb3_observer = [] + callback1 = self.callback_factory(cb1_observer) + callback2 = self.callback_factory(cb2_observer) + callback3 = self.callback_factory(cb3_observer) + output.add_callback(callback1) + output.add_callback(callback2) + input.value = 31 + self.assertEqual(cb1_observer[-1], 32) + self.assertEqual(cb2_observer[-1], 32) + output.remove_callback(callback1) + output.add_callback(callback3) + input.value = 41 + self.assertEqual(len(cb1_observer), 1) + self.assertEqual(cb2_observer[-1], 42) + self.assertEqual(cb3_observer[-1], 42) + + def test_removing_a_callback_multiple_times_doesn_t_interfere_with_other_callbacks( + self, + ): + input = InputCell(1) + output = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + cb1_observer = [] + cb2_observer = [] + callback1 = self.callback_factory(cb1_observer) + callback2 = self.callback_factory(cb2_observer) + output.add_callback(callback1) + output.add_callback(callback2) + output.remove_callback(callback1) + output.remove_callback(callback1) + output.remove_callback(callback1) + input.value = 2 + self.assertEqual(cb1_observer, []) + self.assertEqual(cb2_observer[-1], 3) + + def test_callbacks_should_only_be_called_once_even_if_multiple_dependencies_change( + self, + ): + input = InputCell(1) + plus_one = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] + 1, + ) + minus_one1 = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] - 1, + ) + minus_one2 = ComputeCell( + [ + minus_one1, + ], + lambda inputs: inputs[0] - 1, + ) + output = ComputeCell( + [ + plus_one, + minus_one2, + ], + lambda inputs: inputs[0] * inputs[1], + ) + cb1_observer = [] + callback1 = self.callback_factory(cb1_observer) + output.add_callback(callback1) + input.value = 4 + self.assertEqual(cb1_observer[-1], 10) + + def test_callbacks_should_not_be_called_if_dependencies_change_but_output_value_doesn_t_change( + self, + ): + input = InputCell(1) + plus_one = ComputeCell( + [ + input, + ], + lambda inputs: inputs[0] 
+ 1,
+        )
+        minus_one = ComputeCell(
+            [
+                input,
+            ],
+            lambda inputs: inputs[0] - 1,
+        )
+        always_two = ComputeCell(
+            [
+                plus_one,
+                minus_one,
+            ],
+            lambda inputs: inputs[0] - inputs[1],
+        )
+        cb1_observer = []
+        callback1 = self.callback_factory(cb1_observer)
+        always_two.add_callback(callback1)
+        input.value = 2
+        self.assertEqual(cb1_observer, [])
+        input.value = 3
+        self.assertEqual(cb1_observer, [])
+        input.value = 4
+        self.assertEqual(cb1_observer, [])
+        input.value = 5
+        self.assertEqual(cb1_observer, [])
+
+    # Utility functions.
+    def callback_factory(self, observer):
+        def callback(observer, value):
+            observer.append(value)
+
+        return partial(callback, observer)
diff --git a/crates/assistant_tools/src/streaming_edit_file_tool.rs b/crates/assistant_tools/src/streaming_edit_file_tool.rs
index 668237fba3..f99ea60072 100644
--- a/crates/assistant_tools/src/streaming_edit_file_tool.rs
+++ b/crates/assistant_tools/src/streaming_edit_file_tool.rs
@@ -38,7 +38,7 @@ pub struct StreamingEditFileToolInput {
     /// so that we can display it immediately.
     pub display_description: String,
 
-    /// The full path of the file to modify in the project.
+    /// The full path of the file to create or modify in the project.
     ///
     /// WARNING: When specifying which file path need changing, you MUST
     /// start each path with one of the project's root directories.
@@ -58,6 +58,10 @@ pub struct StreamingEditFileToolInput {
     /// `frontend/db.js`
     ///
     pub path: PathBuf,
+
+    /// If true, this tool will recreate the file from scratch.
+    /// If false, this tool will produce granular edits to an existing file.
+    pub create_or_overwrite: bool,
 }
 
 #[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -158,7 +162,7 @@ impl Tool for StreamingEditFileTool {
         let card_clone = card.clone();
         let messages = messages.to_vec();
         let task = cx.spawn(async move |cx: &mut AsyncApp| {
-            if !exists.await? {
+            if !input.create_or_overwrite && !exists.await? {
                 return Err(anyhow!("{} not found", input.path.display()));
             }
 
@@ -182,12 +186,21 @@ impl Tool for StreamingEditFileTool {
                 })
                 .await;
 
-            let (output, mut events) = edit_agent.edit(
-                buffer.clone(),
-                input.display_description.clone(),
-                messages,
-                cx,
-            );
+            let (output, mut events) = if input.create_or_overwrite {
+                edit_agent.overwrite(
+                    buffer.clone(),
+                    input.display_description.clone(),
+                    messages,
+                    cx,
+                )
+            } else {
+                edit_agent.edit(
+                    buffer.clone(),
+                    input.display_description.clone(),
+                    messages,
+                    cx,
+                )
+            };
 
             let mut hallucinated_old_text = false;
             while let Some(event) = events.next().await {
@@ -213,7 +226,7 @@ impl Tool for StreamingEditFileTool {
                             .log_err();
                     }
                 }
-                EditAgentOutputEvent::HallucinatedOldText(_) => hallucinated_old_text = true,
+                EditAgentOutputEvent::OldTextNotFound(_) => hallucinated_old_text = true,
             }
         }
         output.await?;
diff --git a/crates/assistant_tools/src/streaming_edit_file_tool/description.md b/crates/assistant_tools/src/streaming_edit_file_tool/description.md
index 14185b8eee..27f8e49dd6 100644
--- a/crates/assistant_tools/src/streaming_edit_file_tool/description.md
+++ b/crates/assistant_tools/src/streaming_edit_file_tool/description.md
@@ -1,4 +1,4 @@
-This is a tool for editing files. For moving or renaming files, you should generally use the `terminal` tool with the 'mv' command instead. For larger edits, use the `create_file` tool to overwrite files.
+This is a tool for creating a new file or editing an existing file. For moving or renaming files, you should generally use the `terminal` tool with the 'mv' command instead.
 
 Before using this tool:
 
diff --git a/crates/assistant_tools/src/templates/create_file_prompt.hbs b/crates/assistant_tools/src/templates/create_file_prompt.hbs
new file mode 100644
index 0000000000..fb26af99a8
--- /dev/null
+++ b/crates/assistant_tools/src/templates/create_file_prompt.hbs
@@ -0,0 +1,12 @@
+You are an expert engineer and your task is to write a new file from scratch.
+
+
+{{path}}
+
+
+
+{{edit_description}}
+
+
+You MUST respond directly with the file's content, without explanations, additional text or triple backticks.
+The text you output will be saved verbatim as the content of the file.
diff --git a/crates/assistant_tools/src/templates/edit_agent.hbs b/crates/assistant_tools/src/templates/edit_file_prompt.hbs
similarity index 100%
rename from crates/assistant_tools/src/templates/edit_agent.hbs
rename to crates/assistant_tools/src/templates/edit_file_prompt.hbs
diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs
index b84a0273e9..4636e19b12 100644
--- a/crates/language/src/buffer.rs
+++ b/crates/language/src/buffer.rs
@@ -2141,6 +2141,14 @@ impl Buffer {
         self.edit([(0..self.len(), text)], None, cx)
     }
 
+    /// Appends the given text to the end of the buffer.
+    pub fn append<T>(&mut self, text: T, cx: &mut Context<Self>) -> Option<clock::Lamport>
+    where
+        T: Into<Arc<str>>,
+    {
+        self.edit([(self.len()..self.len(), text)], None, cx)
+    }
+
     /// Applies the given edits to the buffer. Each edit is specified as a range of text to
     /// delete, and a string of text to insert at that location.
    ///