assistant_tools: Add fetch
tool (#26999)
This PR adds a new `fetch` tool to the set of tools the Assistant has available. This tool accepts a URL and fetches the content as Markdown. <img width="1394" alt="Screenshot 2025-03-18 at 11 52 21 AM" src="https://github.com/user-attachments/assets/e5bcde14-a0dd-4835-9d42-8f45def68f4d" /> <img width="1394" alt="Screenshot 2025-03-18 at 11 52 37 AM" src="https://github.com/user-attachments/assets/3bcce4f5-f61b-40d7-8b30-2c673ce3c06a" /> Release Notes: - N/A
This commit is contained in:
parent
baaafddbeb
commit
ed4e654fdf
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -715,6 +715,8 @@ dependencies = [
|
||||
"feature_flags",
|
||||
"futures 0.3.31",
|
||||
"gpui",
|
||||
"html_to_markdown",
|
||||
"http_client",
|
||||
"itertools 0.14.0",
|
||||
"language",
|
||||
"language_model",
|
||||
|
@ -163,7 +163,7 @@ pub fn init(cx: &mut App) -> Arc<HeadlessAppState> {
|
||||
language::init(cx);
|
||||
language_model::init(client.clone(), cx);
|
||||
language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
|
||||
assistant_tools::init(cx);
|
||||
assistant_tools::init(client.http_client().clone(), cx);
|
||||
context_server::init(cx);
|
||||
let stdout_is_a_pty = false;
|
||||
let prompt_builder = PromptBuilder::load(fs.clone(), stdout_is_a_pty, cx);
|
||||
|
@ -18,8 +18,10 @@ chrono.workspace = true
|
||||
collections.workspace = true
|
||||
feature_flags.workspace = true
|
||||
futures.workspace = true
|
||||
itertools.workspace = true
|
||||
gpui.workspace = true
|
||||
html_to_markdown.workspace = true
|
||||
http_client.workspace = true
|
||||
itertools.workspace = true
|
||||
language.workspace = true
|
||||
language_model.workspace = true
|
||||
project.workspace = true
|
||||
@ -27,17 +29,17 @@ release_channel.workspace = true
|
||||
schemars.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
settings.workspace = true
|
||||
theme.workspace = true
|
||||
ui.workspace = true
|
||||
util.workspace = true
|
||||
workspace.workspace = true
|
||||
worktree.workspace = true
|
||||
settings.workspace = true
|
||||
|
||||
[dev-dependencies]
|
||||
rand.workspace = true
|
||||
collections = { workspace = true, features = ["test-support"] }
|
||||
gpui = { workspace = true, features = ["test-support"] }
|
||||
language = { workspace = true, features = ["test-support"] }
|
||||
project = { workspace = true, features = ["test-support"] }
|
||||
rand.workspace = true
|
||||
workspace = { workspace = true, features = ["test-support"] }
|
||||
|
@ -2,6 +2,7 @@ mod bash_tool;
|
||||
mod delete_path_tool;
|
||||
mod diagnostics_tool;
|
||||
mod edit_files_tool;
|
||||
mod fetch_tool;
|
||||
mod list_directory_tool;
|
||||
mod now_tool;
|
||||
mod path_search_tool;
|
||||
@ -9,13 +10,17 @@ mod read_file_tool;
|
||||
mod regex_search_tool;
|
||||
mod thinking_tool;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use assistant_tool::ToolRegistry;
|
||||
use gpui::App;
|
||||
use http_client::HttpClientWithUrl;
|
||||
|
||||
use crate::bash_tool::BashTool;
|
||||
use crate::delete_path_tool::DeletePathTool;
|
||||
use crate::diagnostics_tool::DiagnosticsTool;
|
||||
use crate::edit_files_tool::EditFilesTool;
|
||||
use crate::fetch_tool::FetchTool;
|
||||
use crate::list_directory_tool::ListDirectoryTool;
|
||||
use crate::now_tool::NowTool;
|
||||
use crate::path_search_tool::PathSearchTool;
|
||||
@ -23,7 +28,7 @@ use crate::read_file_tool::ReadFileTool;
|
||||
use crate::regex_search_tool::RegexSearchTool;
|
||||
use crate::thinking_tool::ThinkingTool;
|
||||
|
||||
pub fn init(cx: &mut App) {
|
||||
pub fn init(http_client: Arc<HttpClientWithUrl>, cx: &mut App) {
|
||||
assistant_tool::init(cx);
|
||||
crate::edit_files_tool::log::init(cx);
|
||||
|
||||
@ -38,4 +43,5 @@ pub fn init(cx: &mut App) {
|
||||
registry.register_tool(ReadFileTool);
|
||||
registry.register_tool(RegexSearchTool);
|
||||
registry.register_tool(ThinkingTool);
|
||||
registry.register_tool(FetchTool::new(http_client));
|
||||
}
|
||||
|
153
crates/assistant_tools/src/fetch_tool.rs
Normal file
153
crates/assistant_tools/src/fetch_tool.rs
Normal file
@ -0,0 +1,153 @@
|
||||
use std::cell::RefCell;
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, bail, Context as _, Result};
|
||||
use assistant_tool::{ActionLog, Tool};
|
||||
use futures::AsyncReadExt as _;
|
||||
use gpui::{App, AppContext as _, Entity, Task};
|
||||
use html_to_markdown::{convert_html_to_markdown, markdown, TagHandler};
|
||||
use http_client::{AsyncBody, HttpClientWithUrl};
|
||||
use language_model::LanguageModelRequestMessage;
|
||||
use project::Project;
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
|
||||
enum ContentType {
|
||||
Html,
|
||||
Plaintext,
|
||||
Json,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
|
||||
pub struct FetchToolInput {
|
||||
/// The URL to fetch.
|
||||
url: String,
|
||||
}
|
||||
|
||||
pub struct FetchTool {
|
||||
http_client: Arc<HttpClientWithUrl>,
|
||||
}
|
||||
|
||||
impl FetchTool {
|
||||
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
|
||||
Self { http_client }
|
||||
}
|
||||
|
||||
async fn build_message(http_client: Arc<HttpClientWithUrl>, url: &str) -> Result<String> {
|
||||
let mut url = url.to_owned();
|
||||
if !url.starts_with("https://") && !url.starts_with("http://") {
|
||||
url = format!("https://{url}");
|
||||
}
|
||||
|
||||
let mut response = http_client.get(&url, AsyncBody::default(), true).await?;
|
||||
|
||||
let mut body = Vec::new();
|
||||
response
|
||||
.body_mut()
|
||||
.read_to_end(&mut body)
|
||||
.await
|
||||
.context("error reading response body")?;
|
||||
|
||||
if response.status().is_client_error() {
|
||||
let text = String::from_utf8_lossy(body.as_slice());
|
||||
bail!(
|
||||
"status error {}, response: {text:?}",
|
||||
response.status().as_u16()
|
||||
);
|
||||
}
|
||||
|
||||
let Some(content_type) = response.headers().get("content-type") else {
|
||||
bail!("missing Content-Type header");
|
||||
};
|
||||
let content_type = content_type
|
||||
.to_str()
|
||||
.context("invalid Content-Type header")?;
|
||||
let content_type = match content_type {
|
||||
"text/html" => ContentType::Html,
|
||||
"text/plain" => ContentType::Plaintext,
|
||||
"application/json" => ContentType::Json,
|
||||
_ => ContentType::Html,
|
||||
};
|
||||
|
||||
match content_type {
|
||||
ContentType::Html => {
|
||||
let mut handlers: Vec<TagHandler> = vec![
|
||||
Rc::new(RefCell::new(markdown::WebpageChromeRemover)),
|
||||
Rc::new(RefCell::new(markdown::ParagraphHandler)),
|
||||
Rc::new(RefCell::new(markdown::HeadingHandler)),
|
||||
Rc::new(RefCell::new(markdown::ListHandler)),
|
||||
Rc::new(RefCell::new(markdown::TableHandler::new())),
|
||||
Rc::new(RefCell::new(markdown::StyledTextHandler)),
|
||||
];
|
||||
if url.contains("wikipedia.org") {
|
||||
use html_to_markdown::structure::wikipedia;
|
||||
|
||||
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaChromeRemover)));
|
||||
handlers.push(Rc::new(RefCell::new(wikipedia::WikipediaInfoboxHandler)));
|
||||
handlers.push(Rc::new(
|
||||
RefCell::new(wikipedia::WikipediaCodeHandler::new()),
|
||||
));
|
||||
} else {
|
||||
handlers.push(Rc::new(RefCell::new(markdown::CodeHandler)));
|
||||
}
|
||||
|
||||
convert_html_to_markdown(&body[..], &mut handlers)
|
||||
}
|
||||
ContentType::Plaintext => Ok(std::str::from_utf8(&body)?.to_owned()),
|
||||
ContentType::Json => {
|
||||
let json: serde_json::Value = serde_json::from_slice(&body)?;
|
||||
|
||||
Ok(format!(
|
||||
"```json\n{}\n```",
|
||||
serde_json::to_string_pretty(&json)?
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Tool for FetchTool {
|
||||
fn name(&self) -> String {
|
||||
"fetch".to_string()
|
||||
}
|
||||
|
||||
fn description(&self) -> String {
|
||||
include_str!("./fetch_tool/description.md").to_string()
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> serde_json::Value {
|
||||
let schema = schemars::schema_for!(FetchToolInput);
|
||||
serde_json::to_value(&schema).unwrap()
|
||||
}
|
||||
|
||||
fn run(
|
||||
self: Arc<Self>,
|
||||
input: serde_json::Value,
|
||||
_messages: &[LanguageModelRequestMessage],
|
||||
_project: Entity<Project>,
|
||||
_action_log: Entity<ActionLog>,
|
||||
cx: &mut App,
|
||||
) -> Task<Result<String>> {
|
||||
let input = match serde_json::from_value::<FetchToolInput>(input) {
|
||||
Ok(input) => input,
|
||||
Err(err) => return Task::ready(Err(anyhow!(err))),
|
||||
};
|
||||
|
||||
let text = cx.background_spawn({
|
||||
let http_client = self.http_client.clone();
|
||||
let url = input.url.clone();
|
||||
async move { Self::build_message(http_client, &url).await }
|
||||
});
|
||||
|
||||
cx.foreground_executor().spawn(async move {
|
||||
let text = text.await?;
|
||||
if text.trim().is_empty() {
|
||||
bail!("no textual content found");
|
||||
}
|
||||
|
||||
Ok(text)
|
||||
})
|
||||
}
|
||||
}
|
1
crates/assistant_tools/src/fetch_tool/description.md
Normal file
1
crates/assistant_tools/src/fetch_tool/description.md
Normal file
@ -0,0 +1 @@
|
||||
Fetches a URL and returns the content as Markdown.
|
@ -472,7 +472,7 @@ fn main() {
|
||||
prompt_builder.clone(),
|
||||
cx,
|
||||
);
|
||||
assistant_tools::init(cx);
|
||||
assistant_tools::init(app_state.client.http_client(), cx);
|
||||
repl::init(app_state.fs.clone(), cx);
|
||||
extension_host::init(
|
||||
extension_host_proxy,
|
||||
|
Loading…
x
Reference in New Issue
Block a user