lmstudio: Fix streaming not working in v0.3.15 (#30013)

Closes #29781

Tested this with llama3, gemma3 and qwen3.

This is a breaking change: once these changes land, future versions of Zed will require LM Studio >= 0.3.15. For context on why it is a breaking change, see issue #29781.

What this doesn't try to solve is:

* Tool calling and thinking-text rendering. These will be addressed in a separate PR, as they are not required here to make streaming work.


https://github.com/user-attachments/assets/945f9c73-6323-4a88-92e2-2219b760a249

Release Notes:

- lmstudio: Fixed Zed support for LMStudio >= v0.3.15 (breaking change -- older versions are no longer supported).

---------

Co-authored-by: Peter Tripp <peter@zed.dev>
This commit is contained in:
Umesh Yadav 2025-05-06 22:29:36 +05:30 committed by GitHub
parent bbfcd885ab
commit a743035286
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 54 additions and 30 deletions

View File

@ -330,41 +330,23 @@ impl LanguageModel for LmStudioLanguageModel {
let future = self.request_limiter.stream(async move {
let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
let stream = response
.filter_map(|response| async move {
match response {
Ok(fragment) => {
// Skip empty deltas
if fragment.choices[0].delta.is_object()
&& fragment.choices[0].delta.as_object().unwrap().is_empty()
{
return None;
}
// Try to parse the delta as ChatMessage
if let Ok(chat_message) = serde_json::from_value::<ChatMessage>(
fragment.choices[0].delta.clone(),
) {
let content = match chat_message {
ChatMessage::User { content } => content,
ChatMessage::Assistant { content, .. } => {
content.unwrap_or_default()
}
ChatMessage::System { content } => content,
};
if !content.is_empty() {
Some(Ok(content))
} else {
None
}
} else {
None
}
}
// Create a stream mapper to handle content across multiple deltas
let stream_mapper = LmStudioStreamMapper::new();
let stream = response
.map(move |response| {
response.and_then(|fragment| stream_mapper.process_fragment(fragment))
})
.filter_map(|result| async move {
match result {
Ok(Some(content)) => Some(Ok(content)),
Ok(None) => None,
Err(error) => Some(Err(error)),
}
})
.boxed();
Ok(stream)
});
@ -382,6 +364,40 @@ impl LanguageModel for LmStudioLanguageModel {
}
}
// Maps raw streaming fragments from LM Studio into plain text content.
// Currently stateless; it will become more useful once tool calling is
// implemented and per-stream state needs to be accumulated.
struct LmStudioStreamMapper {}
impl LmStudioStreamMapper {
    /// Creates a new, stateless stream mapper.
    fn new() -> Self {
        Self {}
    }

    /// Extracts the text content carried by a single streaming fragment.
    ///
    /// Returns `Ok(Some(content))` when the fragment's first choice holds a
    /// delta with non-empty `content`, and `Ok(None)` otherwise — i.e. when
    /// there are no choices, the delta does not parse as a
    /// `ResponseMessageDelta` (e.g. a terminal chunk), or the content is
    /// empty. Never returns `Err` today; the `Result` return leaves room for
    /// future fragment-level errors (e.g. malformed tool calls).
    fn process_fragment(&self, fragment: lmstudio::ChatResponse) -> Result<Option<String>> {
        // Most of the time there is exactly one choice; only the first is used.
        let Some(choice) = fragment.choices.first() else {
            return Ok(None);
        };

        // The delta arrives as loosely-typed JSON. Fragments that don't match
        // the expected shape are skipped rather than treated as errors.
        // NOTE: the previous version had a dead `finish_reason` branch that
        // returned the same `Ok(None)` as the fall-through; it has been removed.
        let content = serde_json::from_value::<lmstudio::ResponseMessageDelta>(choice.delta.clone())
            .ok()
            .and_then(|delta| delta.content)
            .filter(|content| !content.is_empty());

        Ok(content)
    }
}
struct ConfigurationView {
state: gpui::Entity<State>,
loading_models_task: Option<Task<()>>,

View File

@ -221,6 +221,14 @@ pub enum CompatibilityType {
Mlx,
}
/// One incremental message delta from LM Studio's streaming chat-completion
/// API — the deserialized form of a `choices[n].delta` JSON object.
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct ResponseMessageDelta {
    // Role of the message author, when this delta includes one.
    pub role: Option<Role>,
    // Incremental text content carried by this delta, if any.
    pub content: Option<String>,
    // Incremental tool-call chunks; omitted from serialized output when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCallChunk>>,
}
pub async fn complete(
client: &dyn HttpClient,
api_url: &str,