lmstudio: Fix streaming not working in v0.3.15 (#30013)

Closes #29781 Tested this with llama3, gemma3 and qwen3. This is a breaking change: once this change ships, future versions of Zed will require lmstudio >= 0.3.15. For context on why this is a breaking change, see the issue: #29781. What this doesn't try to solve: * Tool calling, thinking text rendering. I will raise a separate PR for these, as they are not required for this PR to work. https://github.com/user-attachments/assets/945f9c73-6323-4a88-92e2-2219b760a249 Release Notes: - lmstudio: Fixed Zed support for LMStudio >= v0.3.15 (breaking change -- older versions are no longer supported). --------- Co-authored-by: Peter Tripp <peter@zed.dev>
2025-05-06 22:29:36 +05:30 · 2025-05-06 22:29:36 +05:30 · a743035286
commit a743035286
parent bbfcd885ab
2 changed files with 54 additions and 30 deletions
--- a/crates/language_models/src/provider/lmstudio.rs
+++ b/crates/language_models/src/provider/lmstudio.rs
@ -330,41 +330,23 @@ impl LanguageModel for LmStudioLanguageModel {

        let future = self.request_limiter.stream(async move {
            let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
-            let stream = response
-                .filter_map(|response| async move {
-                    match response {
-                        Ok(fragment) => {
-                            // Skip empty deltas
-                            if fragment.choices[0].delta.is_object()
-                                && fragment.choices[0].delta.as_object().unwrap().is_empty()
-                            {
-                                return None;
-                            }

-                            // Try to parse the delta as ChatMessage
-                            if let Ok(chat_message) = serde_json::from_value::<ChatMessage>(
-                                fragment.choices[0].delta.clone(),
-                            ) {
-                                let content = match chat_message {
-                                    ChatMessage::User { content } => content,
-                                    ChatMessage::Assistant { content, .. } => {
-                                        content.unwrap_or_default()
-                                    }
-                                    ChatMessage::System { content } => content,
-                                };
-                                if !content.is_empty() {
-                                    Some(Ok(content))
-                                } else {
-                                    None
-                                }
-                            } else {
-                                None
-                            }
-                        }
+            // Create the stream mapper that extracts the text content from each streamed delta
+            let stream_mapper = LmStudioStreamMapper::new();
+
+            let stream = response
+                .map(move |response| {
+                    response.and_then(|fragment| stream_mapper.process_fragment(fragment))
+                })
+                .filter_map(|result| async move {
+                    match result {
+                        Ok(Some(content)) => Some(Ok(content)),
+                        Ok(None) => None,
                        Err(error) => Some(Err(error)),
                    }
                })
                .boxed();
+
            Ok(stream)
        });

@ -382,6 +364,40 @@ impl LanguageModel for LmStudioLanguageModel {
    }
 }

+// Maps each streamed chat fragment to its text content. It has no fields yet; this will be more useful when we implement tool calling.
+struct LmStudioStreamMapper {}
+
+impl LmStudioStreamMapper {
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn process_fragment(&self, fragment: lmstudio::ChatResponse) -> Result<Option<String>> {
+        // Most of the time, there will be only one choice; only the first one is read, and a fragment with no choices yields Ok(None)
+        let Some(choice) = fragment.choices.first() else {
+            return Ok(None);
+        };
+
+        // Extract the delta content. A delta that fails to deserialize is ignored (not reported as an error), and empty content is dropped
+        if let Ok(delta) =
+            serde_json::from_value::<lmstudio::ResponseMessageDelta>(choice.delta.clone())
+        {
+            if let Some(content) = delta.content {
+                if !content.is_empty() {
+                    return Ok(Some(content));
+                }
+            }
+        }
+
+        // If there's a finish_reason, we're done. NOTE(review): this returns the same Ok(None) as the fall-through below, so the check is currently redundant
+        if choice.finish_reason.is_some() {
+            return Ok(None);
+        }
+
+        Ok(None)
+    }
+}
+
 struct ConfigurationView {
    state: gpui::Entity<State>,
    loading_models_task: Option<Task<()>>,
--- a/crates/lmstudio/src/lmstudio.rs
+++ b/crates/lmstudio/src/lmstudio.rs
@ -221,6 +221,14 @@ pub enum CompatibilityType {
    Mlx,
 }

+#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
+pub struct ResponseMessageDelta { // shape that a streamed choice's `delta` JSON value is deserialized into
+    pub role: Option<Role>,
+    pub content: Option<String>, // text carried by this delta, if any
+    #[serde(default, skip_serializing_if = "Option::is_none")] // missing on input -> None; None is omitted on output
+    pub tool_calls: Option<Vec<ToolCallChunk>>,
+}
+
 pub async fn complete(
    client: &dyn HttpClient,
    api_url: &str,