lmstudio: Fix streaming not working in v0.3.15 (#30013)
Closes #29781 Tested this with llama3, gemma3 and qwen3. This is a breaking change: once this lands, future versions of Zed will require LM Studio >= 0.3.15. For context on why this is a breaking change, see the issue: #29781. What this doesn't try to solve: * Tool calling, thinking text rendering. A separate PR will be raised for these, as they are not required to make streaming work in this PR. https://github.com/user-attachments/assets/945f9c73-6323-4a88-92e2-2219b760a249 Release Notes: - lmstudio: Fixed Zed support for LMStudio >= v0.3.15 (breaking change -- older versions are no longer supported). --------- Co-authored-by: Peter Tripp <peter@zed.dev>
This commit is contained in:
parent
bbfcd885ab
commit
a743035286
@ -330,41 +330,23 @@ impl LanguageModel for LmStudioLanguageModel {
|
||||
|
||||
let future = self.request_limiter.stream(async move {
|
||||
let response = stream_chat_completion(http_client.as_ref(), &api_url, request).await?;
|
||||
let stream = response
|
||||
.filter_map(|response| async move {
|
||||
match response {
|
||||
Ok(fragment) => {
|
||||
// Skip empty deltas
|
||||
if fragment.choices[0].delta.is_object()
|
||||
&& fragment.choices[0].delta.as_object().unwrap().is_empty()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
// Try to parse the delta as ChatMessage
|
||||
if let Ok(chat_message) = serde_json::from_value::<ChatMessage>(
|
||||
fragment.choices[0].delta.clone(),
|
||||
) {
|
||||
let content = match chat_message {
|
||||
ChatMessage::User { content } => content,
|
||||
ChatMessage::Assistant { content, .. } => {
|
||||
content.unwrap_or_default()
|
||||
}
|
||||
ChatMessage::System { content } => content,
|
||||
};
|
||||
if !content.is_empty() {
|
||||
Some(Ok(content))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
// Create a stream mapper to handle content across multiple deltas
|
||||
let stream_mapper = LmStudioStreamMapper::new();
|
||||
|
||||
let stream = response
|
||||
.map(move |response| {
|
||||
response.and_then(|fragment| stream_mapper.process_fragment(fragment))
|
||||
})
|
||||
.filter_map(|result| async move {
|
||||
match result {
|
||||
Ok(Some(content)) => Some(Ok(content)),
|
||||
Ok(None) => None,
|
||||
Err(error) => Some(Err(error)),
|
||||
}
|
||||
})
|
||||
.boxed();
|
||||
|
||||
Ok(stream)
|
||||
});
|
||||
|
||||
@ -382,6 +364,40 @@ impl LanguageModel for LmStudioLanguageModel {
|
||||
}
|
||||
}
|
||||
|
||||
// This will be more useful when we implement tool calling. Currently keeping it empty.
|
||||
struct LmStudioStreamMapper {}
|
||||
|
||||
impl LmStudioStreamMapper {
|
||||
fn new() -> Self {
|
||||
Self {}
|
||||
}
|
||||
|
||||
fn process_fragment(&self, fragment: lmstudio::ChatResponse) -> Result<Option<String>> {
|
||||
// Most of the time, there will be only one choice
|
||||
let Some(choice) = fragment.choices.first() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
// Extract the delta content
|
||||
if let Ok(delta) =
|
||||
serde_json::from_value::<lmstudio::ResponseMessageDelta>(choice.delta.clone())
|
||||
{
|
||||
if let Some(content) = delta.content {
|
||||
if !content.is_empty() {
|
||||
return Ok(Some(content));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there's a finish_reason, we're done
|
||||
if choice.finish_reason.is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
struct ConfigurationView {
|
||||
state: gpui::Entity<State>,
|
||||
loading_models_task: Option<Task<()>>,
|
||||
|
@ -221,6 +221,14 @@ pub enum CompatibilityType {
|
||||
Mlx,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
|
||||
pub struct ResponseMessageDelta {
|
||||
pub role: Option<Role>,
|
||||
pub content: Option<String>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub tool_calls: Option<Vec<ToolCallChunk>>,
|
||||
}
|
||||
|
||||
pub async fn complete(
|
||||
client: &dyn HttpClient,
|
||||
api_url: &str,
|
||||
|
Loading…
Reference in New Issue
Block a user