Properly score fuzzy match queries with multiple chars in lower case (#29794)

Closes https://github.com/zed-industries/zed/issues/29526

Release Notes:

- Fixed the file finder crashing on file names that contain characters
whose lowercase form expands to multiple chars (for example, `İ`)
This commit is contained in:
Kirill Bulatov 2025-05-02 18:02:53 +03:00 committed by GitHub
parent d1b35be353
commit 7e2de84155
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 68 additions and 17 deletions

View File

@ -242,6 +242,38 @@ async fn test_matching_paths(cx: &mut TestAppContext) {
}
}
/// Regression test: a file whose name contains `İ` must still be matched by
/// the file finder and be openable from it.
///
/// `'İ'.to_lowercase()` yields more than one `char`, so the lowercased form of
/// the candidate path is longer than the path itself.
#[gpui::test]
async fn test_unicode_paths(cx: &mut TestAppContext) {
    let app_state = init_test(cx);

    // Populate the fake file system with a single file named "İg".
    let fake_fs = app_state.fs.as_fake();
    fake_fs
        .insert_tree(
            path!("/root"),
            json!({
                "a": {
                    "İg": " ",
                }
            }),
        )
        .await;

    let project = Project::test(app_state.fs.clone(), [path!("/root").as_ref()], cx).await;
    let (picker, workspace, cx) = build_find_picker(project, cx);

    // Typing "g" should produce exactly one match: the "İg" file.
    cx.simulate_input("g");
    picker.update(cx, |finder, _| {
        let match_count = finder.delegate.matches.len();
        assert_eq!(match_count, 1);
    });

    // Select and confirm the match, then verify that the file was opened.
    cx.dispatch_action(SelectNext);
    cx.dispatch_action(Confirm);
    cx.read(|app| {
        let opened_editor = workspace.read(app).active_item_as::<Editor>(app).unwrap();
        assert_eq!(opened_editor.read(app).title(app), "İg");
    });
}
#[gpui::test]
async fn test_absolute_paths(cx: &mut TestAppContext) {
let app_state = init_test(cx);

View File

@ -1,5 +1,6 @@
use std::{
borrow::{Borrow, Cow},
collections::BTreeMap,
sync::atomic::{self, AtomicBool},
};
@ -50,7 +51,7 @@ impl<'a> Matcher<'a> {
/// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as
/// the input candidates.
pub fn match_candidates<C, R, F, T>(
pub(crate) fn match_candidates<C, R, F, T>(
&mut self,
prefix: &[char],
lowercase_prefix: &[char],
@ -65,6 +66,7 @@ impl<'a> Matcher<'a> {
{
let mut candidate_chars = Vec::new();
let mut lowercase_candidate_chars = Vec::new();
let mut extra_lowercase_chars = BTreeMap::new();
for candidate in candidates {
if !candidate.borrow().has_chars(self.query_char_bag) {
@ -77,9 +79,14 @@ impl<'a> Matcher<'a> {
candidate_chars.clear();
lowercase_candidate_chars.clear();
for c in candidate.borrow().to_string().chars() {
extra_lowercase_chars.clear();
for (i, c) in candidate.borrow().to_string().chars().enumerate() {
candidate_chars.push(c);
lowercase_candidate_chars.append(&mut c.to_lowercase().collect::<Vec<_>>());
let mut char_lowercased = c.to_lowercase().collect::<Vec<_>>();
if char_lowercased.len() > 1 {
extra_lowercase_chars.insert(i, char_lowercased.len() - 1);
}
lowercase_candidate_chars.append(&mut char_lowercased);
}
if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) {
@ -97,6 +104,7 @@ impl<'a> Matcher<'a> {
&lowercase_candidate_chars,
prefix,
lowercase_prefix,
&extra_lowercase_chars,
);
if score > 0.0 {
@ -131,18 +139,20 @@ impl<'a> Matcher<'a> {
fn score_match(
&mut self,
path: &[char],
path_cased: &[char],
path_lowercased: &[char],
prefix: &[char],
lowercase_prefix: &[char],
extra_lowercase_chars: &BTreeMap<usize, usize>,
) -> f64 {
let score = self.recursive_score_match(
path,
path_cased,
path_lowercased,
prefix,
lowercase_prefix,
0,
0,
self.query.len() as f64,
extra_lowercase_chars,
) * self.query.len() as f64;
if score <= 0.0 {
@ -173,12 +183,13 @@ impl<'a> Matcher<'a> {
fn recursive_score_match(
&mut self,
path: &[char],
path_cased: &[char],
path_lowercased: &[char],
prefix: &[char],
lowercase_prefix: &[char],
query_idx: usize,
path_idx: usize,
cur_score: f64,
extra_lowercase_chars: &BTreeMap<usize, usize>,
) -> f64 {
use std::path::MAIN_SEPARATOR;
@ -200,15 +211,22 @@ impl<'a> Matcher<'a> {
let mut last_slash = 0;
for j in path_idx..=limit {
let path_char = if j < prefix.len() {
let extra_lowercase_chars_count = extra_lowercase_chars
.iter()
.take_while(|(i, _)| i < &&j)
.map(|(_, increment)| increment)
.sum::<usize>();
let j_regular = j - extra_lowercase_chars_count;
let path_char = if j_regular < prefix.len() {
lowercase_prefix[j]
} else {
path_cased[j - prefix.len()]
path_lowercased[j - prefix.len()]
};
let is_path_sep = path_char == MAIN_SEPARATOR;
if query_idx == 0 && is_path_sep {
last_slash = j;
last_slash = j_regular;
}
#[cfg(not(target_os = "windows"))]
@ -218,18 +236,18 @@ impl<'a> Matcher<'a> {
#[cfg(target_os = "windows")]
let need_to_score = query_char == path_char || (is_path_sep && query_char == '_');
if need_to_score {
let curr = if j < prefix.len() {
prefix[j]
let curr = if j_regular < prefix.len() {
prefix[j_regular]
} else {
path[j - prefix.len()]
path[j_regular - prefix.len()]
};
let mut char_score = 1.0;
if j > path_idx {
let last = if j - 1 < prefix.len() {
prefix[j - 1]
let last = if j_regular - 1 < prefix.len() {
prefix[j_regular - 1]
} else {
path[j - 1 - prefix.len()]
path[j_regular - 1 - prefix.len()]
};
if last == MAIN_SEPARATOR {
@ -279,17 +297,18 @@ impl<'a> Matcher<'a> {
let new_score = self.recursive_score_match(
path,
path_cased,
path_lowercased,
prefix,
lowercase_prefix,
query_idx + 1,
j + 1,
next_score,
extra_lowercase_chars,
) * multiplier;
if new_score > score {
score = new_score;
best_position = j;
best_position = j_regular;
// Optimization: can't score better than 1.
if new_score == 1.0 {
break;