From 7e2de84155223a8d282c1f73a4349a5dd78f692a Mon Sep 17 00:00:00 2001 From: Kirill Bulatov Date: Fri, 2 May 2025 18:02:53 +0300 Subject: [PATCH] Properly score fuzzy match queries with multiple chars in lower case (#29794) Closes https://github.com/zed-industries/zed/issues/29526 Release Notes: - Fixed file finder crashing for certain file names with multiple chars in lowercase form --- crates/file_finder/src/file_finder_tests.rs | 32 +++++++++++++ crates/fuzzy/src/matcher.rs | 53 ++++++++++++++------- 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/crates/file_finder/src/file_finder_tests.rs b/crates/file_finder/src/file_finder_tests.rs index 180994c4db..37d12562c6 100644 --- a/crates/file_finder/src/file_finder_tests.rs +++ b/crates/file_finder/src/file_finder_tests.rs @@ -242,6 +242,38 @@ async fn test_matching_paths(cx: &mut TestAppContext) { } } +#[gpui::test] +async fn test_unicode_paths(cx: &mut TestAppContext) { + let app_state = init_test(cx); + app_state + .fs + .as_fake() + .insert_tree( + path!("/root"), + json!({ + "a": { + "İg": " ", + } + }), + ) + .await; + + let project = Project::test(app_state.fs.clone(), [path!("/root").as_ref()], cx).await; + + let (picker, workspace, cx) = build_find_picker(project, cx); + + cx.simulate_input("g"); + picker.update(cx, |picker, _| { + assert_eq!(picker.delegate.matches.len(), 1); + }); + cx.dispatch_action(SelectNext); + cx.dispatch_action(Confirm); + cx.read(|cx| { + let active_editor = workspace.read(cx).active_item_as::<Editor>(cx).unwrap(); + assert_eq!(active_editor.read(cx).title(cx), "İg"); + }); +} + #[gpui::test] async fn test_absolute_paths(cx: &mut TestAppContext) { let app_state = init_test(cx); diff --git a/crates/fuzzy/src/matcher.rs b/crates/fuzzy/src/matcher.rs index 44fd89c966..0fe5ff098d 100644 --- a/crates/fuzzy/src/matcher.rs +++ b/crates/fuzzy/src/matcher.rs @@ -1,5 +1,6 @@ use std::{ borrow::{Borrow, Cow}, + collections::BTreeMap, sync::atomic::{self, AtomicBool}, }; @@ -50,7 
+51,7 @@ impl<'a> Matcher<'a> { /// Filter and score fuzzy match candidates. Results are returned unsorted, in the same order as /// the input candidates. - pub fn match_candidates( + pub(crate) fn match_candidates( &mut self, prefix: &[char], lowercase_prefix: &[char], @@ -65,6 +66,7 @@ impl<'a> Matcher<'a> { { let mut candidate_chars = Vec::new(); let mut lowercase_candidate_chars = Vec::new(); + let mut extra_lowercase_chars = BTreeMap::new(); for candidate in candidates { if !candidate.borrow().has_chars(self.query_char_bag) { @@ -77,9 +79,14 @@ impl<'a> Matcher<'a> { candidate_chars.clear(); lowercase_candidate_chars.clear(); - for c in candidate.borrow().to_string().chars() { + extra_lowercase_chars.clear(); + for (i, c) in candidate.borrow().to_string().chars().enumerate() { candidate_chars.push(c); - lowercase_candidate_chars.append(&mut c.to_lowercase().collect::<Vec<_>>()); + let mut char_lowercased = c.to_lowercase().collect::<Vec<_>>(); + if char_lowercased.len() > 1 { + extra_lowercase_chars.insert(i, char_lowercased.len() - 1); + } + lowercase_candidate_chars.append(&mut char_lowercased); } if !self.find_last_positions(lowercase_prefix, &lowercase_candidate_chars) { @@ -97,6 +104,7 @@ impl<'a> Matcher<'a> { &lowercase_candidate_chars, prefix, lowercase_prefix, + &extra_lowercase_chars, ); if score > 0.0 { @@ -131,18 +139,20 @@ impl<'a> Matcher<'a> { fn score_match( &mut self, path: &[char], - path_cased: &[char], + path_lowercased: &[char], prefix: &[char], lowercase_prefix: &[char], + extra_lowercase_chars: &BTreeMap<usize, usize>, ) -> f64 { let score = self.recursive_score_match( path, - path_cased, + path_lowercased, prefix, lowercase_prefix, 0, 0, self.query.len() as f64, + extra_lowercase_chars, ) * self.query.len() as f64; if score <= 0.0 { @@ -173,12 +183,13 @@ impl<'a> Matcher<'a> { fn recursive_score_match( &mut self, path: &[char], - path_cased: &[char], + path_lowercased: &[char], prefix: &[char], lowercase_prefix: &[char], query_idx: usize, path_idx: usize, 
cur_score: f64, + extra_lowercase_chars: &BTreeMap<usize, usize>, ) -> f64 { use std::path::MAIN_SEPARATOR; @@ -200,15 +211,22 @@ impl<'a> Matcher<'a> { let mut last_slash = 0; for j in path_idx..=limit { - let path_char = if j < prefix.len() { + let extra_lowercase_chars_count = extra_lowercase_chars + .iter() + .take_while(|(i, _)| i < &&j) + .map(|(_, increment)| increment) + .sum::<usize>(); + let j_regular = j - extra_lowercase_chars_count; + + let path_char = if j_regular < prefix.len() { lowercase_prefix[j] } else { - path_cased[j - prefix.len()] + path_lowercased[j - prefix.len()] }; let is_path_sep = path_char == MAIN_SEPARATOR; if query_idx == 0 && is_path_sep { - last_slash = j; + last_slash = j_regular; } #[cfg(not(target_os = "windows"))] @@ -218,18 +236,18 @@ impl<'a> Matcher<'a> { #[cfg(target_os = "windows")] let need_to_score = query_char == path_char || (is_path_sep && query_char == '_'); if need_to_score { - let curr = if j < prefix.len() { - prefix[j] + let curr = if j_regular < prefix.len() { + prefix[j_regular] } else { - path[j - prefix.len()] + path[j_regular - prefix.len()] }; let mut char_score = 1.0; if j > path_idx { - let last = if j - 1 < prefix.len() { - prefix[j - 1] + let last = if j_regular - 1 < prefix.len() { + prefix[j_regular - 1] } else { - path[j - 1 - prefix.len()] + path[j_regular - 1 - prefix.len()] }; if last == MAIN_SEPARATOR { @@ -279,17 +297,18 @@ impl<'a> Matcher<'a> { let new_score = self.recursive_score_match( path, - path_cased, + path_lowercased, prefix, lowercase_prefix, query_idx + 1, j + 1, next_score, + extra_lowercase_chars, ) * multiplier; if new_score > score { score = new_score; - best_position = j; + best_position = j_regular; // Optimization: can't score better than 1. if new_score == 1.0 { break;