From 98891e4c702badb4c9323a8f9bd4354b53938408 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Mon, 14 Apr 2025 13:18:45 +0200 Subject: [PATCH] language: Optimize language_for_file (#28671) While working on #28670 this function showed up in my profiles; this PR makes it evaluate some of its conditions lazily and prevents constant rebuilding of globset::Candidates. Release Notes: - N/A *or* Added/Fixed/Improved ... --- crates/language/src/language_registry.rs | 62 ++++++++++++++---------- crates/language/src/language_settings.rs | 6 +-- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/crates/language/src/language_registry.rs b/crates/language/src/language_registry.rs index a575e08022..d7a4293ee4 100644 --- a/crates/language/src/language_registry.rs +++ b/crates/language/src/language_registry.rs @@ -8,7 +8,7 @@ use crate::{ with_parser, }; use anyhow::{Context as _, Result, anyhow}; -use collections::{HashMap, HashSet, hash_map}; +use collections::{FxHashMap, HashMap, HashSet, hash_map}; use futures::{ Future, @@ -21,8 +21,10 @@ use parking_lot::{Mutex, RwLock}; use postage::watch; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; use std::{ borrow::{Borrow, Cow}, + cell::LazyCell, ffi::OsStr, ops::Not, path::{Path, PathBuf}, @@ -674,7 +676,7 @@ impl LanguageRegistry { self: &Arc, path: &Path, content: Option<&Rope>, - user_file_types: Option<&HashMap, GlobSet>>, + user_file_types: Option<&FxHashMap, GlobSet>>, ) -> Option { let filename = path.file_name().and_then(|name| name.to_str()); // `Path.extension()` returns None for files with a leading '.' 
@@ -682,32 +684,42 @@ impl LanguageRegistry { // as we want `.zshrc` to result in extension being `Some("zshrc")` let extension = filename.and_then(|filename| filename.split('.').next_back()); let path_suffixes = [extension, filename, path.to_str()]; + let path_suffixes_candidates = path_suffixes + .iter() + .filter_map(|suffix| suffix.map(globset::Candidate::new)) + .collect::>(); let empty = GlobSet::empty(); - + let content = LazyCell::new(|| { + content.map(|content| { + let end = content.clip_point(Point::new(0, 256), Bias::Left); + let end = content.point_to_offset(end); + content.chunks_in_range(0..end).collect::() + }) + }); self.find_matching_language(move |language_name, config| { - let path_matches_default_suffix = config - .path_suffixes - .iter() - .any(|suffix| path_suffixes.contains(&Some(suffix.as_str()))); - let custom_suffixes = user_file_types - .and_then(|types| types.get(language_name.as_ref())) - .unwrap_or(&empty); - let path_matches_custom_suffix = path_suffixes - .iter() - .map(|suffix| suffix.unwrap_or("")) - .any(|suffix| custom_suffixes.is_match(suffix)); - let content_matches = content.zip(config.first_line_pattern.as_ref()).map_or( - false, - |(content, pattern)| { - let end = content.clip_point(Point::new(0, 256), Bias::Left); - let end = content.point_to_offset(end); - let text = content.chunks_in_range(0..end).collect::(); - pattern.is_match(&text) - }, - ); - if path_matches_custom_suffix { + let path_matches_default_suffix = || { + config + .path_suffixes + .iter() + .any(|suffix| path_suffixes.contains(&Some(suffix.as_str()))) + }; + let path_matches_custom_suffix = || { + let custom_suffixes = user_file_types + .and_then(|types| types.get(language_name.as_ref())) + .unwrap_or(&empty); + path_suffixes_candidates + .iter() + .any(|suffix| custom_suffixes.is_match_candidate(suffix)) + }; + let content_matches = || { + content + .as_ref() + .zip(config.first_line_pattern.as_ref()) + .map_or(false, |(text, pattern)| 
pattern.is_match(&text)) + }; + if path_matches_custom_suffix() { 2 - } else if path_matches_default_suffix || content_matches { + } else if path_matches_default_suffix() || content_matches() { 1 } else { 0 diff --git a/crates/language/src/language_settings.rs b/crates/language/src/language_settings.rs index ca2c33419f..56ffbbef2f 100644 --- a/crates/language/src/language_settings.rs +++ b/crates/language/src/language_settings.rs @@ -2,7 +2,7 @@ use crate::{File, Language, LanguageName, LanguageServerName}; use anyhow::Result; -use collections::{HashMap, HashSet}; +use collections::{FxHashMap, HashMap, HashSet}; use core::slice; use ec4rs::{ Properties as EditorconfigProperties, @@ -63,7 +63,7 @@ pub struct AllLanguageSettings { pub edit_predictions: EditPredictionSettings, pub defaults: LanguageSettings, languages: HashMap, - pub(crate) file_types: HashMap, GlobSet>, + pub(crate) file_types: FxHashMap, GlobSet>, } /// The settings for a particular language. @@ -1217,7 +1217,7 @@ impl settings::Settings for AllLanguageSettings { .map(|settings| settings.enabled_in_assistant) .unwrap_or(true); - let mut file_types: HashMap, GlobSet> = HashMap::default(); + let mut file_types: FxHashMap, GlobSet> = FxHashMap::default(); for (language, suffixes) in &default_value.file_types { let mut builder = GlobSetBuilder::new();