language: Optimize language_for_file (#28671)

While working on #28670, this function showed up in my profiles. This PR
makes it evaluate some of its conditions lazily and prevents the constant
rebuilding of `globset::Candidate` values.

Release Notes:

- N/A
This commit is contained in:
Piotr Osiewicz 2025-04-14 13:18:45 +02:00 committed by GitHub
parent 5e57f148ac
commit 98891e4c70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 28 deletions

View File

@ -8,7 +8,7 @@ use crate::{
with_parser, with_parser,
}; };
use anyhow::{Context as _, Result, anyhow}; use anyhow::{Context as _, Result, anyhow};
use collections::{HashMap, HashSet, hash_map}; use collections::{FxHashMap, HashMap, HashSet, hash_map};
use futures::{ use futures::{
Future, Future,
@ -21,8 +21,10 @@ use parking_lot::{Mutex, RwLock};
use postage::watch; use postage::watch;
use schemars::JsonSchema; use schemars::JsonSchema;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::{ use std::{
borrow::{Borrow, Cow}, borrow::{Borrow, Cow},
cell::LazyCell,
ffi::OsStr, ffi::OsStr,
ops::Not, ops::Not,
path::{Path, PathBuf}, path::{Path, PathBuf},
@ -674,7 +676,7 @@ impl LanguageRegistry {
self: &Arc<Self>, self: &Arc<Self>,
path: &Path, path: &Path,
content: Option<&Rope>, content: Option<&Rope>,
user_file_types: Option<&HashMap<Arc<str>, GlobSet>>, user_file_types: Option<&FxHashMap<Arc<str>, GlobSet>>,
) -> Option<AvailableLanguage> { ) -> Option<AvailableLanguage> {
let filename = path.file_name().and_then(|name| name.to_str()); let filename = path.file_name().and_then(|name| name.to_str());
// `Path.extension()` returns None for files with a leading '.' // `Path.extension()` returns None for files with a leading '.'
@ -682,32 +684,42 @@ impl LanguageRegistry {
// as we want `.zshrc` to result in extension being `Some("zshrc")` // as we want `.zshrc` to result in extension being `Some("zshrc")`
let extension = filename.and_then(|filename| filename.split('.').next_back()); let extension = filename.and_then(|filename| filename.split('.').next_back());
let path_suffixes = [extension, filename, path.to_str()]; let path_suffixes = [extension, filename, path.to_str()];
let path_suffixes_candidates = path_suffixes
.iter()
.filter_map(|suffix| suffix.map(globset::Candidate::new))
.collect::<SmallVec<[_; 3]>>();
let empty = GlobSet::empty(); let empty = GlobSet::empty();
let content = LazyCell::new(|| {
content.map(|content| {
let end = content.clip_point(Point::new(0, 256), Bias::Left);
let end = content.point_to_offset(end);
content.chunks_in_range(0..end).collect::<String>()
})
});
self.find_matching_language(move |language_name, config| { self.find_matching_language(move |language_name, config| {
let path_matches_default_suffix = config let path_matches_default_suffix = || {
.path_suffixes config
.iter() .path_suffixes
.any(|suffix| path_suffixes.contains(&Some(suffix.as_str()))); .iter()
let custom_suffixes = user_file_types .any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
.and_then(|types| types.get(language_name.as_ref())) };
.unwrap_or(&empty); let path_matches_custom_suffix = || {
let path_matches_custom_suffix = path_suffixes let custom_suffixes = user_file_types
.iter() .and_then(|types| types.get(language_name.as_ref()))
.map(|suffix| suffix.unwrap_or("")) .unwrap_or(&empty);
.any(|suffix| custom_suffixes.is_match(suffix)); path_suffixes_candidates
let content_matches = content.zip(config.first_line_pattern.as_ref()).map_or( .iter()
false, .any(|suffix| custom_suffixes.is_match_candidate(suffix))
|(content, pattern)| { };
let end = content.clip_point(Point::new(0, 256), Bias::Left); let content_matches = || {
let end = content.point_to_offset(end); content
let text = content.chunks_in_range(0..end).collect::<String>(); .as_ref()
pattern.is_match(&text) .zip(config.first_line_pattern.as_ref())
}, .map_or(false, |(text, pattern)| pattern.is_match(&text))
); };
if path_matches_custom_suffix { if path_matches_custom_suffix() {
2 2
} else if path_matches_default_suffix || content_matches { } else if path_matches_default_suffix() || content_matches() {
1 1
} else { } else {
0 0

View File

@ -2,7 +2,7 @@
use crate::{File, Language, LanguageName, LanguageServerName}; use crate::{File, Language, LanguageName, LanguageServerName};
use anyhow::Result; use anyhow::Result;
use collections::{HashMap, HashSet}; use collections::{FxHashMap, HashMap, HashSet};
use core::slice; use core::slice;
use ec4rs::{ use ec4rs::{
Properties as EditorconfigProperties, Properties as EditorconfigProperties,
@ -63,7 +63,7 @@ pub struct AllLanguageSettings {
pub edit_predictions: EditPredictionSettings, pub edit_predictions: EditPredictionSettings,
pub defaults: LanguageSettings, pub defaults: LanguageSettings,
languages: HashMap<LanguageName, LanguageSettings>, languages: HashMap<LanguageName, LanguageSettings>,
pub(crate) file_types: HashMap<Arc<str>, GlobSet>, pub(crate) file_types: FxHashMap<Arc<str>, GlobSet>,
} }
/// The settings for a particular language. /// The settings for a particular language.
@ -1217,7 +1217,7 @@ impl settings::Settings for AllLanguageSettings {
.map(|settings| settings.enabled_in_assistant) .map(|settings| settings.enabled_in_assistant)
.unwrap_or(true); .unwrap_or(true);
let mut file_types: HashMap<Arc<str>, GlobSet> = HashMap::default(); let mut file_types: FxHashMap<Arc<str>, GlobSet> = FxHashMap::default();
for (language, suffixes) in &default_value.file_types { for (language, suffixes) in &default_value.file_types {
let mut builder = GlobSetBuilder::new(); let mut builder = GlobSetBuilder::new();