From 975800eb5b8c0bca0ee4077a9c5091021cb40847 Mon Sep 17 00:00:00 2001 From: Vincent Prouillet Date: Mon, 22 Feb 2021 22:26:19 +0100 Subject: [PATCH] Enforce unic valid language codes --- Cargo.lock | 25 +++++++++++++++++ components/config/Cargo.toml | 1 + components/config/src/config/languages.rs | 27 ++++++++++++++---- components/config/src/config/mod.rs | 22 +++++++++------ components/config/src/lib.rs | 2 +- components/library/src/content/file_info.rs | 14 +++++----- components/library/src/content/page.rs | 8 +++--- components/library/src/content/section.rs | 8 +++--- components/library/src/taxonomies/mod.rs | 16 +++-------- components/site/src/lib.rs | 31 ++++++++------------- components/templates/src/global_fns/mod.rs | 8 +++--- test_site_i18n/config.toml | 9 +++--- 12 files changed, 102 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fca5c19..67fa6d4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -305,6 +305,7 @@ dependencies = [ "serde_derive", "syntect", "toml", + "unic-langid", "utils", ] @@ -2759,6 +2760,12 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "tinystr" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1" + [[package]] name = "tinyvec" version = "1.1.1" @@ -2898,6 +2905,24 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" +[[package]] +name = "unic-langid" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5" +dependencies = [ + "unic-langid-impl", +] + +[[package]] +name = "unic-langid-impl" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d" +dependencies = [ + "tinystr", +] + [[package]] name = "unic-segment" version = "0.9.0" diff --git a/components/config/Cargo.toml b/components/config/Cargo.toml index 43bbcddb..97589409 100644 --- a/components/config/Cargo.toml +++ b/components/config/Cargo.toml @@ -13,6 +13,7 @@ chrono = "0.4" globset = "0.4" lazy_static = "1" syntect = "4.1" +unic-langid = "0.9" errors = { path = "../errors" } utils = { path = "../utils" } diff --git a/components/config/src/config/languages.rs b/components/config/src/config/languages.rs index dee61ffb..20aaa00e 100644 --- a/components/config/src/config/languages.rs +++ b/components/config/src/config/languages.rs @@ -1,16 +1,31 @@ use std::collections::HashMap; +use errors::{bail, Result}; use serde_derive::{Deserialize, Serialize}; +use unic_langid::LanguageIdentifier; -#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(default)] -pub struct Language { - /// The language code - pub code: String, +pub struct LanguageOptions { /// Whether to generate a feed for that language, defaults to `false` - pub feed: bool, + pub generate_feed: bool, /// Whether to generate search index for that language, defaults to `false` - pub search: bool, + pub build_search_index: bool, +} + +impl Default for LanguageOptions { + fn default() -> Self { + LanguageOptions { generate_feed: false, build_search_index: false } + } } pub type TranslateTerm = HashMap; + +/// We want to ensure the language codes are valid ones +pub fn validate_code(code: &str) -> Result<()> { + if LanguageIdentifier::from_bytes(code.as_bytes()).is_err() { + bail!("Language `{}` is not a valid Unicode Language Identifier (see http://unicode.org/reports/tr35/#Unicode_language_identifier)", code) + } + + Ok(()) +} diff --git a/components/config/src/config/mod.rs b/components/config/src/config/mod.rs index 57682b3b..70edef14 100644 --- a/components/config/src/config/mod.rs +++ b/components/config/src/config/mod.rs @@ -44,7 +44,7 @@ pub struct Config { /// The language used in the site. Defaults to "en" pub default_language: String, /// The list of supported languages outside of the default one - pub languages: Vec, + pub languages: HashMap, /// Languages list and translated strings /// @@ -129,10 +129,15 @@ impl Config { bail!("Highlight theme {} defined in config does not exist.", highlight_theme); } - if config.languages.iter().any(|l| l.code == config.default_language) { + if config.languages.iter().any(|(code, _)| code == &config.default_language) { bail!("Default language `{}` should not appear both in `config.default_language` and `config.languages`", config.default_language) } + languages::validate_code(&config.default_language)?; + for code in config.languages.keys() { + languages::validate_code(&code)?; + } + if !config.ignored_content.is_empty() { // Convert the file glob strings into a compiled glob set matcher. We want to do this once, // at program initialization, rather than for every page, for example. We arrange for the @@ -280,7 +285,7 @@ impl Config { /// Returns the codes of all additional languages pub fn languages_codes(&self) -> Vec<&str> { - self.languages.iter().map(|l| l.code.as_ref()).collect() + self.languages.iter().map(|(code, _)| code.as_ref()).collect() } pub fn is_in_build_mode(&self) -> bool { @@ -362,7 +367,7 @@ impl Default for Config { highlight_code: false, highlight_theme: "base16-ocean-dark".to_string(), default_language: "en".to_string(), - languages: Vec::new(), + languages: HashMap::new(), generate_feed: false, feed_limit: None, feed_filename: "atom.xml".to_string(), @@ -671,10 +676,11 @@ anchors = "off" let config_str = r#" base_url = "https://remplace-par-ton-url.fr" default_language = "fr" -languages = [ - { code = "fr" }, - { code = "en" }, -] + +[languages.fr] + +[languages.en] + "#; let config = Config::parse(config_str); let err = config.unwrap_err(); diff --git a/components/config/src/lib.rs b/components/config/src/lib.rs index f431f232..0f5829ac 100644 --- a/components/config/src/lib.rs +++ b/components/config/src/lib.rs @@ -2,7 +2,7 @@ mod config; pub mod highlighting; mod theme; pub use crate::config::{ - languages::Language, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config, + languages::LanguageOptions, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config, }; use errors::Result; diff --git a/components/library/src/content/file_info.rs b/components/library/src/content/file_info.rs index 167d6048..d137ecb9 100644 --- a/components/library/src/content/file_info.rs +++ b/components/library/src/content/file_info.rs @@ -152,7 +152,7 @@ impl FileInfo { mod tests { use std::path::{Path, PathBuf}; - use config::{Config, Language}; + use config::{Config, LanguageOptions}; use super::{find_content_components, FileInfo}; @@ -184,7 +184,7 @@ mod tests { #[test] fn can_find_valid_language_in_page() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_page( &Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"), &PathBuf::new(), @@ -197,7 +197,7 @@ mod tests { #[test] fn can_find_valid_language_with_default_locale() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_page( &Path::new("/home/vincent/code/site/content/posts/tutorials/python.en.md"), &PathBuf::new(), @@ -210,7 +210,7 @@ mod tests { #[test] fn can_find_valid_language_in_page_with_assets() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_page( &Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"), &PathBuf::new(), @@ -236,7 +236,7 @@ mod tests { #[test] fn errors_on_unknown_language_in_page_with_i18n_on() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("it"), feed: false, search: false }); + config.languages.insert("it".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_page( &Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"), &PathBuf::new(), @@ -248,7 +248,7 @@ mod tests { #[test] fn can_find_valid_language_in_section() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_section( &Path::new("/home/vincent/code/site/content/posts/tutorials/_index.fr.md"), &PathBuf::new(), @@ -275,7 +275,7 @@ mod tests { #[test] fn correct_canonical_after_find_language() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut file = FileInfo::new_page( &Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"), &PathBuf::new(), diff --git a/components/library/src/content/page.rs b/components/library/src/content/page.rs index 7774dfcd..b0e8df85 100644 --- a/components/library/src/content/page.rs +++ b/components/library/src/content/page.rs @@ -333,7 +333,7 @@ mod tests { use tera::Tera; use super::Page; - use config::{Config, Language}; + use config::{Config, LanguageOptions}; use front_matter::InsertAnchor; use utils::slugs::SlugifyStrategy; @@ -805,7 +805,7 @@ Hello world #[test] fn can_specify_language_in_filename() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ +++ @@ -822,7 +822,7 @@ Bonjour le monde"# #[test] fn can_specify_language_in_filename_with_date() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ +++ @@ -841,7 +841,7 @@ Bonjour le monde"# #[test] fn i18n_frontmatter_path_overrides_default_permalink() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ path = "bonjour" diff --git a/components/library/src/content/section.rs b/components/library/src/content/section.rs index 299af1a5..00c9024f 100644 --- a/components/library/src/content/section.rs +++ b/components/library/src/content/section.rs @@ -254,7 +254,7 @@ mod tests { use tempfile::tempdir; use super::Section; - use config::{Config, Language}; + use config::{Config, LanguageOptions}; #[test] fn section_with_assets_gets_right_info() { @@ -312,7 +312,7 @@ mod tests { #[test] fn can_specify_language_in_filename() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ +++ @@ -334,7 +334,7 @@ Bonjour le monde"# #[test] fn can_make_links_to_translated_sections_without_double_trailing_slash() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ +++ @@ -351,7 +351,7 @@ Bonjour le monde"# #[test] fn can_make_links_to_translated_subsections_with_trailing_slash() { let mut config = Config::default(); - config.languages.push(Language { code: String::from("fr"), feed: false, search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let content = r#" +++ +++ diff --git a/components/library/src/taxonomies/mod.rs b/components/library/src/taxonomies/mod.rs index f4660d1b..d3291678 100644 --- a/components/library/src/taxonomies/mod.rs +++ b/components/library/src/taxonomies/mod.rs @@ -270,7 +270,7 @@ mod tests { use crate::content::Page; use crate::library::Library; - use config::{Config, Language, Slugify, Taxonomy as TaxonomyConfig}; + use config::{Config, LanguageOptions, Slugify, Taxonomy as TaxonomyConfig}; use utils::slugs::SlugifyStrategy; #[test] @@ -495,7 +495,7 @@ mod tests { #[test] fn can_make_taxonomies_in_multiple_languages() { let mut config = Config::default(); - config.languages.push(Language { feed: false, code: "fr".to_string(), search: false }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut library = Library::new(2, 0, true); config.taxonomies = vec![ @@ -605,11 +605,7 @@ mod tests { fn can_make_utf8_taxonomies() { let mut config = Config::default(); config.slugify.taxonomies = SlugifyStrategy::Safe; - config.languages.push(Language { - feed: false, - code: "fr".to_string(), - ..Language::default() - }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut library = Library::new(2, 0, true); config.taxonomies = vec![TaxonomyConfig { @@ -638,11 +634,7 @@ mod tests { fn can_make_slugified_taxonomies_in_multiple_languages() { let mut config = Config::default(); config.slugify.taxonomies = SlugifyStrategy::On; - config.languages.push(Language { - feed: false, - code: "fr".to_string(), - ..Language::default() - }); + config.languages.insert("fr".to_owned(), LanguageOptions::default()); let mut library = Library::new(2, 0, true); config.taxonomies = vec![ diff --git a/components/site/src/lib.rs b/components/site/src/lib.rs index 97704778..bf725abf 100644 --- a/components/site/src/lib.rs +++ b/components/site/src/lib.rs @@ -126,11 +126,8 @@ impl Site { /// There are one index section for the default language + 1 per language fn index_section_paths(&self) -> Vec<(PathBuf, Option)> { let mut res = vec![(self.content_path.join("_index.md"), None)]; - for language in &self.config.languages { - res.push(( - self.content_path.join(format!("_index.{}.md", language.code)), - Some(language.code.clone()), - )); + for code in self.config.languages.keys() { + res.push((self.content_path.join(format!("_index.{}.md", code)), Some(code.clone()))); } res } @@ -177,7 +174,7 @@ impl Site { // so it's kinda necessecary let mut dir_walker = WalkDir::new(format!("{}/{}", base_path, "content/")).into_iter(); let mut allowed_index_filenames: Vec<_> = - self.config.languages.iter().map(|l| format!("_index.{}.md", l.code)).collect(); + self.config.languages.iter().map(|(code, _)| format!("_index.{}.md", code)).collect(); allowed_index_filenames.push("_index.md".to_string()); loop { @@ -228,7 +225,7 @@ impl Site { Ok(f) => { let path_str = f.path().file_name().unwrap().to_str().unwrap(); if f.path().is_file() - && allowed_index_filenames.iter().find(|&s| *s == path_str).is_some() + && allowed_index_filenames.iter().any(|s| s == path_str) { Some(f) } else { @@ -660,13 +657,13 @@ impl Site { start = log_time(start, "Generated feed in default language"); } - for lang in &self.config.languages { - if !lang.feed { + for (code, language) in &self.config.languages { + if !language.generate_feed { continue; } let pages = - library.pages_values().iter().filter(|p| p.lang == lang.code).cloned().collect(); - self.render_feed(pages, Some(&PathBuf::from(lang.code.clone())), &lang.code, |c| c)?; + library.pages_values().iter().filter(|p| &p.lang == code).cloned().collect(); + self.render_feed(pages, Some(&PathBuf::from(code)), &code, |c| c)?; start = log_time(start, "Generated feed in other language"); } @@ -704,17 +701,13 @@ impl Site { ), )?; - for language in &self.config.languages { - if language.code != self.config.default_language && language.search { + for (code, language) in &self.config.languages { + if code != &self.config.default_language && language.build_search_index { create_file( - &self.output_path.join(&format!("search_index.{}.js", &language.code)), + &self.output_path.join(&format!("search_index.{}.js", &code)), &format!( "window.searchIndex = {};", - search::build_index( - &language.code, - &self.library.read().unwrap(), - &self.config - )? + search::build_index(&code, &self.library.read().unwrap(), &self.config)? ), )?; } diff --git a/components/templates/src/global_fns/mod.rs b/components/templates/src/global_fns/mod.rs index 7038c157..fa71cf75 100644 --- a/components/templates/src/global_fns/mod.rs +++ b/components/templates/src/global_fns/mod.rs @@ -67,7 +67,7 @@ fn make_path_with_lang(path: String, lang: &str, config: &Config) -> Result