Enforce unic valid language codes

This commit is contained in:
Vincent Prouillet 2021-02-22 22:26:19 +01:00
parent ba8939b240
commit 975800eb5b
12 changed files with 102 additions and 69 deletions

25
Cargo.lock generated
View File

@ -305,6 +305,7 @@ dependencies = [
"serde_derive",
"syntect",
"toml",
"unic-langid",
"utils",
]
@ -2759,6 +2760,12 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "tinystr"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29738eedb4388d9ea620eeab9384884fc3f06f586a2eddb56bedc5885126c7c1"
[[package]]
name = "tinyvec"
version = "1.1.1"
@ -2898,6 +2905,24 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]]
name = "unic-langid"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73328fcd730a030bdb19ddf23e192187a6b01cd98be6d3140622a89129459ce5"
dependencies = [
"unic-langid-impl",
]
[[package]]
name = "unic-langid-impl"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a4a8eeaf0494862c1404c95ec2f4c33a2acff5076f64314b465e3ddae1b934d"
dependencies = [
"tinystr",
]
[[package]]
name = "unic-segment"
version = "0.9.0"

View File

@ -13,6 +13,7 @@ chrono = "0.4"
globset = "0.4"
lazy_static = "1"
syntect = "4.1"
unic-langid = "0.9"
errors = { path = "../errors" }
utils = { path = "../utils" }

View File

@ -1,16 +1,31 @@
use std::collections::HashMap;
use errors::{bail, Result};
use serde_derive::{Deserialize, Serialize};
use unic_langid::LanguageIdentifier;
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct Language {
/// The language code
pub code: String,
pub struct LanguageOptions {
/// Whether to generate a feed for that language, defaults to `false`
pub feed: bool,
pub generate_feed: bool,
/// Whether to generate search index for that language, defaults to `false`
pub search: bool,
pub build_search_index: bool,
}
impl Default for LanguageOptions {
fn default() -> Self {
LanguageOptions { generate_feed: false, build_search_index: false }
}
}
/// Alias for a `String` -> `String` map.
/// NOTE(review): presumably maps a translation key to its translated text for
/// one language — confirm against the callers, which are not visible here.
pub type TranslateTerm = HashMap<String, String>;
/// Checks that `code` is accepted as a Unicode Language Identifier.
///
/// The code is handed to `LanguageIdentifier::from_bytes`; when parsing
/// succeeds this returns `Ok(())`, otherwise it bails with an error message
/// that names the rejected code.
pub fn validate_code(code: &str) -> Result<()> {
    let parsed = LanguageIdentifier::from_bytes(code.as_bytes());
    if parsed.is_ok() {
        return Ok(());
    }
    bail!("Language `{}` is not a valid Unicode Language Identifier (see http://unicode.org/reports/tr35/#Unicode_language_identifier)", code)
}

View File

@ -44,7 +44,7 @@ pub struct Config {
/// The language used in the site. Defaults to "en"
pub default_language: String,
/// The list of supported languages outside of the default one
pub languages: Vec<languages::Language>,
pub languages: HashMap<String, languages::LanguageOptions>,
/// Languages list and translated strings
///
@ -129,10 +129,15 @@ impl Config {
bail!("Highlight theme {} defined in config does not exist.", highlight_theme);
}
if config.languages.iter().any(|l| l.code == config.default_language) {
if config.languages.iter().any(|(code, _)| code == &config.default_language) {
bail!("Default language `{}` should not appear both in `config.default_language` and `config.languages`", config.default_language)
}
languages::validate_code(&config.default_language)?;
for code in config.languages.keys() {
languages::validate_code(&code)?;
}
if !config.ignored_content.is_empty() {
// Convert the file glob strings into a compiled glob set matcher. We want to do this once,
// at program initialization, rather than for every page, for example. We arrange for the
@ -280,7 +285,7 @@ impl Config {
/// Returns the codes of all additional languages
pub fn languages_codes(&self) -> Vec<&str> {
self.languages.iter().map(|l| l.code.as_ref()).collect()
self.languages.iter().map(|(code, _)| code.as_ref()).collect()
}
pub fn is_in_build_mode(&self) -> bool {
@ -362,7 +367,7 @@ impl Default for Config {
highlight_code: false,
highlight_theme: "base16-ocean-dark".to_string(),
default_language: "en".to_string(),
languages: Vec::new(),
languages: HashMap::new(),
generate_feed: false,
feed_limit: None,
feed_filename: "atom.xml".to_string(),
@ -671,10 +676,11 @@ anchors = "off"
let config_str = r#"
base_url = "https://remplace-par-ton-url.fr"
default_language = "fr"
languages = [
{ code = "fr" },
{ code = "en" },
]
[languages.fr]
[languages.en]
"#;
let config = Config::parse(config_str);
let err = config.unwrap_err();

View File

@ -2,7 +2,7 @@ mod config;
pub mod highlighting;
mod theme;
pub use crate::config::{
languages::Language, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config,
languages::LanguageOptions, link_checker::LinkChecker, slugify::Slugify, taxonomies::Taxonomy, Config,
};
use errors::Result;

View File

@ -152,7 +152,7 @@ impl FileInfo {
mod tests {
use std::path::{Path, PathBuf};
use config::{Config, Language};
use config::{Config, LanguageOptions};
use super::{find_content_components, FileInfo};
@ -184,7 +184,7 @@ mod tests {
#[test]
fn can_find_valid_language_in_page() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"),
&PathBuf::new(),
@ -197,7 +197,7 @@ mod tests {
#[test]
fn can_find_valid_language_with_default_locale() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.en.md"),
&PathBuf::new(),
@ -210,7 +210,7 @@ mod tests {
#[test]
fn can_find_valid_language_in_page_with_assets() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"),
&PathBuf::new(),
@ -236,7 +236,7 @@ mod tests {
#[test]
fn errors_on_unknown_language_in_page_with_i18n_on() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("it"), feed: false, search: false });
config.languages.insert("it".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python.fr.md"),
&PathBuf::new(),
@ -248,7 +248,7 @@ mod tests {
#[test]
fn can_find_valid_language_in_section() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_section(
&Path::new("/home/vincent/code/site/content/posts/tutorials/_index.fr.md"),
&PathBuf::new(),
@ -275,7 +275,7 @@ mod tests {
#[test]
fn correct_canonical_after_find_language() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut file = FileInfo::new_page(
&Path::new("/home/vincent/code/site/content/posts/tutorials/python/index.fr.md"),
&PathBuf::new(),

View File

@ -333,7 +333,7 @@ mod tests {
use tera::Tera;
use super::Page;
use config::{Config, Language};
use config::{Config, LanguageOptions};
use front_matter::InsertAnchor;
use utils::slugs::SlugifyStrategy;
@ -805,7 +805,7 @@ Hello world
#[test]
fn can_specify_language_in_filename() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
+++
@ -822,7 +822,7 @@ Bonjour le monde"#
#[test]
fn can_specify_language_in_filename_with_date() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
+++
@ -841,7 +841,7 @@ Bonjour le monde"#
#[test]
fn i18n_frontmatter_path_overrides_default_permalink() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
path = "bonjour"

View File

@ -254,7 +254,7 @@ mod tests {
use tempfile::tempdir;
use super::Section;
use config::{Config, Language};
use config::{Config, LanguageOptions};
#[test]
fn section_with_assets_gets_right_info() {
@ -312,7 +312,7 @@ mod tests {
#[test]
fn can_specify_language_in_filename() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
+++
@ -334,7 +334,7 @@ Bonjour le monde"#
#[test]
fn can_make_links_to_translated_sections_without_double_trailing_slash() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
+++
@ -351,7 +351,7 @@ Bonjour le monde"#
#[test]
fn can_make_links_to_translated_subsections_with_trailing_slash() {
let mut config = Config::default();
config.languages.push(Language { code: String::from("fr"), feed: false, search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let content = r#"
+++
+++

View File

@ -270,7 +270,7 @@ mod tests {
use crate::content::Page;
use crate::library::Library;
use config::{Config, Language, Slugify, Taxonomy as TaxonomyConfig};
use config::{Config, LanguageOptions, Slugify, Taxonomy as TaxonomyConfig};
use utils::slugs::SlugifyStrategy;
#[test]
@ -495,7 +495,7 @@ mod tests {
#[test]
fn can_make_taxonomies_in_multiple_languages() {
let mut config = Config::default();
config.languages.push(Language { feed: false, code: "fr".to_string(), search: false });
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut library = Library::new(2, 0, true);
config.taxonomies = vec![
@ -605,11 +605,7 @@ mod tests {
fn can_make_utf8_taxonomies() {
let mut config = Config::default();
config.slugify.taxonomies = SlugifyStrategy::Safe;
config.languages.push(Language {
feed: false,
code: "fr".to_string(),
..Language::default()
});
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut library = Library::new(2, 0, true);
config.taxonomies = vec![TaxonomyConfig {
@ -638,11 +634,7 @@ mod tests {
fn can_make_slugified_taxonomies_in_multiple_languages() {
let mut config = Config::default();
config.slugify.taxonomies = SlugifyStrategy::On;
config.languages.push(Language {
feed: false,
code: "fr".to_string(),
..Language::default()
});
config.languages.insert("fr".to_owned(), LanguageOptions::default());
let mut library = Library::new(2, 0, true);
config.taxonomies = vec![

View File

@ -126,11 +126,8 @@ impl Site {
/// There is one index section for the default language + 1 per language
fn index_section_paths(&self) -> Vec<(PathBuf, Option<String>)> {
let mut res = vec![(self.content_path.join("_index.md"), None)];
for language in &self.config.languages {
res.push((
self.content_path.join(format!("_index.{}.md", language.code)),
Some(language.code.clone()),
));
for code in self.config.languages.keys() {
res.push((self.content_path.join(format!("_index.{}.md", code)), Some(code.clone())));
}
res
}
@ -177,7 +174,7 @@ impl Site {
// so it's kinda necessary
let mut dir_walker = WalkDir::new(format!("{}/{}", base_path, "content/")).into_iter();
let mut allowed_index_filenames: Vec<_> =
self.config.languages.iter().map(|l| format!("_index.{}.md", l.code)).collect();
self.config.languages.iter().map(|(code, _)| format!("_index.{}.md", code)).collect();
allowed_index_filenames.push("_index.md".to_string());
loop {
@ -228,7 +225,7 @@ impl Site {
Ok(f) => {
let path_str = f.path().file_name().unwrap().to_str().unwrap();
if f.path().is_file()
&& allowed_index_filenames.iter().find(|&s| *s == path_str).is_some()
&& allowed_index_filenames.iter().any(|s| s == path_str)
{
Some(f)
} else {
@ -660,13 +657,13 @@ impl Site {
start = log_time(start, "Generated feed in default language");
}
for lang in &self.config.languages {
if !lang.feed {
for (code, language) in &self.config.languages {
if !language.generate_feed {
continue;
}
let pages =
library.pages_values().iter().filter(|p| p.lang == lang.code).cloned().collect();
self.render_feed(pages, Some(&PathBuf::from(lang.code.clone())), &lang.code, |c| c)?;
library.pages_values().iter().filter(|p| &p.lang == code).cloned().collect();
self.render_feed(pages, Some(&PathBuf::from(code)), &code, |c| c)?;
start = log_time(start, "Generated feed in other language");
}
@ -704,17 +701,13 @@ impl Site {
),
)?;
for language in &self.config.languages {
if language.code != self.config.default_language && language.search {
for (code, language) in &self.config.languages {
if code != &self.config.default_language && language.build_search_index {
create_file(
&self.output_path.join(&format!("search_index.{}.js", &language.code)),
&self.output_path.join(&format!("search_index.{}.js", &code)),
&format!(
"window.searchIndex = {};",
search::build_index(
&language.code,
&self.library.read().unwrap(),
&self.config
)?
search::build_index(&code, &self.library.read().unwrap(), &self.config)?
),
)?;
}

View File

@ -67,7 +67,7 @@ fn make_path_with_lang(path: String, lang: &str, config: &Config) -> Result<Stri
return Ok(path);
}
if !config.languages.iter().any(|x| x.code == lang) {
if !config.languages.iter().any(|(x, _)| x == lang) {
return Err(
format!("`{}` is not an authorized language (check config.languages).", lang).into()
);
@ -728,9 +728,9 @@ mod tests {
const TRANS_CONFIG: &str = r#"
base_url = "https://remplace-par-ton-url.fr"
default_language = "fr"
languages = [
{ code = "en" },
]
[languages]
[languages.en]
[translations]
[translations.fr]

View File

@ -18,10 +18,11 @@ taxonomies = [
{name = "tags", lang = "fr"},
]
languages = [
{code = "fr", feed = true},
{code = "it", feed = false, search = true },
]
[languages.fr]
generate_feed = true
[languages.it]
build_search_index = true
[markdown]
highlight_code = false