From 88e0624f11fb7665107d12e2fb353e9132c57d83 Mon Sep 17 00:00:00 2001 From: doomy Date: Fri, 4 Mar 2022 14:28:08 -0600 Subject: [PATCH] Added XML support for load_data (#1769) Co-authored-by: doomy <2640792-_doomy@users.noreply.gitlab.com> --- Cargo.lock | 31 ++++++++++ components/libs/Cargo.toml | 1 + components/libs/src/lib.rs | 1 + .../templates/src/global_fns/load_data.rs | 61 +++++++++++++++++++ components/utils/test-files/test.xml | 9 +++ .../documentation/templates/overview.md | 9 ++- 6 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 components/utils/test-files/test.xml diff --git a/Cargo.lock b/Cargo.lock index 1a4e9821..e593439f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1412,6 +1412,7 @@ dependencies = [ "once_cell", "percent-encoding", "pulldown-cmark", + "quickxml_to_serde", "rayon", "regex", "relative-path", @@ -1640,6 +1641,15 @@ dependencies = [ "unicase", ] +[[package]] +name = "minidom" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe549115a674f5ec64c754d85e37d6f42664bd0ef4ffb62b619489ad99c6cb1a" +dependencies = [ + "quick-xml", +] + [[package]] name = "minify-html" version = "0.8.0" @@ -2387,6 +2397,27 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-xml" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe1e430bdcf30c9fdc25053b9c459bb1a4672af4617b6c783d7d91dc17c6bbb0" +dependencies = [ + "memchr", +] + +[[package]] +name = "quickxml_to_serde" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26f35112b35480fd72f63444289083eeedbd61d13907c82c4309f0ccda35e244" +dependencies = [ + "minidom", + "serde", + "serde_derive", + "serde_json", +] + [[package]] name = "quote" version = "1.0.15" diff --git a/components/libs/Cargo.toml b/components/libs/Cargo.toml index 75a4290b..7ce339a9 100644 --- a/components/libs/Cargo.toml +++ b/components/libs/Cargo.toml @@ -10,6 +10,7 @@ csv = "1" base64 = "0.13" serde_json = "1" serde_yaml = "0.8" +quickxml_to_serde = "0.5" url = "2" syntect = "4" once_cell = "1" diff --git a/components/libs/src/lib.rs b/components/libs/src/lib.rs index 4d12f2d7..ed61426b 100644 --- a/components/libs/src/lib.rs +++ b/components/libs/src/lib.rs @@ -21,6 +21,7 @@ pub use num_format; pub use once_cell; pub use percent_encoding; pub use pulldown_cmark; +pub use quickxml_to_serde; pub use rayon; pub use regex; pub use relative_path; diff --git a/components/templates/src/global_fns/load_data.rs b/components/templates/src/global_fns/load_data.rs index 1b31a2ab..da71ede2 100644 --- a/components/templates/src/global_fns/load_data.rs +++ b/components/templates/src/global_fns/load_data.rs @@ -46,6 +46,7 @@ enum OutputFormat { Csv, Bibtex, Plain, + Xml, } impl FromStr for OutputFormat { @@ -57,6 +58,7 @@ impl FromStr for OutputFormat { "csv" => Ok(OutputFormat::Csv), "json" => Ok(OutputFormat::Json), "bibtex" => Ok(OutputFormat::Bibtex), + "xml" => Ok(OutputFormat::Xml), "plain" => Ok(OutputFormat::Plain), format => Err(format!("Unknown output format {}", format).into()), } @@ -70,6 +72,7 @@ impl OutputFormat { OutputFormat::Csv => "text/csv", OutputFormat::Toml => "application/toml", OutputFormat::Bibtex => "application/x-bibtex", + OutputFormat::Xml => "text/xml", OutputFormat::Plain => "text/plain", }) } @@ -368,6 +371,7 @@ impl TeraFn for LoadData { OutputFormat::Csv => load_csv(data), OutputFormat::Json => load_json(data), OutputFormat::Bibtex => load_bibtex(data), + OutputFormat::Xml => load_xml(data), OutputFormat::Plain => to_value(data).map_err(|e| e.into()), }; @@ -502,6 +506,42 @@ fn load_csv(csv_data: String) -> Result { to_value(csv_value).map_err(|err| err.into()) } +/// Parse an XML string and convert it to a Tera Value +/// +/// An example XML file `example.xml` could be: +/// ```xml +/// +/// Number +/// Title +/// +/// 1 +/// Gutenberg +/// +/// +/// 2 +/// Printing +/// +/// +/// ``` +/// The json value output would be: +/// ```json +/// { +/// "root": { +/// "headers": ["Number", "Title"], +/// "records": [ +/// ["1", "Gutenberg"], +/// ["2", "Printing"] +/// ] +/// } +/// } +/// ``` +fn load_xml(xml_data: String) -> Result { + let xml_content: Value = + libs::quickxml_to_serde::xml_string_to_json(xml_data, &Default::default()) + .map_err(|e| format!("{:?}", e))?; + Ok(xml_content) +} + #[cfg(test)] mod tests { use super::{DataSource, LoadData, OutputFormat}; @@ -1007,6 +1047,27 @@ mod tests { ) } + #[test] + fn can_load_xml() { + let static_fn = LoadData::new(PathBuf::from("../utils/test-files"), None, PathBuf::new()); + let mut args = HashMap::new(); + args.insert("path".to_string(), to_value("test.xml").unwrap()); + let result = static_fn.call(&args.clone()).unwrap(); + + assert_eq!( + result, + json!({ + "root": { + "key": "value", + "array": [1, 2, 3], + "subpackage": { + "subkey": 5 + } + } + }) + ) + } + #[test] fn is_load_remote_data_using_post_method_with_different_body_not_cached() { let _mjson = mock("POST", "/kr1zdgbm4y3") diff --git a/components/utils/test-files/test.xml b/components/utils/test-files/test.xml new file mode 100644 index 00000000..d492f602 --- /dev/null +++ b/components/utils/test-files/test.xml @@ -0,0 +1,9 @@ + + value + 1 + 2 + 3 + + 5 + + diff --git a/docs/content/documentation/templates/overview.md b/docs/content/documentation/templates/overview.md index b33f3ebb..a689d00e 100644 --- a/docs/content/documentation/templates/overview.md +++ b/docs/content/documentation/templates/overview.md @@ -258,7 +258,7 @@ The method returns a map containing `width`, `height` and `format` (the lowercas ``` ### `load_data` -Loads data from a file or URL. Supported file types include *toml*, *json*, *csv* and *bibtex* and only supports UTF-8 encoding. +Loads data from a file or URL. Supported file types include *toml*, *json*, *csv*, *bibtex* and *xml* and only supports UTF-8 encoding. Any other file type will be loaded as plain text. The `path` argument specifies the path to a local data file, according to the [File Searching Logic](@/documentation/templates/overview.md#file-searching-logic). @@ -283,17 +283,16 @@ The snippet below outputs the HTML from a Wikipedia page, or "No data found" if ``` The optional `format` argument allows you to specify and override which data type is contained -within the specified file or URL. Valid entries are `toml`, `json`, `csv`, `bibtex` -or `plain`. If the `format` argument isn't specified, then the path extension is used. +within the specified file or URL. Valid entries are `toml`, `json`, `csv`, `bibtex`, `xml` or `plain`. If the `format` argument isn't specified, then the path extension is used. ```jinja2 {% set data = load_data(path="content/blog/story/data.txt", format="json") %} ``` -Use the `plain` format for when your file has a toml/json/csv extension but you want to load it as plain text. +Use the `plain` format for when your file has a supported extension but you want to load it as plain text. -For *toml* and *json*, the data is loaded into a structure matching the original data file; +For *toml*, *json* and *xml*, the data is loaded into a structure matching the original data file; however, for *csv* there is no native notion of such a structure. Instead, the data is separated into a data structure containing *headers* and *records*. See the example below to see how this works.