Added XML support for load_data (#1769)

Co-authored-by: doomy <2640792-_doomy@users.noreply.gitlab.com>
doomy 2022-03-04 14:28:08 -06:00 committed by GitHub
parent a67370b8d8
commit 88e0624f11
6 changed files with 107 additions and 5 deletions

Cargo.lock (generated)

@@ -1412,6 +1412,7 @@ dependencies = [
"once_cell",
"percent-encoding",
"pulldown-cmark",
"quickxml_to_serde",
"rayon",
"regex",
"relative-path",
@@ -1640,6 +1641,15 @@ dependencies = [
"unicase",
]
[[package]]
name = "minidom"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe549115a674f5ec64c754d85e37d6f42664bd0ef4ffb62b619489ad99c6cb1a"
dependencies = [
"quick-xml",
]
[[package]]
name = "minify-html"
version = "0.8.0"
@@ -2387,6 +2397,27 @@ version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quick-xml"
version = "0.17.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe1e430bdcf30c9fdc25053b9c459bb1a4672af4617b6c783d7d91dc17c6bbb0"
dependencies = [
"memchr",
]
[[package]]
name = "quickxml_to_serde"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26f35112b35480fd72f63444289083eeedbd61d13907c82c4309f0ccda35e244"
dependencies = [
"minidom",
"serde",
"serde_derive",
"serde_json",
]
[[package]]
name = "quote"
version = "1.0.15"


@@ -10,6 +10,7 @@ csv = "1"
base64 = "0.13"
serde_json = "1"
serde_yaml = "0.8"
quickxml_to_serde = "0.5"
url = "2"
syntect = "4"
once_cell = "1"


@@ -21,6 +21,7 @@ pub use num_format;
pub use once_cell;
pub use percent_encoding;
pub use pulldown_cmark;
pub use quickxml_to_serde;
pub use rayon;
pub use regex;
pub use relative_path;


@@ -46,6 +46,7 @@ enum OutputFormat {
Csv,
Bibtex,
Plain,
Xml,
}
impl FromStr for OutputFormat {
@@ -57,6 +58,7 @@ impl FromStr for OutputFormat {
"csv" => Ok(OutputFormat::Csv),
"json" => Ok(OutputFormat::Json),
"bibtex" => Ok(OutputFormat::Bibtex),
"xml" => Ok(OutputFormat::Xml),
"plain" => Ok(OutputFormat::Plain),
format => Err(format!("Unknown output format {}", format).into()),
}
@@ -70,6 +72,7 @@ impl OutputFormat {
OutputFormat::Csv => "text/csv",
OutputFormat::Toml => "application/toml",
OutputFormat::Bibtex => "application/x-bibtex",
OutputFormat::Xml => "text/xml",
OutputFormat::Plain => "text/plain",
})
}
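Together, these two hunks wire the new variant into the existing parse-and-MIME pattern. Below is a trimmed, self-contained sketch of that pattern, not the actual Zola code: the variant list is shortened, and `mime()` here is a simplified stand-in that returns a plain `&str` where the real method wraps a `mime::Mime` in a `Result`.

```rust
use std::str::FromStr;

#[derive(Debug, PartialEq)]
enum OutputFormat {
    Json,
    Xml,
    Plain,
}

impl FromStr for OutputFormat {
    type Err = String;

    // Maps a user-supplied `format` string (or a file extension) to a variant.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "json" => Ok(OutputFormat::Json),
            "xml" => Ok(OutputFormat::Xml),
            "plain" => Ok(OutputFormat::Plain),
            format => Err(format!("Unknown output format {}", format)),
        }
    }
}

impl OutputFormat {
    // Simplified stand-in for the MIME lookup shown in the hunk above.
    fn mime(&self) -> &'static str {
        match self {
            OutputFormat::Json => "application/json",
            OutputFormat::Xml => "text/xml",
            OutputFormat::Plain => "text/plain",
        }
    }
}

fn main() {
    let format = OutputFormat::from_str("xml").unwrap();
    assert_eq!(format, OutputFormat::Xml);
    assert_eq!(format.mime(), "text/xml");
}
```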
@@ -368,6 +371,7 @@ impl TeraFn for LoadData {
OutputFormat::Csv => load_csv(data),
OutputFormat::Json => load_json(data),
OutputFormat::Bibtex => load_bibtex(data),
OutputFormat::Xml => load_xml(data),
OutputFormat::Plain => to_value(data).map_err(|e| e.into()),
};
@@ -502,6 +506,42 @@ fn load_csv(csv_data: String) -> Result<Value> {
to_value(csv_value).map_err(|err| err.into())
}
/// Parse an XML string and convert it to a Tera Value
///
/// An example XML file `example.xml` could be:
/// ```xml
/// <root>
/// <headers>Number</headers>
/// <headers>Title</headers>
/// <records>
/// <item>1</item>
/// <item>Gutenberg</item>
/// </records>
/// <records>
/// <item>2</item>
/// <item>Printing</item>
/// </records>
/// </root>
/// ```
/// The JSON value output would be:
/// ```json
/// {
/// "root": {
/// "headers": ["Number", "Title"],
/// "records": [
/// ["1", "Gutenberg"],
/// ["2", "Printing"]
/// ]
/// }
/// }
/// ```
fn load_xml(xml_data: String) -> Result<Value> {
let xml_content: Value =
libs::quickxml_to_serde::xml_string_to_json(xml_data, &Default::default())
.map_err(|e| format!("{:?}", e))?;
Ok(xml_content)
}
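To see what `load_xml` produces without running Zola, the underlying crate can be exercised directly. A minimal sketch, assuming a scratch project with `quickxml_to_serde = "0.5"` as its only dependency:

```rust
use quickxml_to_serde::xml_string_to_json;

fn main() {
    let xml = "<root><array>1</array><array>2</array></root>".to_string();
    // Same call shape as `load_xml` above: `&Default::default()` supplies
    // the crate's default conversion config.
    let value = xml_string_to_json(xml, &Default::default()).expect("invalid XML");
    // Repeated sibling elements collapse into a JSON array, and numeric
    // text is inferred as numbers (cf. the `can_load_xml` test below).
    assert_eq!(value.to_string(), r#"{"root":{"array":[1,2]}}"#);
}
```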
#[cfg(test)]
mod tests {
use super::{DataSource, LoadData, OutputFormat};
@@ -1007,6 +1047,27 @@ mod tests {
)
}
#[test]
fn can_load_xml() {
let static_fn = LoadData::new(PathBuf::from("../utils/test-files"), None, PathBuf::new());
let mut args = HashMap::new();
args.insert("path".to_string(), to_value("test.xml").unwrap());
let result = static_fn.call(&args.clone()).unwrap();
assert_eq!(
result,
json!({
"root": {
"key": "value",
"array": [1, 2, 3],
"subpackage": {
"subkey": 5
}
}
})
)
}
#[test]
fn is_load_remote_data_using_post_method_with_different_body_not_cached() {
let _mjson = mock("POST", "/kr1zdgbm4y3")


@@ -0,0 +1,9 @@
<root>
<key>value</key>
<array>1</array>
<array>2</array>
<array>3</array>
<subpackage>
<subkey>5</subkey>
</subpackage>
</root>


@@ -258,7 +258,7 @@ The method returns a map containing `width`, `height` and `format` (the lowercase
```
### `load_data`
-Loads data from a file or URL. Supported file types include *toml*, *json*, *csv* and *bibtex* and only supports UTF-8 encoding.
+Loads data from a file or URL. Supported file types include *toml*, *json*, *csv*, *bibtex* and *xml* and only supports UTF-8 encoding.
Any other file type will be loaded as plain text.
The `path` argument specifies the path to a local data file, according to the [File Searching Logic](@/documentation/templates/overview.md#file-searching-logic).
@@ -283,17 +283,16 @@ The snippet below outputs the HTML from a Wikipedia page, or "No data found" if
```
The optional `format` argument allows you to specify and override which data type is contained
-within the specified file or URL. Valid entries are `toml`, `json`, `csv`, `bibtex`
-or `plain`. If the `format` argument isn't specified, then the path extension is used.
+within the specified file or URL. Valid entries are `toml`, `json`, `csv`, `bibtex`, `xml` or `plain`. If the `format` argument isn't specified, then the path extension is used.
```jinja2
{% set data = load_data(path="content/blog/story/data.txt", format="json") %}
```
-Use the `plain` format for when your file has a toml/json/csv extension but you want to load it as plain text.
+Use the `plain` format for when your file has a supported extension but you want to load it as plain text.

-For *toml* and *json*, the data is loaded into a structure matching the original data file;
+For *toml*, *json* and *xml*, the data is loaded into a structure matching the original data file;
however, for *csv* there is no native notion of such a structure. Instead, the data is separated
into a data structure containing *headers* and *records*. See the example below to see
how this works.
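Not part of the commit, but to make the XML case concrete: assuming a data file shaped like the `example.xml` from the doc comment earlier (the path here is hypothetical), a template can drill into the converted structure directly:

```jinja2
{% set data = load_data(path="content/blog/story/example.xml") %}
First header: {{ data.root.headers.0 }}
{# With the example file above, this renders "First header: Number" #}
```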