From b1ceb3e80e3dac3de58f38c7a2d968e08445031d Mon Sep 17 00:00:00 2001
From: Michael Macias
Date: Thu, 10 Oct 2019 13:23:16 -0500
Subject: [PATCH] rendering: Avoid prepending URL prefix to links that start
with a scheme (#817)
Links that start with a scheme (e.g., `tel:18008675309`) inadvertently
had a URL prefix prepended. Previously, only `mailto:` was handled, but
given the sheer number of [registered URI schemes][uri-schemes], a loose
pattern matcher is used to detect schemes instead.
External links, as identified by the renderer, are now limited to `http`
and `https` schemes.
Fixes #747 and fixes #816.
[uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
---
components/rendering/src/markdown.rs | 63 +++++++++++++++++++++++++-
components/rendering/tests/markdown.rs | 30 ++++++++++--
2 files changed, 87 insertions(+), 6 deletions(-)
diff --git a/components/rendering/src/markdown.rs b/components/rendering/src/markdown.rs
index 437f558e..673eca6d 100644
--- a/components/rendering/src/markdown.rs
+++ b/components/rendering/src/markdown.rs
@@ -1,4 +1,5 @@
use pulldown_cmark as cmark;
+use regex::Regex;
use slug::slugify;
use syntect::easy::HighlightLines;
use syntect::html::{
@@ -60,11 +61,31 @@ fn find_anchor(anchors: &[String], name: String, level: u8) -> String {
find_anchor(anchors, name, level + 1)
}
+// Returns whether the given string starts with a scheme.
+//
+// Although there exists [a list of registered URI schemes][uri-schemes], a link may use arbitrary,
+// private schemes. This function checks if the given string starts with something that just looks
+// like a scheme, i.e., a run of ASCII letters, digits, or hyphens followed by a colon.
+//
+// [uri-schemes]: https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+fn starts_with_schema(s: &str) -> bool {
+ lazy_static! {
+ static ref PATTERN: Regex = Regex::new(r"^[0-9A-Za-z\-]+:").unwrap();
+ }
+
+ PATTERN.is_match(s)
+}
+
// Colocated asset links refers to the files in the same directory,
// there it should be a filename only
fn is_colocated_asset_link(link: &str) -> bool {
!link.contains('/') // http://, ftp://, ../ etc
- && !link.starts_with("mailto:")
+ && !starts_with_schema(link)
+}
+
+// Returns whether a link starts with the `http:` or `https:` scheme.
+fn is_external_link(link: &str) -> bool {
+ link.starts_with("http:") || link.starts_with("https:")
}
fn fix_link(
@@ -103,7 +124,7 @@ fn fix_link(
} else if is_colocated_asset_link(&link) {
format!("{}{}", context.current_page_permalink, link)
} else {
- if !link.starts_with('#') && !link.starts_with("mailto:") {
+ if is_external_link(link) {
external_links.push(link.to_owned());
}
link.to_string()
@@ -328,3 +349,41 @@ pub fn markdown_to_html(content: &str, context: &RenderContext) -> Resultfoo@bar.tld
\n");
+ let permalinks_ctx = HashMap::new();
+
+ let context = RenderContext::new(
+ &tera_ctx,
+ &config,
+ "https://vincent.is/",
+ &permalinks_ctx,
+ InsertAnchor::None,
+ );
+
+ let res = render_content(content, &context).unwrap();
+
+ let expected = r#"foo@bar.tld
+(123) 456-7890
+blank page
+"#;
+
+ assert_eq!(res.body, expected);
}