Do not overfilter HTML when removing footnotes (#2008)

If the summary contains HTML using <sup>, like "the 1<sup>st</sup>", it gets
filtered out, because the regexp that removes footnotes is not precise
enough to remove just the footnotes.
This commit is contained in:
mscherer 2022-10-30 21:33:53 +01:00 committed by Vincent Prouillet
parent 2aa067d5e3
commit 291c93e4ba

View File

@ -31,7 +31,7 @@ static RFC3339_DATE: Lazy<Regex> = Lazy::new(|| {
).unwrap() ).unwrap()
}); });
static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"<sup\s*.*?>\s*.*?</sup>").unwrap()); static FOOTNOTES_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r#"<sup class="footnote-reference"><a href=\s*.*?>\s*.*?</a></sup>"#).unwrap());
#[derive(Clone, Debug, Default, PartialEq)] #[derive(Clone, Debug, Default, PartialEq)]
pub struct Page { pub struct Page {
@ -513,7 +513,7 @@ Hello world
let content = r#" let content = r#"
+++ +++
+++ +++
This page has footnotes, here's one. [^1] This page use <sup>1.5</sup> and has footnotes, here's one. [^1]
<!-- more --> <!-- more -->
@ -536,7 +536,7 @@ And here's another. [^2]
.unwrap(); .unwrap();
assert_eq!( assert_eq!(
page.summary, page.summary,
Some("<p>This page has footnotes, here\'s one. </p>\n".to_string()) Some("<p>This page use <sup>1.5</sup> and has footnotes, here\'s one. </p>\n".to_string())
); );
} }