Fix: ignore links to #top when checking anchors (#2519)

* Fix: ignore links to #top when checking anchors

* move logic to check internal links

---------

Co-authored-by: Tanishq <tanishq@levels.fyi>
This commit is contained in:
Tanishq 2024-06-12 22:08:51 +05:30 committed by Vincent Prouillet
parent 26f6677bfb
commit c5991fc814
2 changed files with 17 additions and 1 deletion

View File

@ -10,6 +10,7 @@ use crate::Site;
use errors::{bail, Result}; use errors::{bail, Result};
use libs::rayon; use libs::rayon;
use libs::url::Url; use libs::url::Url;
use utils::anchors::is_special_anchor;
/// Check whether all internal links pointing to explicit anchor fragments are valid. /// Check whether all internal links pointing to explicit anchor fragments are valid.
/// ///
@ -40,6 +41,7 @@ pub fn check_internal_links_with_anchors(site: &Site) -> Vec<String> {
(md_path, Some(anchor)) => Some((page_path, md_path, anchor)), (md_path, Some(anchor)) => Some((page_path, md_path, anchor)),
_ => None, _ => None,
}) })
.filter(|(_, _, anchor)| !is_special_anchor(anchor))
.inspect(|_| anchors_total = anchors_total.saturating_add(1)); .inspect(|_| anchors_total = anchors_total.saturating_add(1));
// Check for targets existence (including anchors), then keep only the faulty // Check for targets existence (including anchors), then keep only the faulty

View File

@ -10,9 +10,15 @@ fn anchor_id_checks(anchor: &str) -> Regex {
Regex::new(&format!(r#"\s(?i)(id|name) *= *("|')*{}("|'| |>)+"#, escape(anchor))).unwrap() Regex::new(&format!(r#"\s(?i)(id|name) *= *("|')*{}("|'| |>)+"#, escape(anchor))).unwrap()
} }
/// Reports whether `anchor` is a fragment with a special meaning in HTML.
///
/// Returns `true` when `anchor` is the empty string or spells `top` in any
/// ASCII casing (`top`, `Top`, `TOP`, ...); every other value yields `false`.
///
/// See <https://html.spec.whatwg.org/#select-the-indicated-part>.
pub fn is_special_anchor(anchor: &str) -> bool {
    match anchor {
        // An empty fragment names no element at all.
        "" => true,
        // Otherwise only `top`, compared without regard to ASCII case, is special.
        fragment => fragment.eq_ignore_ascii_case("top"),
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::anchor_id_checks; use super::{anchor_id_checks, is_special_anchor};
fn check(anchor: &str, content: &str) -> bool { fn check(anchor: &str, content: &str) -> bool {
anchor_id_checks(anchor).is_match(content) anchor_id_checks(anchor).is_match(content)
@ -52,4 +58,12 @@ id="fred">"#));
// Non matchers // Non matchers
assert!(!m(r#"<a notid="fred">"#)); assert!(!m(r#"<a notid="fred">"#));
} }
#[test]
fn test_is_special_anchor() {
    // Each pair is (fragment, whether it should be treated as special).
    let cases = [("", true), ("top", true), ("Top", true), ("anchor", false)];
    for &(fragment, expected) in cases.iter() {
        assert!(is_special_anchor(fragment) == expected);
    }
}
} }