Store dates in the document head

This commit is contained in:
Olivier 'reivilibre' 2022-03-28 22:24:36 +01:00
parent 7dc2369dd2
commit 3bfd192c28
3 changed files with 10 additions and 2 deletions

View File

@ -11,4 +11,4 @@ serde = { version = "1.0.136", features = ["derive"] }
kuchiki = "0.8.1"
html5ever = "0.25.1"
regex = "1.5.5"
lazy_static = "1.4.0"
lazy_static = "1.4.0"

View File

@ -19,6 +19,8 @@ pub struct DenseHead {
pub language: String,
/// URL to icon of the page. May be empty if none were discovered.
pub icon: String,
/// Datetime of publication (or creation, as a fallback), in seconds since the epoch.
/// `None` if neither date was discovered.
pub datetime: Option<i64>,
}
impl DenseHead {

View File

@ -236,7 +236,7 @@ impl PageExtractionServiceInternal {
}
}
find_page_metadata(root_node.clone())?;
let metadata = find_page_metadata(root_node.clone())?;
let mut readability =
quickpeep_moz_readability::Readability::new_from_node(root_node.clone());
@ -249,6 +249,11 @@ impl PageExtractionServiceInternal {
title = readability.metadata.title().to_owned();
}
let datetime = metadata
.publication_date
.or(metadata.creation_date)
.map(|dt| dt.timestamp());
let mut document = DenseDocument {
head: DenseHead {
title,
@ -256,6 +261,7 @@ impl PageExtractionServiceInternal {
icon: icon
.map(|url| url.as_str().to_owned())
.unwrap_or(String::with_capacity(0)),
datetime,
},
body_content: Vec::with_capacity(0),
body_remainder: Vec::with_capacity(0),