Store dates in the document head
This commit is contained in:
parent
7dc2369dd2
commit
3bfd192c28
|
@ -11,4 +11,4 @@ serde = { version = "1.0.136", features = ["derive"] }
|
|||
kuchiki = "0.8.1"
|
||||
html5ever = "0.25.1"
|
||||
regex = "1.5.5"
|
||||
lazy_static = "1.4.0"
|
||||
lazy_static = "1.4.0"
|
||||
|
|
|
@ -19,6 +19,8 @@ pub struct DenseHead {
|
|||
pub language: String,
|
||||
/// URL of the page's icon. This is an empty string if no icon was discovered.
|
||||
pub icon: String,
|
||||
/// Datetime of publication (or of creation, as a fallback), in seconds since the epoch. `None` if the page metadata provided neither date.
|
||||
pub datetime: Option<i64>,
|
||||
}
|
||||
|
||||
impl DenseHead {
|
||||
|
|
|
@ -236,7 +236,7 @@ impl PageExtractionServiceInternal {
|
|||
}
|
||||
}
|
||||
|
||||
find_page_metadata(root_node.clone())?;
|
||||
let metadata = find_page_metadata(root_node.clone())?;
|
||||
|
||||
let mut readability =
|
||||
quickpeep_moz_readability::Readability::new_from_node(root_node.clone());
|
||||
|
@ -249,6 +249,11 @@ impl PageExtractionServiceInternal {
|
|||
title = readability.metadata.title().to_owned();
|
||||
}
|
||||
|
||||
let datetime = metadata
|
||||
.publication_date
|
||||
.or(metadata.creation_date)
|
||||
.map(|dt| dt.timestamp());
|
||||
|
||||
let mut document = DenseDocument {
|
||||
head: DenseHead {
|
||||
title,
|
||||
|
@ -256,6 +261,7 @@ impl PageExtractionServiceInternal {
|
|||
icon: icon
|
||||
.map(|url| url.as_str().to_owned())
|
||||
.unwrap_or(String::with_capacity(0)),
|
||||
datetime,
|
||||
},
|
||||
body_content: Vec::with_capacity(0),
|
||||
body_remainder: Vec::with_capacity(0),
|
||||
|
|
Loading…
Reference in New Issue