Store favicon BLAKE3 hashes in the doc store
This commit is contained in:
parent
a271f83805
commit
8ec8003dbb
|
@ -116,12 +116,24 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayref"
|
||||||
|
version = "0.3.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayvec"
|
name = "arrayvec"
|
||||||
version = "0.5.2"
|
version = "0.5.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayvec"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "askama"
|
name = "askama"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
|
@ -356,6 +368,20 @@ dependencies = [
|
||||||
"crunchy",
|
"crunchy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "blake3"
|
||||||
|
version = "1.3.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f"
|
||||||
|
dependencies = [
|
||||||
|
"arrayref",
|
||||||
|
"arrayvec 0.7.2",
|
||||||
|
"cc",
|
||||||
|
"cfg-if",
|
||||||
|
"constant_time_eq",
|
||||||
|
"digest 0.10.3",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "block-buffer"
|
name = "block-buffer"
|
||||||
version = "0.7.3"
|
version = "0.7.3"
|
||||||
|
@ -377,6 +403,15 @@ dependencies = [
|
||||||
"generic-array 0.14.5",
|
"generic-array 0.14.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "block-buffer"
|
||||||
|
version = "0.10.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array 0.14.5",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "block-padding"
|
name = "block-padding"
|
||||||
version = "0.1.5"
|
version = "0.1.5"
|
||||||
|
@ -606,6 +641,12 @@ dependencies = [
|
||||||
"tiny-keccak",
|
"tiny-keccak",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "constant_time_eq"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "convert_case"
|
name = "convert_case"
|
||||||
version = "0.4.0"
|
version = "0.4.0"
|
||||||
|
@ -761,6 +802,16 @@ version = "0.2.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crypto-common"
|
||||||
|
version = "0.1.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8"
|
||||||
|
dependencies = [
|
||||||
|
"generic-array 0.14.5",
|
||||||
|
"typenum",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cssparser"
|
name = "cssparser"
|
||||||
version = "0.27.2"
|
version = "0.27.2"
|
||||||
|
@ -895,6 +946,17 @@ dependencies = [
|
||||||
"generic-array 0.14.5",
|
"generic-array 0.14.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "digest"
|
||||||
|
version = "0.10.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506"
|
||||||
|
dependencies = [
|
||||||
|
"block-buffer 0.10.2",
|
||||||
|
"crypto-common",
|
||||||
|
"subtle",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "diplomatic-bag"
|
name = "diplomatic-bag"
|
||||||
version = "0.2.0"
|
version = "0.2.0"
|
||||||
|
@ -1765,7 +1827,7 @@ version = "0.7.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
|
checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arrayvec",
|
"arrayvec 0.5.2",
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"ryu",
|
"ryu",
|
||||||
|
@ -3637,6 +3699,7 @@ name = "quickpeep_index"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"blake3",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"fancy_mdbx",
|
"fancy_mdbx",
|
||||||
"log",
|
"log",
|
||||||
|
@ -3654,6 +3717,7 @@ name = "quickpeep_indexer"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
|
"blake3",
|
||||||
"clap",
|
"clap",
|
||||||
"colour",
|
"colour",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
|
@ -4544,6 +4608,12 @@ dependencies = [
|
||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "subtle"
|
||||||
|
version = "2.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "1.0.89"
|
version = "1.0.89"
|
||||||
|
|
|
@ -21,6 +21,16 @@ pub struct DenseHead {
|
||||||
pub icon: String,
|
pub icon: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DenseHead {
|
||||||
|
pub fn effective_favicon_url(&self) -> &str {
|
||||||
|
if self.icon.is_empty() {
|
||||||
|
"/favicon.ico"
|
||||||
|
} else {
|
||||||
|
self.icon.as_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Clone, Debug)]
|
#[derive(Serialize, Deserialize, Clone, Debug)]
|
||||||
pub enum DenseTree {
|
pub enum DenseTree {
|
||||||
Heading1(Vec<DenseTree>),
|
Heading1(Vec<DenseTree>),
|
||||||
|
|
|
@ -16,6 +16,8 @@ serde = { version = "1.0.136", features = ["derive"] }
|
||||||
serde_bare = "0.5.0"
|
serde_bare = "0.5.0"
|
||||||
toml = "0.5.8"
|
toml = "0.5.8"
|
||||||
|
|
||||||
|
blake3 = "1.3.1"
|
||||||
|
|
||||||
fancy_mdbx = { path = "../../../libraries/fancy_mdbx" }
|
fancy_mdbx = { path = "../../../libraries/fancy_mdbx" }
|
||||||
|
|
||||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||||
|
|
|
@ -19,6 +19,7 @@ pub struct BackendIndependentDocument {
|
||||||
pub nonarticle_body: String,
|
pub nonarticle_body: String,
|
||||||
pub tags: Vec<String>,
|
pub tags: Vec<String>,
|
||||||
pub url: String,
|
pub url: String,
|
||||||
|
pub favicon_url_hash: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A backend-independent document struct.
|
/// A backend-independent document struct.
|
||||||
|
@ -29,4 +30,5 @@ pub struct SearchDocument {
|
||||||
pub excerpt: String,
|
pub excerpt: String,
|
||||||
pub tags: Vec<String>,
|
pub tags: Vec<String>,
|
||||||
pub url: String,
|
pub url: String,
|
||||||
|
pub favicon_url_hash: u64,
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,8 @@ pub struct DocumentStoreRow {
|
||||||
title: String,
|
title: String,
|
||||||
body: String,
|
body: String,
|
||||||
nonbody: String,
|
nonbody: String,
|
||||||
|
/// First 8 bytes of the BLAKE3 hash of the favicon URL, read as a little-endian u64
|
||||||
|
favicon_url_hash: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct StoreTables {
|
pub struct StoreTables {
|
||||||
|
@ -145,6 +147,7 @@ impl Backend for TantivyBackend {
|
||||||
title: document.title,
|
title: document.title,
|
||||||
body: document.article_body,
|
body: document.article_body,
|
||||||
nonbody: document.nonarticle_body,
|
nonbody: document.nonarticle_body,
|
||||||
|
favicon_url_hash: document.favicon_url_hash,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
|
@ -200,6 +203,7 @@ impl Backend for TantivyBackend {
|
||||||
excerpt,
|
excerpt,
|
||||||
tags: vec![],
|
tags: vec![],
|
||||||
url: url.to_owned(),
|
url: url.to_owned(),
|
||||||
|
favicon_url_hash: doc_row.favicon_url_hash,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,8 @@ clap = { version = "3.1.6", features = ["derive"] }
|
||||||
colour = "0.6.0"
|
colour = "0.6.0"
|
||||||
url = "2.2.2"
|
url = "2.2.2"
|
||||||
|
|
||||||
|
blake3 = "1.3.1"
|
||||||
|
|
||||||
smartstring = "1.0.1"
|
smartstring = "1.0.1"
|
||||||
|
|
||||||
# Used for efficient lookup of seeds (URL prefixes)
|
# Used for efficient lookup of seeds (URL prefixes)
|
||||||
|
|
|
@ -85,6 +85,13 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
.unwrap_or_else(|| Vec::with_capacity(0));
|
.unwrap_or_else(|| Vec::with_capacity(0));
|
||||||
|
|
||||||
// TODO Store the actual structure of the document in the store?
|
// TODO Store the actual structure of the document in the store?
|
||||||
|
|
||||||
|
let favicon_url = document.head.effective_favicon_url();
|
||||||
|
let mut favicon_url_hash_long = [0u8; 8];
|
||||||
|
favicon_url_hash_long
|
||||||
|
.copy_from_slice(&blake3::hash(favicon_url.as_bytes()).as_bytes()[0..8]);
|
||||||
|
let favicon_url_hash = u64::from_le_bytes(favicon_url_hash_long);
|
||||||
|
|
||||||
indexer_backend.add_document(BackendIndependentDocument {
|
indexer_backend.add_document(BackendIndependentDocument {
|
||||||
title: document.head.title,
|
title: document.head.title,
|
||||||
article_body,
|
article_body,
|
||||||
|
@ -92,6 +99,7 @@ pub async fn main() -> anyhow::Result<()> {
|
||||||
// TODO populate tags & antifeatures
|
// TODO populate tags & antifeatures
|
||||||
tags,
|
tags,
|
||||||
url: page_record.url.to_string(),
|
url: page_record.url.to_string(),
|
||||||
|
favicon_url_hash,
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -476,12 +476,7 @@ impl EventProcessor<'_> {
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// If there's a favicon to be tried, add it to the list...
|
// If there's a favicon to be tried, add it to the list...
|
||||||
let favicon_url_rel = if page.document.head.icon.is_empty() {
|
let favicon_url_rel = page.document.head.effective_favicon_url();
|
||||||
"/favicon.ico"
|
|
||||||
} else {
|
|
||||||
page.document.head.icon.as_str()
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Ok(favicon_url) = url.join(favicon_url_rel) {
|
if let Ok(favicon_url) = url.join(favicon_url_rel) {
|
||||||
txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?;
|
txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue