Load documents and index them!
This commit is contained in:
parent
5418afe8dd
commit
9866da2d16
|
@ -3465,6 +3465,7 @@ dependencies = [
|
||||||
"colour",
|
"colour",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"log",
|
"log",
|
||||||
|
"quickpeep_densedoc",
|
||||||
"quickpeep_index",
|
"quickpeep_index",
|
||||||
"quickpeep_structs",
|
"quickpeep_structs",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
|
@ -17,5 +17,6 @@ toml = "0.5.8"
|
||||||
clap = { version = "3.1.6", features = ["derive"] }
|
clap = { version = "3.1.6", features = ["derive"] }
|
||||||
colour = "0.6.0"
|
colour = "0.6.0"
|
||||||
|
|
||||||
|
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
||||||
quickpeep_index = { path = "../quickpeep_index" }
|
quickpeep_index = { path = "../quickpeep_index" }
|
||||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||||
|
|
|
@ -5,6 +5,8 @@ use env_logger::Env;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader};
|
use std::io::{BufRead, BufReader};
|
||||||
|
|
||||||
|
use quickpeep_densedoc::{DenseTree};
|
||||||
|
use quickpeep_index::backend::BackendIndependentDocument;
|
||||||
use quickpeep_indexer::config::IndexerConfig;
|
use quickpeep_indexer::config::IndexerConfig;
|
||||||
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
@ -28,7 +30,7 @@ pub fn main() -> anyhow::Result<()> {
|
||||||
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
||||||
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
||||||
|
|
||||||
let _indexer_backend = config.open_indexer_backend()?;
|
let mut indexer_backend = config.open_indexer_backend()?;
|
||||||
|
|
||||||
for pack in opts.rakepacks {
|
for pack in opts.rakepacks {
|
||||||
blue!("Indexing: ");
|
blue!("Indexing: ");
|
||||||
|
@ -47,10 +49,25 @@ pub fn main() -> anyhow::Result<()> {
|
||||||
|
|
||||||
// TODO(unstable): this condition is `.has_data_left()` but it's unstable.
|
// TODO(unstable): this condition is `.has_data_left()` but it's unstable.
|
||||||
while buf_reader.fill_buf().map(|b| !b.is_empty())? {
|
while buf_reader.fill_buf().map(|b| !b.is_empty())? {
|
||||||
let _page_record: PackRecord<RakedPageEntry> =
|
let page_record: PackRecord<RakedPageEntry> = serde_bare::from_reader(&mut buf_reader)?;
|
||||||
serde_bare::from_reader(&mut buf_reader)?;
|
|
||||||
|
let document = page_record.record.document;
|
||||||
|
|
||||||
|
let article_body = DenseTree::generate_textual_format(&document.body_content);
|
||||||
|
let nonarticle_body = DenseTree::generate_textual_format(&document.body_remainder);
|
||||||
|
|
||||||
|
// TODO Store the actual structure of the document in the store?
|
||||||
|
indexer_backend.add_document(BackendIndependentDocument {
|
||||||
|
title: document.head.title,
|
||||||
|
article_body,
|
||||||
|
nonarticle_body,
|
||||||
|
// TODO populate tags & antifeatures
|
||||||
|
tags: vec![],
|
||||||
|
url: page_record.url.to_string(),
|
||||||
|
})?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
indexer_backend.flush()?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue