Load documents and index them!
This commit is contained in:
parent
5418afe8dd
commit
9866da2d16
|
@ -3465,6 +3465,7 @@ dependencies = [
|
|||
"colour",
|
||||
"env_logger",
|
||||
"log",
|
||||
"quickpeep_densedoc",
|
||||
"quickpeep_index",
|
||||
"quickpeep_structs",
|
||||
"serde",
|
||||
|
|
|
@ -17,5 +17,6 @@ toml = "0.5.8"
|
|||
clap = { version = "3.1.6", features = ["derive"] }
|
||||
colour = "0.6.0"
|
||||
|
||||
quickpeep_densedoc = { path = "../quickpeep_densedoc" }
|
||||
quickpeep_index = { path = "../quickpeep_index" }
|
||||
quickpeep_structs = { path = "../quickpeep_structs" }
|
||||
|
|
|
@ -5,6 +5,8 @@ use env_logger::Env;
|
|||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
|
||||
use quickpeep_densedoc::{DenseTree};
|
||||
use quickpeep_index::backend::BackendIndependentDocument;
|
||||
use quickpeep_indexer::config::IndexerConfig;
|
||||
use quickpeep_structs::rake_entries::{PackRecord, RakedPageEntry, SCHEMA_RAKED_PAGES};
|
||||
use std::path::PathBuf;
|
||||
|
@ -28,7 +30,7 @@ pub fn main() -> anyhow::Result<()> {
|
|||
.unwrap_or_else(|| PathBuf::from("qp_indexer.toml"));
|
||||
let config = IndexerConfig::load(&config_path).context("Failed to load config")?;
|
||||
|
||||
let _indexer_backend = config.open_indexer_backend()?;
|
||||
let mut indexer_backend = config.open_indexer_backend()?;
|
||||
|
||||
for pack in opts.rakepacks {
|
||||
blue!("Indexing: ");
|
||||
|
@ -47,10 +49,25 @@ pub fn main() -> anyhow::Result<()> {
|
|||
|
||||
// TODO(unstable): this condition is equivalent to `BufRead::has_data_left()`, which is not yet stable; switch to it once it is.
|
||||
while buf_reader.fill_buf().map(|b| !b.is_empty())? {
|
||||
let _page_record: PackRecord<RakedPageEntry> =
|
||||
serde_bare::from_reader(&mut buf_reader)?;
|
||||
let page_record: PackRecord<RakedPageEntry> = serde_bare::from_reader(&mut buf_reader)?;
|
||||
|
||||
let document = page_record.record.document;
|
||||
|
||||
let article_body = DenseTree::generate_textual_format(&document.body_content);
|
||||
let nonarticle_body = DenseTree::generate_textual_format(&document.body_remainder);
|
||||
|
||||
// TODO: Should the actual structure of the document be kept in the store as well?
|
||||
indexer_backend.add_document(BackendIndependentDocument {
|
||||
title: document.head.title,
|
||||
article_body,
|
||||
nonarticle_body,
|
||||
// TODO populate tags & antifeatures
|
||||
tags: vec![],
|
||||
url: page_record.url.to_string(),
|
||||
})?;
|
||||
}
|
||||
}
|
||||
indexer_backend.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue