Add an open function for the Tantivy Backend
This commit is contained in:
parent
73154e7e34
commit
f43424de94
|
@ -1,40 +1,107 @@
|
|||
use crate::backend::{Backend, BackendIndependentDocument};
|
||||
use anyhow::Context;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use tantivy::schema::{Schema, STORED, TEXT};
|
||||
use tantivy::{Index, IndexWriter};
|
||||
use tantivy::schema::{Facet, Field, SchemaBuilder, STORED, TEXT};
|
||||
use tantivy::{doc, Index, IndexWriter};
|
||||
|
||||
fn experiment_tantivy() -> anyhow::Result<()> {
|
||||
let mut schema_builder = Schema::builder();
|
||||
// TODO what should our schema look like? Should we have another database with stuff?
|
||||
// (notably we could Zstd-compress things in another datastore, for reduced disk usage...)
|
||||
schema_builder.add_text_field("title", TEXT | STORED);
|
||||
schema_builder.add_text_field("article", TEXT);
|
||||
schema_builder.add_text_field("nonarticle", TEXT);
|
||||
schema_builder.add_text_field("url", STORED);
|
||||
schema_builder.add_facet_field("tags", ());
|
||||
// schema_builder.add_bytes_field()
|
||||
let schema = schema_builder.build();
|
||||
let index = tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema)?;
|
||||
let _writer = index.writer(100 * 1024 * 1024)?;
|
||||
|
||||
Ok(())
|
||||
pub struct Fields {
|
||||
title: Field,
|
||||
article: Field,
|
||||
nonarticle: Field,
|
||||
url: Field,
|
||||
tags: Field,
|
||||
}
|
||||
|
||||
pub struct TantivyBackend {
|
||||
index: Index,
|
||||
index_writer: IndexWriter,
|
||||
fields: Fields,
|
||||
index_writer: Option<IndexWriter>,
|
||||
}
|
||||
|
||||
impl TantivyBackend {
|
||||
pub fn open(path: &Path) -> anyhow::Result<TantivyBackend> {
|
||||
if !path.exists() {
|
||||
std::fs::create_dir(path)?;
|
||||
}
|
||||
|
||||
let dir_path = path.join("tantivy");
|
||||
|
||||
let (index, fields) = if dir_path.exists() {
|
||||
let index = Index::open_in_dir(dir_path)?;
|
||||
|
||||
let schema = index.schema();
|
||||
let mut field_map: HashMap<_, _> = schema
|
||||
.fields()
|
||||
.map(|(field, field_entry)| (field_entry.name(), field))
|
||||
.collect();
|
||||
|
||||
let fields = Fields {
|
||||
title: field_map.remove("title").context("No title field")?,
|
||||
article: field_map.remove("article").context("No article field")?,
|
||||
nonarticle: field_map
|
||||
.remove("nonarticle")
|
||||
.context("No nonarticle field")?,
|
||||
url: field_map.remove("url").context("No url field")?,
|
||||
tags: field_map.remove("tags").context("No tags field")?,
|
||||
};
|
||||
|
||||
(index, fields)
|
||||
} else {
|
||||
let mut schema_builder = SchemaBuilder::new();
|
||||
let fields = Fields {
|
||||
title: schema_builder.add_text_field("title", TEXT | STORED),
|
||||
article: schema_builder.add_text_field("article", TEXT),
|
||||
nonarticle: schema_builder.add_text_field("nonarticle", TEXT),
|
||||
url: schema_builder.add_text_field("url", STORED),
|
||||
tags: schema_builder.add_facet_field("tags", ()),
|
||||
};
|
||||
let schema = schema_builder.build();
|
||||
|
||||
let index = Index::create_in_dir(dir_path, schema)?;
|
||||
|
||||
(index, fields)
|
||||
};
|
||||
|
||||
Ok(TantivyBackend {
|
||||
index,
|
||||
fields,
|
||||
index_writer: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Backend for TantivyBackend {
|
||||
fn add_document(&mut self, _document: BackendIndependentDocument) -> anyhow::Result<()> {
|
||||
// self.index_writer.add_document(doc! {
|
||||
// "title" => document.title,
|
||||
// "article" => document.article_body,
|
||||
// "nonarticle" => document.nonarticle_body,
|
||||
// "url" => document.url,
|
||||
// "tags" => document.tags
|
||||
// })?;
|
||||
todo!()
|
||||
fn add_document(&mut self, document: BackendIndependentDocument) -> anyhow::Result<()> {
|
||||
let index_writer = match self.index_writer.as_ref() {
|
||||
None => {
|
||||
self.index_writer = Some(self.index.writer(100 * 1024 * 1024)?);
|
||||
self.index_writer.as_ref().unwrap()
|
||||
}
|
||||
Some(index_writer) => index_writer,
|
||||
};
|
||||
|
||||
let Fields {
|
||||
title,
|
||||
article,
|
||||
nonarticle,
|
||||
url,
|
||||
tags,
|
||||
} = &self.fields;
|
||||
|
||||
let mut tantivy_doc = doc! {
|
||||
*title => document.title,
|
||||
*article => document.article_body,
|
||||
*nonarticle => document.nonarticle_body,
|
||||
*url => document.url
|
||||
};
|
||||
// TODO do we actually want facets? How about u64 tags or something...?
|
||||
for tag in &document.tags {
|
||||
tantivy_doc.add_facet(*tags, Facet::from(&format!("/{}", tag)));
|
||||
}
|
||||
|
||||
index_writer.add_document(tantivy_doc)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> anyhow::Result<()> {
|
||||
|
|
Loading…
Reference in New Issue