Add an open function for the Tantivy Backend
This commit is contained in:
parent
73154e7e34
commit
f43424de94
|
@ -1,40 +1,107 @@
|
||||||
use crate::backend::{Backend, BackendIndependentDocument};
|
use crate::backend::{Backend, BackendIndependentDocument};
|
||||||
|
use anyhow::Context;
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use tantivy::schema::{Schema, STORED, TEXT};
|
use tantivy::schema::{Facet, Field, SchemaBuilder, STORED, TEXT};
|
||||||
use tantivy::{Index, IndexWriter};
|
use tantivy::{doc, Index, IndexWriter};
|
||||||
|
|
||||||
fn experiment_tantivy() -> anyhow::Result<()> {
|
pub struct Fields {
|
||||||
let mut schema_builder = Schema::builder();
|
title: Field,
|
||||||
// TODO what should our schema look like? Should we have another database with stuff?
|
article: Field,
|
||||||
// (notably we could Zstd-compress things in another datastore, for reduced disk usage...)
|
nonarticle: Field,
|
||||||
schema_builder.add_text_field("title", TEXT | STORED);
|
url: Field,
|
||||||
schema_builder.add_text_field("article", TEXT);
|
tags: Field,
|
||||||
schema_builder.add_text_field("nonarticle", TEXT);
|
|
||||||
schema_builder.add_text_field("url", STORED);
|
|
||||||
schema_builder.add_facet_field("tags", ());
|
|
||||||
// schema_builder.add_bytes_field()
|
|
||||||
let schema = schema_builder.build();
|
|
||||||
let index = tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema)?;
|
|
||||||
let _writer = index.writer(100 * 1024 * 1024)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct TantivyBackend {
|
pub struct TantivyBackend {
|
||||||
index: Index,
|
index: Index,
|
||||||
index_writer: IndexWriter,
|
fields: Fields,
|
||||||
|
index_writer: Option<IndexWriter>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TantivyBackend {
|
||||||
|
pub fn open(path: &Path) -> anyhow::Result<TantivyBackend> {
|
||||||
|
if !path.exists() {
|
||||||
|
std::fs::create_dir(path)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
let dir_path = path.join("tantivy");
|
||||||
|
|
||||||
|
let (index, fields) = if dir_path.exists() {
|
||||||
|
let index = Index::open_in_dir(dir_path)?;
|
||||||
|
|
||||||
|
let schema = index.schema();
|
||||||
|
let mut field_map: HashMap<_, _> = schema
|
||||||
|
.fields()
|
||||||
|
.map(|(field, field_entry)| (field_entry.name(), field))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let fields = Fields {
|
||||||
|
title: field_map.remove("title").context("No title field")?,
|
||||||
|
article: field_map.remove("article").context("No article field")?,
|
||||||
|
nonarticle: field_map
|
||||||
|
.remove("nonarticle")
|
||||||
|
.context("No nonarticle field")?,
|
||||||
|
url: field_map.remove("url").context("No url field")?,
|
||||||
|
tags: field_map.remove("tags").context("No tags field")?,
|
||||||
|
};
|
||||||
|
|
||||||
|
(index, fields)
|
||||||
|
} else {
|
||||||
|
let mut schema_builder = SchemaBuilder::new();
|
||||||
|
let fields = Fields {
|
||||||
|
title: schema_builder.add_text_field("title", TEXT | STORED),
|
||||||
|
article: schema_builder.add_text_field("article", TEXT),
|
||||||
|
nonarticle: schema_builder.add_text_field("nonarticle", TEXT),
|
||||||
|
url: schema_builder.add_text_field("url", STORED),
|
||||||
|
tags: schema_builder.add_facet_field("tags", ()),
|
||||||
|
};
|
||||||
|
let schema = schema_builder.build();
|
||||||
|
|
||||||
|
let index = Index::create_in_dir(dir_path, schema)?;
|
||||||
|
|
||||||
|
(index, fields)
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(TantivyBackend {
|
||||||
|
index,
|
||||||
|
fields,
|
||||||
|
index_writer: None,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Backend for TantivyBackend {
|
impl Backend for TantivyBackend {
|
||||||
fn add_document(&mut self, _document: BackendIndependentDocument) -> anyhow::Result<()> {
|
fn add_document(&mut self, document: BackendIndependentDocument) -> anyhow::Result<()> {
|
||||||
// self.index_writer.add_document(doc! {
|
let index_writer = match self.index_writer.as_ref() {
|
||||||
// "title" => document.title,
|
None => {
|
||||||
// "article" => document.article_body,
|
self.index_writer = Some(self.index.writer(100 * 1024 * 1024)?);
|
||||||
// "nonarticle" => document.nonarticle_body,
|
self.index_writer.as_ref().unwrap()
|
||||||
// "url" => document.url,
|
}
|
||||||
// "tags" => document.tags
|
Some(index_writer) => index_writer,
|
||||||
// })?;
|
};
|
||||||
todo!()
|
|
||||||
|
let Fields {
|
||||||
|
title,
|
||||||
|
article,
|
||||||
|
nonarticle,
|
||||||
|
url,
|
||||||
|
tags,
|
||||||
|
} = &self.fields;
|
||||||
|
|
||||||
|
let mut tantivy_doc = doc! {
|
||||||
|
*title => document.title,
|
||||||
|
*article => document.article_body,
|
||||||
|
*nonarticle => document.nonarticle_body,
|
||||||
|
*url => document.url
|
||||||
|
};
|
||||||
|
// TODO do we actually want facets? How about u64 tags or something...?
|
||||||
|
for tag in &document.tags {
|
||||||
|
tantivy_doc.add_facet(*tags, Facet::from(&format!("/{}", tag)));
|
||||||
|
}
|
||||||
|
|
||||||
|
index_writer.add_document(tantivy_doc)?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flush(&mut self) -> anyhow::Result<()> {
|
fn flush(&mut self) -> anyhow::Result<()> {
|
||||||
|
|
Loading…
Reference in New Issue