From 7aa5521c5dad3ddef2006373685c29f849598ac2 Mon Sep 17 00:00:00 2001 From: Olivier Date: Wed, 23 Mar 2022 20:57:51 +0000 Subject: [PATCH] Think a bit about how indexers will fit together --- quickpeep_indexer/src/backend.rs | 16 ++++++++- quickpeep_indexer/src/backend/tantivy.rs | 46 ++++++++++++++++++++---- 2 files changed, 55 insertions(+), 7 deletions(-) diff --git a/quickpeep_indexer/src/backend.rs b/quickpeep_indexer/src/backend.rs index c10a969..978c5b7 100644 --- a/quickpeep_indexer/src/backend.rs +++ b/quickpeep_indexer/src/backend.rs @@ -3,4 +3,18 @@ pub mod tantivy; /// Trait representing a search index backend; /// either Tantivy (embedded) or Meilisearch (via HTTP API). -pub trait Backend {} +pub trait Backend { + fn add_document(&mut self, document: BackendIndependentDocument) -> anyhow::Result<()>; + + fn flush(&mut self) -> anyhow::Result<()>; +} + +/// A backend-independent document struct. +#[derive(Clone, Debug)] +pub struct BackendIndependentDocument { + pub title: String, + pub article_body: String, + pub nonarticle_body: String, + pub tags: Vec<String>, + pub url: String, +} diff --git a/quickpeep_indexer/src/backend/tantivy.rs b/quickpeep_indexer/src/backend/tantivy.rs index 3bb7527..3cd9b4c 100644 --- a/quickpeep_indexer/src/backend/tantivy.rs +++ b/quickpeep_indexer/src/backend/tantivy.rs @@ -1,9 +1,43 @@ +use crate::backend::{Backend, BackendIndependentDocument}; use std::path::Path; -use tantivy::schema::Schema; +use tantivy::schema::{Schema, STORED, TEXT}; +use tantivy::{doc, Index, IndexWriter}; -fn experiment_tantivy() { - let schema = Schema::builder() - // TODO fields - .build(); - tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema); +fn experiment_tantivy() -> anyhow::Result<()> { + let mut schema_builder = Schema::builder(); + // TODO what should our schema look like? Should we have another database with stuff? + // (notably we could Zstd-compress things in another datastore, for reduced disk usage...) 
+ schema_builder.add_text_field("title", TEXT | STORED); + schema_builder.add_text_field("article", TEXT); + schema_builder.add_text_field("nonarticle", TEXT); + schema_builder.add_text_field("url", STORED); + schema_builder.add_facet_field("tags", ()); + // schema_builder.add_bytes_field() + let schema = schema_builder.build(); + let index = tantivy::Index::create_in_dir(Path::new("/tmp/tindex"), schema)?; + let writer = index.writer(100 * 1024 * 1024)?; + + Ok(()) +} + +pub struct TantivyBackend { + index: Index, + index_writer: IndexWriter, +} + +impl Backend for TantivyBackend { + fn add_document(&mut self, document: BackendIndependentDocument) -> anyhow::Result<()> { + self.index_writer.add_document(doc! { + "title" => document.title, + "article" => document.article_body, + "nonarticle" => document.nonarticle_body, + "url" => document.url, + "tags" => document.tags + })?; + todo!() + } + + fn flush(&mut self) -> anyhow::Result<()> { + todo!() + } }