Simplify textification of indexed pages
This commit is contained in:
		
							parent
							
								
									8ec8003dbb
								
							
						
					
					
						commit
						eb899ac9a5
					
				@ -66,67 +66,85 @@ impl DenseTree {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn generate_textual_format(nodes: &Vec<DenseTree>) -> String {
 | 
			
		||||
    pub fn generate_textual_format(nodes: &Vec<DenseTree>, rich: bool) -> String {
 | 
			
		||||
        let mut buf = String::new();
 | 
			
		||||
        for node in nodes {
 | 
			
		||||
            node.append_in_textual_format(&mut buf);
 | 
			
		||||
            node.append_in_textual_format(&mut buf, rich);
 | 
			
		||||
        }
 | 
			
		||||
        simplify_newlines(&buf)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn append_in_textual_format(&self, string: &mut String) {
 | 
			
		||||
    fn append_in_textual_format(&self, string: &mut String, rich: bool) {
 | 
			
		||||
        match self {
 | 
			
		||||
            DenseTree::Heading1(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n# ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Heading2(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n## ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Heading3(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n### ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Heading4(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n#### ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Heading5(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n##### ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Heading6(children) => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("\n\n###### ");
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                string.push_str("\n");
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Link { children, href, .. } => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push('[');
 | 
			
		||||
                };
 | 
			
		||||
                for child in children {
 | 
			
		||||
                    child.append_in_textual_format(string);
 | 
			
		||||
                    child.append_in_textual_format(string, rich);
 | 
			
		||||
                }
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str(&format!("]({})", href));
 | 
			
		||||
                };
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Image { .. } => {
 | 
			
		||||
                if rich {
 | 
			
		||||
                    string.push_str("[IMG]");
 | 
			
		||||
                };
 | 
			
		||||
            }
 | 
			
		||||
            DenseTree::Text(text) => {
 | 
			
		||||
                string.push_str(text);
 | 
			
		||||
 | 
			
		||||
@ -76,8 +76,9 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
 | 
			
		||||
            let document = page_record.record.document;
 | 
			
		||||
 | 
			
		||||
            let article_body = DenseTree::generate_textual_format(&document.body_content);
 | 
			
		||||
            let nonarticle_body = DenseTree::generate_textual_format(&document.body_remainder);
 | 
			
		||||
            let article_body = DenseTree::generate_textual_format(&document.body_content, false);
 | 
			
		||||
            let nonarticle_body =
 | 
			
		||||
                DenseTree::generate_textual_format(&document.body_remainder, false);
 | 
			
		||||
 | 
			
		||||
            let tags = seed_lookup
 | 
			
		||||
                .look_up(&Url::parse(page_record.url.as_ref())?)?
 | 
			
		||||
 | 
			
		||||
@ -181,7 +181,9 @@ impl PageExtractionServiceInternal {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let dense_doc = DenseTree::from_body(root_node.clone());
 | 
			
		||||
        let dense_doc_text = Lazy::new(Box::new(|| DenseTree::generate_textual_format(&dense_doc)));
 | 
			
		||||
        let dense_doc_text = Lazy::new(Box::new(|| {
 | 
			
		||||
            DenseTree::generate_textual_format(&dense_doc, true)
 | 
			
		||||
        }));
 | 
			
		||||
        //eprintln!("^^^^^\n{}\n^^^^^", *dense_doc_text);
 | 
			
		||||
 | 
			
		||||
        if language.is_none() {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user