Sort through TODO items
This commit is contained in:
		
							parent
							
								
									120702ce0e
								
							
						
					
					
						commit
						6d109632a3
					
				| @ -272,7 +272,7 @@ impl DenseTreeBuilder { | ||||
|                         self.preceding_newlines = 0; | ||||
|                     } | ||||
|                     "img" => { | ||||
|                         // TODO Decide if this is worth the space...
 | ||||
|                         // TODO(future) Decide if this is worth the space...
 | ||||
|                         let attrs = element.attributes.borrow(); | ||||
|                         let src = attrs.get("src").unwrap_or("").to_owned(); | ||||
| 
 | ||||
|  | ||||
| @ -26,17 +26,17 @@ const FLAG_WEIGHT_CLASSES: u32 = 0x2; | ||||
| const FLAG_CLEAN_CONDITIONALLY: u32 = 0x4; | ||||
| const READABILITY_SCORE: &'static str = "readability-score"; | ||||
| const HTML_NS: &'static str = "http://www.w3.org/1999/xhtml"; | ||||
| // TODO: Change to HashSet
 | ||||
| // TODO(perf): Change to HashSet
 | ||||
| const PHRASING_ELEMS: [&str; 39] = [ | ||||
|     "abbr", "audio", "b", "bdo", "br", "button", "cite", "code", "data", "datalist", "dfn", "em", | ||||
|     "embed", "i", "img", "input", "kbd", "label", "mark", "math", "meter", "noscript", "object", | ||||
|     "output", "progress", "q", "ruby", "samp", "script", "select", "small", "span", "strong", | ||||
|     "sub", "sup", "textarea", "time", "var", "wbr", | ||||
| ]; | ||||
| // TODO: Change to HashSet
 | ||||
| // TODO(perf): Change to HashSet
 | ||||
| const DEFAULT_TAGS_TO_SCORE: [&str; 9] = | ||||
|     ["section", "h2", "h3", "h4", "h5", "h6", "p", "td", "pre"]; | ||||
| // TODO: Change to HashSet
 | ||||
| // TODO(perf): Change to HashSet
 | ||||
| const ALTER_TO_DIV_EXCEPTIONS: [&str; 4] = ["div", "article", "section", "p"]; | ||||
| const PRESENTATIONAL_ATTRIBUTES: [&str; 12] = [ | ||||
|     "align", | ||||
| @ -54,7 +54,7 @@ const PRESENTATIONAL_ATTRIBUTES: [&str; 12] = [ | ||||
| ]; | ||||
| 
 | ||||
| const DATA_TABLE_DESCENDANTS: [&str; 5] = ["col", "colgroup", "tfoot", "thead", "th"]; | ||||
| // TODO: Change to HashSet
 | ||||
| // TODO(perf): Change to HashSet
 | ||||
| const DEPRECATED_SIZE_ATTRIBUTE_ELEMS: [&str; 5] = ["table", "th", "td", "hr", "pre"]; | ||||
| 
 | ||||
| pub mod regexes; | ||||
| @ -168,7 +168,7 @@ impl Readability { | ||||
|                     continue; | ||||
|                 } | ||||
|                 if let Some(mut prev_elem) = noscript.as_node().previous_sibling() { | ||||
|                     // TODO: Fix this to have a better way of extracting nodes that are elements
 | ||||
|                     // TODO(style): Fix this to have a better way of extracting nodes that are elements
 | ||||
|                     while prev_elem.as_element().is_none() { | ||||
|                         match prev_elem.previous_sibling() { | ||||
|                             Some(new_prev) => prev_elem = new_prev, | ||||
| @ -389,7 +389,7 @@ impl Readability { | ||||
|     /// The must_be_element argument ensure the next element is actually an element node.
 | ||||
|     /// This is likely to factored out into a new function.
 | ||||
|     fn next_element(node_ref: Option<NodeRef>, must_be_element: bool) -> Option<NodeRef> { | ||||
|         // TODO: Could probably be refactored to use the elements method
 | ||||
|         // TODO(style): Could probably be refactored to use the elements method
 | ||||
|         let mut node_ref = node_ref; | ||||
|         while node_ref.is_some() { | ||||
|             match node_ref.as_ref().unwrap().data() { | ||||
| @ -544,7 +544,7 @@ impl Readability { | ||||
|     /// Converts some of the common HTML entities in string to their corresponding characters.
 | ||||
|     fn unescape_html_entities(value: &mut String) { | ||||
|         if !value.is_empty() { | ||||
|             // TODO: Extract this
 | ||||
|             // TODO(style): Extract this
 | ||||
|             let mut html_escape_map: HashMap<&str, &str> = HashMap::new(); | ||||
|             html_escape_map.insert("lt", "<"); | ||||
|             html_escape_map.insert("gt", ">"); | ||||
| @ -637,7 +637,7 @@ impl Readability { | ||||
|     /// Removes the class="" attribute from every element in the given subtree, except those that
 | ||||
|     /// match CLASSES_TO_PRESERVE and the classesToPreserve array from the options object.
 | ||||
|     fn clean_classes(&mut self) { | ||||
|         // TODO: This should accessed from Self
 | ||||
|         // TODO(style): This should accessed from Self
 | ||||
|         let classes_to_preserve: HashSet<&str> = HashSet::new(); | ||||
|         if let Some(article_node) = &mut self.article_node { | ||||
|             for elem in article_node.inclusive_descendants().elements() { | ||||
| @ -790,7 +790,7 @@ impl Readability { | ||||
|     /// Run any post-process modifications to article content as necessary.
 | ||||
|     fn post_process_content(&mut self, url: &str) { | ||||
|         self.fix_relative_uris(url); | ||||
|         // TODO: Add flag check
 | ||||
|         // TODO(style): Add flag check
 | ||||
|         self.clean_classes(); | ||||
|         self.clean_readability_attrs(); | ||||
|     } | ||||
| @ -1041,7 +1041,7 @@ impl Readability { | ||||
| 
 | ||||
|     /// Determine whether element has any children block level elements.
 | ||||
|     fn has_child_block_element(node_ref: &NodeRef) -> bool { | ||||
|         // TODO: Refer to a static HashSet
 | ||||
|         // TODO(perf): Refer to a static HashSet
 | ||||
|         let block_level_elems: [&str; 32] = [ | ||||
|             "address", | ||||
|             "article", | ||||
| @ -1622,13 +1622,13 @@ impl Readability { | ||||
|         let page = self.root_node.select_first("body"); | ||||
|         if page.is_err() { | ||||
|             bail!("Document has no <body>"); | ||||
|             // TODO return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
 | ||||
|             // TODO(style) return Err(ErrorKind::ReadabilityError("Document has no <body>".into()).into());
 | ||||
|         } | ||||
|         let page = page.unwrap(); | ||||
|         let mut attempts: Vec<ExtractAttempt> = Vec::new(); | ||||
| 
 | ||||
|         // var pageCacheHtml = page.innerHTML;
 | ||||
|         //TODO: Add page cache
 | ||||
|         //TODO(???): Add page cache
 | ||||
| 
 | ||||
|         loop { | ||||
|             //   var stripUnlikelyCandidates = this._flagIsActive(this.FLAG_STRIP_UNLIKELYS);
 | ||||
| @ -2197,7 +2197,7 @@ mod test { | ||||
|     use kuchiki::traits::*; | ||||
|     use kuchiki::NodeRef; | ||||
| 
 | ||||
|     // TODO: Refactor not to use test file possibly
 | ||||
|     // TODO(style): Refactor not to use test file possibly
 | ||||
|     const TEST_HTML: &'static str = include_str!("../test_html/simple.html"); | ||||
| 
 | ||||
|     #[test] | ||||
| @ -3867,7 +3867,7 @@ characters. For that reason, this <p> tag could not be a byline because it's too | ||||
| 
 | ||||
|     #[test] | ||||
|     fn test_clean_classes() { | ||||
|         // TODO: This test will later be edited to ensure it checks to only remove certain classes
 | ||||
|         // TODO(task): This test will later be edited to ensure it checks to only remove certain classes
 | ||||
|         let html_str = r#" | ||||
|         <!DOCTYPE html> | ||||
|         <html> | ||||
|  | ||||
| @ -42,14 +42,14 @@ pub async fn main() -> anyhow::Result<()> { | ||||
|     let client = reqwest::ClientBuilder::new() | ||||
|         .timeout(TIME_LIMIT) | ||||
|         .default_headers(header_map) | ||||
|         // TODO We want to handle redirects ourselves so we can track them...
 | ||||
|         // We want to handle redirects ourselves so we can track them...
 | ||||
|         .redirect(Policy::none()) | ||||
|         .build()?; | ||||
| 
 | ||||
|     let mut adblock_engines = Vec::new(); | ||||
| 
 | ||||
|     for (antifeature, name) in &ADBLOCK_FILTER_PATHS { | ||||
|         // TODO Don't hardcode these paths in quite as bad a way...
 | ||||
|         // TODO(not important) Don't hardcode these paths in quite as bad a way...
 | ||||
|         let path = PathBuf::from(format!("./data/{}.adblock", name)); | ||||
|         if !path.exists() { | ||||
|             warn!("Missing adblock rules: {:?}.", path); | ||||
| @ -94,16 +94,12 @@ pub async fn main() -> anyhow::Result<()> { | ||||
|         } | ||||
|         RakeOutcome::RakedFeed(feed) => { | ||||
|             green_ln!("Feed"); | ||||
|             // TODO
 | ||||
| 
 | ||||
|             println!(); | ||||
|             let refs = references_from_urlrakes(&feed, ReferenceKind::FeedEntry); | ||||
|             print_references(&refs); | ||||
|         } | ||||
|         RakeOutcome::RakedSitemap(sitemap) => { | ||||
|             green_ln!("Sitemap"); | ||||
|             // TODO
 | ||||
| 
 | ||||
|             println!(); | ||||
|             let refs = references_from_urlrakes(&sitemap, ReferenceKind::SitemapEntry); | ||||
|             print_references(&refs); | ||||
|  | ||||
| @ -84,7 +84,7 @@ pub async fn main() -> anyhow::Result<()> { | ||||
| 
 | ||||
| pub struct Seed { | ||||
|     url: UrlOrUrlPattern, | ||||
|     // TODO These make more sense at the indexer stage. tags: BTreeSet<CompactString>,
 | ||||
|     // TODO(later) These make more sense at the indexer stage. tags: BTreeSet<CompactString>,
 | ||||
| } | ||||
| 
 | ||||
| /// Either a URL or a URL prefix.
 | ||||
| @ -237,6 +237,10 @@ async fn import_and_flush_batch( | ||||
|         let mut allowed_domain_record = DirtyTracker::new( | ||||
|             allowed_domain_record.unwrap_or_else(|| AllowedDomainRecord::default()), | ||||
|         ); | ||||
|         if is_domain_new { | ||||
|             // Mark it as dirty
 | ||||
|             let _: &mut AllowedDomainRecord = allowed_domain_record.borrow_mut(); | ||||
|         } | ||||
| 
 | ||||
|         // Register the domain. This is a no-op if it's already active or backing off.
 | ||||
|         txn.insert_active_domain_with_new_raffle_ticket(domain.clone().into_owned())?; | ||||
|  | ||||
| @ -361,7 +361,7 @@ pub fn rake_feed(content: &[u8], url: &Url) -> anyhow::Result<Vec<UrlRaked>> { | ||||
|         } else { | ||||
|             continue; | ||||
|         }; | ||||
|         let url = Url::parse(&link.href).context("parsing URL in feed")?; // TODO ignore failure here...?
 | ||||
|         let url = Url::parse(&link.href).context("parsing URL in feed")?; // TODO(robustness) ignore failure here...?
 | ||||
| 
 | ||||
|         let last_changed = entry.updated.or(entry.published); | ||||
| 
 | ||||
| @ -372,7 +372,7 @@ pub fn rake_feed(content: &[u8], url: &Url) -> anyhow::Result<Vec<UrlRaked>> { | ||||
|         }); | ||||
|     } | ||||
| 
 | ||||
|     // TODO paginated feeds (e.g. JSON Feed next_url)
 | ||||
|     // TODO(feature) paginated feeds (e.g. JSON Feed next_url)
 | ||||
| 
 | ||||
|     Ok(urls) | ||||
| } | ||||
|  | ||||
| @ -126,7 +126,7 @@ pub fn guess_document_language(text: &str) -> Option<String> { | ||||
|         .map(|lang: Language| lang.iso_code_639_1().to_string()) | ||||
| } | ||||
| 
 | ||||
| // TODO this isn't particularly efficient. Probably want a trie if it's important...
 | ||||
| // TODO(perf) this isn't particularly efficient. Probably want a trie if it's important...
 | ||||
| pub struct IpSet { | ||||
|     ips: BTreeSet<IpNetwork>, | ||||
| } | ||||
|  | ||||
| @ -261,7 +261,7 @@ impl PageExtractionServiceInternal { | ||||
|             document.body_remainder = DenseTree::from_body(root_node.clone()); | ||||
|             document.body_content = DenseTree::from_body(article_node); | ||||
|         } else { | ||||
|             todo!() | ||||
|             todo!() // NEEDED
 | ||||
|         } | ||||
| 
 | ||||
|         Ok(ExtractedPage::Success { | ||||
|  | ||||
| @ -94,13 +94,13 @@ impl TaskContext { | ||||
|                         self.process_domain(domain).await?; | ||||
|                     } else { | ||||
|                         // Already in use; need to pick again
 | ||||
|                         // TODO be smarter about this.
 | ||||
|                         // TODO(perf) be smarter about this.
 | ||||
|                         tokio::time::sleep(Duration::from_secs(60)).await; | ||||
|                     } | ||||
|                 } | ||||
|                 None => { | ||||
|                     // No domains left! Woot.
 | ||||
|                     // TODO come up with some stopping conditions
 | ||||
|                     // TODO(perf, feature) come up with some stopping conditions
 | ||||
|                     tokio::time::sleep(Duration::from_secs(60)).await; | ||||
|                 } | ||||
|             } | ||||
| @ -143,8 +143,8 @@ impl TaskContext { | ||||
|                             return Ok(false); | ||||
|                         } | ||||
| 
 | ||||
|                         // TODO delete the domain from the store
 | ||||
|                         todo!(); | ||||
|                         // TODO(needed) delete the domain from the store
 | ||||
|                         todo!(); // NEEDED
 | ||||
| 
 | ||||
|                         txn.commit()?; | ||||
|                         Ok(true) | ||||
| @ -366,12 +366,13 @@ impl TaskContext { | ||||
|                 Ok(NextAction::Continue) | ||||
|             } | ||||
|             RakeOutcome::TemporaryFailure(failure) => { | ||||
|                 // TODO do we want to log this somewhere?
 | ||||
|                 // TODO(future) do we want to log this somewhere?
 | ||||
|                 // or at least a metric
 | ||||
| 
 | ||||
|                 let domain = get_reduced_domain(url)?; | ||||
|                 let url = url.clone(); | ||||
| 
 | ||||
|                 // TODO add 1.1× the previous backoff, if there was one.
 | ||||
|                 // TODO(feature) add 1.1× the previous backoff, if there was one.
 | ||||
|                 let new_backoff = failure.backoff_sec; | ||||
|                 let domain = domain.into_owned(); | ||||
| 
 | ||||
|  | ||||
| @ -178,7 +178,7 @@ impl RakerStore { | ||||
|         F: FnOnce(RakerTxn<'_, RW>) -> anyhow::Result<R> + Send + 'static, | ||||
|         R: Send + 'static, | ||||
|     { | ||||
|         // TODO consider adding a lock here to prevent all the async executors getting stuck here...
 | ||||
|         // TODO(robustness) consider adding a lock here to prevent all the async executors getting stuck here...
 | ||||
|         let this = self.clone(); | ||||
|         Ok(tokio::task::spawn_blocking(move || -> anyhow::Result<R> { | ||||
|             let txn = this.rw_txn()?; | ||||
|  | ||||
| @ -20,7 +20,7 @@ pub struct UrlVisitedRecord { | ||||
| 
 | ||||
| #[derive(Clone, Debug, Deserialize, Serialize)] | ||||
| pub struct QueueUrlRecord { | ||||
|     pub intent: RakeIntent, // TODO
 | ||||
|     pub intent: RakeIntent, // TODO CONSIDER
 | ||||
| } | ||||
| 
 | ||||
| #[derive(Clone, Debug, Deserialize, Serialize)] | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user