74 lines
2.5 KiB
Rust
74 lines
2.5 KiB
Rust
use crate::raking::{RakeIntent, TemporaryFailure};
|
|
use reqwest::Url;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::collections::BTreeMap;
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
|
|
pub struct ActiveDomainRecord {
|
|
/// The raffle ticket number owned by this domain.
|
|
pub raffle_ticket: u32,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct UrlVisitedRecord {
|
|
/// Number of days since the QuickPeep Epoch that this page was last raked at.
|
|
/// A u16 is fine here, giving 179 years worth of values. This allows compact encoding.
|
|
/// We don't really care about a more granular timestamp: sitemaps and feeds usually only
|
|
/// give the date of last update anyway.
|
|
pub last_visited_days: u16,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct QueueUrlRecord {
|
|
pub intent: RakeIntent, // TODO CONSIDER
|
|
}
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct OnHoldUrlRecord {
|
|
/// Record that should be emitted once this is released.
|
|
pub queue_record: QueueUrlRecord,
|
|
|
|
/// Number of times this URL has been 'enqueued'; capped at 255.
|
|
pub refs: u8,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct BackingOffDomainRecord {
|
|
/// The URL that caused the backoff.
|
|
pub failed_url: String,
|
|
/// The reason that this backoff is in place
|
|
pub failure: TemporaryFailure,
|
|
/// Duration of the backoff. Used to provide increasing backoffs if the failures persist.
|
|
pub backoff: u32,
|
|
/// When the domain should be reinstated
|
|
/// MUST match the timestamp present in the reinstatements table.
|
|
pub reinstate_at: u64,
|
|
}
|
|
|
|
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
|
|
pub struct DomainRecord {
|
|
pub rakeable_path_prefixes: BTreeMap<String, bool>,
|
|
}
|
|
|
|
impl DomainRecord {
|
|
/// Returns whether the URL is rakeable.
|
|
///
|
|
/// Preconditions: it has been checked that the record applies to the domain
|
|
pub fn is_url_rakeable(&self, url: &Url) -> Option<bool> {
|
|
let mut final_result = None;
|
|
// TODO This could be made more efficient.
|
|
for (prefix, &rakeable) in self.rakeable_path_prefixes.iter() {
|
|
if url.path().starts_with(prefix) {
|
|
final_result = Some(rakeable);
|
|
}
|
|
if prefix.as_str() > url.path() {
|
|
// e.g. /dog > /cat/xyz
|
|
// This means we've missed all chances to see our prefix,
|
|
// so we break here (efficiency).
|
|
break;
|
|
}
|
|
}
|
|
final_result
|
|
}
|
|
}
|