quickpeep/quickpeep_structs/src/rake_entries.rs

80 lines
2.6 KiB
Rust

use bitflags::bitflags;
use bitflags_serde_shim::impl_serde_for_bitflags;
use quickpeep_densedoc::DenseDocument;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::collections::BTreeSet;
bitflags! {
    /// Antifeatures detected on a page during analysis, packed as a bit set into one byte.
    ///
    /// Every flag occupies its own distinct bit so that flags can be combined, stored and
    /// tested independently of each other.
    pub struct AnalysisAntifeatures: u8 {
        /// Adverts are present on the page, according to a filter.
        const ADVERTS = 0x01;
        /// Some things are blocked due to privacy concerns, according to a filter.
        const PRIVACY = 0x02;
        /// Annoying cookie nags are present on this page, according to a cosmetic filter.
        const COOKIE_NAG = 0x04;
        /// Unspecified annoyances are present on this page, according to a cosmetic filter.
        const ANNOYANCE = 0x08;
        /// Social trackers are a subcategory of privacy...
        const SOCIAL = 0x10;
        /// The web page was served over CloudFlare at the time of indexing, which is not in the
        /// spirit of decentralisation.
        // This flag used to share bit 0x10 with `SOCIAL`, which made the two flags aliases of
        // each other: setting or testing one implicitly set or tested the other. It now has
        // its own bit.
        // NOTE(review): records serialised while both flags were 0x10 cannot distinguish
        // SOCIAL from CLOUDFLARE; confirm whether existing data needs re-raking or a schema
        // version bump.
        const CLOUDFLARE = 0x20;
    }
}
// Generate the serde `Serialize`/`Deserialize` impls for the flag set via the
// `bitflags_serde_shim` macro, so that `AnalysisAntifeatures` can be used as a field of the
// serde-derived structs in this file.
// NOTE(review): the shim presumably (de)serialises the raw `u8` bits — confirm against the
// crate documentation before changing any flag values.
impl_serde_for_bitflags!(AnalysisAntifeatures);
// Schema identifier strings, each of the form `<name>:<version>`.
// NOTE(review): presumably written alongside the corresponding record streams so that
// readers can check they understand the format — confirm against the pack reader/writer.

/// Schema identifier for raked page records.
pub const SCHEMA_RAKED_PAGES: &str = "quickpeep_pages:0.1.0";
/// Schema identifier for raked reference records.
pub const SCHEMA_RAKED_REFERENCES: &str = "quickpeep_references:0.1.0";
/// Schema identifier for raked rejection records.
pub const SCHEMA_RAKED_REJECTIONS: &str = "quickpeep_rejections:0.1.0";
/// Schema identifier for raked icon records.
pub const SCHEMA_RAKED_ICONS: &str = "quickpeep_icons:0.1.0";
/// A page record: the antifeature flags found for the page together with its content as a
/// [`DenseDocument`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct RakedPageEntry {
/// Bit set of antifeatures that analysis flagged on this page.
pub analysed_antifeatures: AnalysisAntifeatures,
/// The page content in dense-document form.
pub document: DenseDocument,
}
/// An icon record, holding the encoded image bytes.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct IconEntry {
/// Densely-packed WebP bytes (with low quality).
pub webp_bytes: Vec<u8>,
}
/// A referrer record: the set of references collected for one referrer.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct RakedReferrerEntry {
/// The references, deduplicated and kept in the order defined by `RakedReference`'s
/// derived `Ord` (a `BTreeSet` stores each distinct value once, in sorted order).
pub references: BTreeSet<RakedReference>,
}
/// A single reference to a target, tagged with how the reference was found.
///
/// The derived ordering compares fields in declaration order (`target`, then `kind`, then
/// `last_mod`), so reordering the fields changes how these sort inside a `BTreeSet`.
#[derive(Serialize, Deserialize, Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub struct RakedReference {
/// The target of the reference (presumably a URL string — confirm against the producer).
pub target: String,
/// How this reference was discovered.
pub kind: ReferenceKind,
/// Date of last modification (if known), as a QuickPeep datestamp.
pub last_mod: Option<u16>,
}
/// The way in which a reference was discovered.
///
/// The derived ordering follows variant declaration order, so reordering or inserting
/// variants changes how values sort.
/// NOTE(review): variant order may also affect the serialised form, depending on the serde
/// format in use — confirm before reordering.
#[derive(Serialize, Deserialize, Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum ReferenceKind {
/// Canonical URL for the same document, as declared in the page.
CanonicalUrl,
/// HTTP -> HTTPS upgrade, automatically caused by QuickPeep
SecureUpgrade,
/// HTTP-level redirect.
Redirect,
/// Link in a page (<a>). Could be to another page or to a feed.
Link,
/// <link> to a feed
HeaderLinkedFeed,
/// Presumably a reference taken from an entry of a feed (undocumented in the original;
/// confirm against the raker).
FeedEntry,
/// Presumably a reference taken from an entry of a sitemap (undocumented in the original;
/// confirm against the raker).
SitemapEntry,
}
/// A record of type `T` paired with the URL it belongs to.
///
/// The URL is a `Cow`, so a record can either borrow the URL string for lifetime `'a` or
/// own it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct PackRecord<'a, T> {
/// URL associated with the record; borrowed or owned.
pub url: Cow<'a, str>,
/// The payload carried for that URL.
pub record: T,
}