Port to Postgres
This commit is contained in:
		
							parent
							
								
									e07ac16bc4
								
							
						
					
					
						commit
						2876939b59
					
				
							
								
								
									
										343
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										343
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							@ -15,7 +15,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "7ab3f32d1eb0f323dcdd51cbb8d68cff415153870ff3bd60e12d5d56298bfcb1"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "addr",
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "bitflags",
 | 
			
		||||
 "flate2",
 | 
			
		||||
 "idna",
 | 
			
		||||
@ -206,9 +206,9 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "atoi"
 | 
			
		||||
version = "0.4.0"
 | 
			
		||||
version = "1.0.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "616896e05fc0e2649463a93a15183c6a16bf03413a7af88ef1285ddedfa9cda5"
 | 
			
		||||
checksum = "d7c57d12312ff59c811c0643f4d80830505833c9ffaebd193d819392b265be8e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "num-traits",
 | 
			
		||||
]
 | 
			
		||||
@ -318,6 +318,12 @@ version = "0.13.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "base64"
 | 
			
		||||
version = "0.21.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "bindgen"
 | 
			
		||||
version = "0.59.2"
 | 
			
		||||
@ -394,15 +400,6 @@ dependencies = [
 | 
			
		||||
 "generic-array 0.12.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "block-buffer"
 | 
			
		||||
version = "0.9.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "generic-array 0.14.5",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "block-buffer"
 | 
			
		||||
version = "0.10.2"
 | 
			
		||||
@ -700,18 +697,18 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "crc"
 | 
			
		||||
version = "2.1.0"
 | 
			
		||||
version = "3.0.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "49fc9a695bca7f35f5f4c15cddc84415f66a74ea78eef08e90c5024f2b540e23"
 | 
			
		||||
checksum = "86ec7a15cbe22e59248fc7eadb1907dab5ba09372595da4d73dd805ed4417dfe"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "crc-catalog",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "crc-catalog"
 | 
			
		||||
version = "1.1.1"
 | 
			
		||||
version = "2.2.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "ccaeedb56da03b09f598226e25e80088cb4cd25f316e6e4df7d695f0feeb1403"
 | 
			
		||||
checksum = "9cace84e55f07e7301bae1c519df89cdad8cc3cd868413d3fdbdeca9ff3db484"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "crc32fast"
 | 
			
		||||
@ -947,15 +944,6 @@ dependencies = [
 | 
			
		||||
 "generic-array 0.12.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "digest"
 | 
			
		||||
version = "0.9.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "generic-array 0.14.5",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "digest"
 | 
			
		||||
version = "0.10.3"
 | 
			
		||||
@ -978,10 +966,30 @@ dependencies = [
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "dotenv"
 | 
			
		||||
version = "0.15.0"
 | 
			
		||||
name = "dirs"
 | 
			
		||||
version = "4.0.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"
 | 
			
		||||
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "dirs-sys",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "dirs-sys"
 | 
			
		||||
version = "0.3.7"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "libc",
 | 
			
		||||
 "redox_users",
 | 
			
		||||
 "winapi",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "dotenvy"
 | 
			
		||||
version = "0.15.7"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "downcast-rs"
 | 
			
		||||
@ -1009,6 +1017,9 @@ name = "either"
 | 
			
		||||
version = "1.6.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "serde",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding_rs"
 | 
			
		||||
@ -1243,9 +1254,9 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "futures-core"
 | 
			
		||||
version = "0.3.21"
 | 
			
		||||
version = "0.3.28"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3"
 | 
			
		||||
checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "futures-executor"
 | 
			
		||||
@ -1349,13 +1360,13 @@ checksum = "aa12dfaa57be769c6681b4d193398cae8db7f7b9af3e86d362d7f0a3c294a1a0"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "anyhow",
 | 
			
		||||
 "ring",
 | 
			
		||||
 "rustls",
 | 
			
		||||
 "rustls 0.19.1",
 | 
			
		||||
 "thiserror",
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "tokio-rustls",
 | 
			
		||||
 "tokio-rustls 0.22.0",
 | 
			
		||||
 "url",
 | 
			
		||||
 "webpki",
 | 
			
		||||
 "webpki-roots",
 | 
			
		||||
 "webpki 0.21.4",
 | 
			
		||||
 "webpki-roots 0.21.1",
 | 
			
		||||
 "x509-signature",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
@ -1453,12 +1464,21 @@ dependencies = [
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "hashlink"
 | 
			
		||||
version = "0.7.0"
 | 
			
		||||
name = "hashbrown"
 | 
			
		||||
version = "0.12.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf"
 | 
			
		||||
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "hashbrown",
 | 
			
		||||
 "ahash 0.7.6",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "hashlink"
 | 
			
		||||
version = "0.8.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "69fe1fcf8b4278d860ad0548329f892a3631fb63f82574df68275f34cdbe0ffa"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "hashbrown 0.12.3",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -1467,7 +1487,7 @@ version = "7.5.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "31672b7011be2c4f7456c4ddbcb40e7e9a4a9fad8efe49a6ebaf5f307d0109c0"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "byteorder",
 | 
			
		||||
 "crossbeam-channel",
 | 
			
		||||
 "flate2",
 | 
			
		||||
@ -1489,6 +1509,9 @@ name = "heck"
 | 
			
		||||
version = "0.4.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "unicode-segmentation",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "hermit-abi"
 | 
			
		||||
@ -1505,6 +1528,24 @@ version = "0.4.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "hkdf"
 | 
			
		||||
version = "0.12.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "791a029f6b9fc27657f6f188ec6e5e43f6911f6f878e0dc5501396e09809d437"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "hmac",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "hmac"
 | 
			
		||||
version = "0.12.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "digest 0.10.3",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "html5ever"
 | 
			
		||||
version = "0.25.1"
 | 
			
		||||
@ -1671,7 +1712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "autocfg",
 | 
			
		||||
 "hashbrown",
 | 
			
		||||
 "hashbrown 0.11.2",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -1890,9 +1931,9 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "libsqlite3-sys"
 | 
			
		||||
version = "0.23.2"
 | 
			
		||||
version = "0.24.2"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "d2cafc7c74096c336d9d27145f7ebd4f4b6f95ba16aa5a282387267e6925cb58"
 | 
			
		||||
checksum = "898745e570c7d0453cc1fbc4a701eb6c662ed54e8fec8b7d14be137ebeeb9d14"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "cc",
 | 
			
		||||
 "pkg-config",
 | 
			
		||||
@ -2720,7 +2761,7 @@ version = "0.7.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "fcb87f3080f6d1d69e8c564c0fcfde1d7aa8cc451ce40cae89479111f03bc0eb"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "hashbrown",
 | 
			
		||||
 "hashbrown 0.11.2",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -2779,6 +2820,15 @@ version = "0.4.6"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "9376a4f0340565ad675d11fc1419227faf5f60cd7ac9cb2e7185a471f30af833"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "md-5"
 | 
			
		||||
version = "0.10.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "66b48670c893079d3c2ed79114e3644b7004df1c361a4e0ad52e2e6940d07c3d"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "digest 0.10.3",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "mdbx-sys"
 | 
			
		||||
version = "0.11.4-git.20210105"
 | 
			
		||||
@ -2924,7 +2974,7 @@ dependencies = [
 | 
			
		||||
 "atomic-shim",
 | 
			
		||||
 "crossbeam-epoch",
 | 
			
		||||
 "crossbeam-utils",
 | 
			
		||||
 "hashbrown",
 | 
			
		||||
 "hashbrown 0.11.2",
 | 
			
		||||
 "metrics 0.18.1",
 | 
			
		||||
 "num_cpus",
 | 
			
		||||
 "parking_lot 0.11.2",
 | 
			
		||||
@ -3219,12 +3269,6 @@ version = "0.2.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "opaque-debug"
 | 
			
		||||
version = "0.3.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "openssl"
 | 
			
		||||
version = "0.10.38"
 | 
			
		||||
@ -3615,11 +3659,11 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "proc-macro2"
 | 
			
		||||
version = "1.0.36"
 | 
			
		||||
version = "1.0.55"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
 | 
			
		||||
checksum = "1d0dd4be24fcdcfeaa12a432d588dc59bbad6cad3510c67e74a2b6b2fc950564"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "unicode-xid",
 | 
			
		||||
 "unicode-ident",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -3659,7 +3703,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "292972edad6bbecc137ab84c5e36421a4a6c979ea31d3cc73540dd04315b33e1"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "byteorder",
 | 
			
		||||
 "hashbrown",
 | 
			
		||||
 "hashbrown 0.11.2",
 | 
			
		||||
 "idna",
 | 
			
		||||
 "psl-types",
 | 
			
		||||
]
 | 
			
		||||
@ -3807,6 +3851,7 @@ dependencies = [
 | 
			
		||||
 "diplomatic-bag",
 | 
			
		||||
 "env_logger",
 | 
			
		||||
 "feed-rs",
 | 
			
		||||
 "futures-core",
 | 
			
		||||
 "futures-util",
 | 
			
		||||
 "gemini-fetch",
 | 
			
		||||
 "html5ever",
 | 
			
		||||
@ -3815,12 +3860,10 @@ dependencies = [
 | 
			
		||||
 "itertools",
 | 
			
		||||
 "kuchiki",
 | 
			
		||||
 "lazy_static",
 | 
			
		||||
 "libmdbx",
 | 
			
		||||
 "lingua",
 | 
			
		||||
 "log",
 | 
			
		||||
 "lru",
 | 
			
		||||
 "markup5ever",
 | 
			
		||||
 "mdbx-sys",
 | 
			
		||||
 "metrics 0.18.1",
 | 
			
		||||
 "metrics-exporter-prometheus",
 | 
			
		||||
 "metrics-process-promstyle",
 | 
			
		||||
@ -3843,7 +3886,7 @@ dependencies = [
 | 
			
		||||
 "signal-hook 0.3.13",
 | 
			
		||||
 "sitemap",
 | 
			
		||||
 "smartstring",
 | 
			
		||||
 "tempfile",
 | 
			
		||||
 "sqlx",
 | 
			
		||||
 "tikv-jemallocator",
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "webp",
 | 
			
		||||
@ -4017,6 +4060,17 @@ dependencies = [
 | 
			
		||||
 "bitflags",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "redox_users"
 | 
			
		||||
version = "0.4.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "getrandom 0.2.5",
 | 
			
		||||
 "redox_syscall",
 | 
			
		||||
 "thiserror",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "regex"
 | 
			
		||||
version = "1.5.5"
 | 
			
		||||
@ -4066,7 +4120,7 @@ version = "0.11.10"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "bytes",
 | 
			
		||||
 "encoding_rs",
 | 
			
		||||
 "futures-core",
 | 
			
		||||
@ -4150,7 +4204,7 @@ version = "0.7.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1b861ecaade43ac97886a512b360d01d66be9f41f3c61088b42cedf92e03d678"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "bitflags",
 | 
			
		||||
 "serde",
 | 
			
		||||
]
 | 
			
		||||
@ -4186,11 +4240,32 @@ version = "0.19.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "35edb675feee39aec9c99fa5ff985081995a06d594114ae14cbe797ad7b7a6d7"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "log",
 | 
			
		||||
 "ring",
 | 
			
		||||
 "sct",
 | 
			
		||||
 "webpki",
 | 
			
		||||
 "sct 0.6.1",
 | 
			
		||||
 "webpki 0.21.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "rustls"
 | 
			
		||||
version = "0.20.8"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "log",
 | 
			
		||||
 "ring",
 | 
			
		||||
 "sct 0.7.0",
 | 
			
		||||
 "webpki 0.22.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "rustls-pemfile"
 | 
			
		||||
version = "1.0.2"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "base64 0.21.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -4237,6 +4312,16 @@ dependencies = [
 | 
			
		||||
 "untrusted",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sct"
 | 
			
		||||
version = "0.7.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "ring",
 | 
			
		||||
 "untrusted",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "seahash"
 | 
			
		||||
version = "3.0.7"
 | 
			
		||||
@ -4363,20 +4448,29 @@ dependencies = [
 | 
			
		||||
 "block-buffer 0.7.3",
 | 
			
		||||
 "digest 0.8.1",
 | 
			
		||||
 "fake-simd",
 | 
			
		||||
 "opaque-debug 0.2.3",
 | 
			
		||||
 "opaque-debug",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sha1"
 | 
			
		||||
version = "0.10.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "006769ba83e921b3085caa8334186b00cf92b4cb1a6cf4632fbccc8eff5c7549"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "cfg-if",
 | 
			
		||||
 "cpufeatures",
 | 
			
		||||
 "digest 0.10.3",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sha2"
 | 
			
		||||
version = "0.9.9"
 | 
			
		||||
version = "0.10.5"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
 | 
			
		||||
checksum = "cf9db03534dff993187064c4e0c05a5708d2a9728ace9a8959b77bedf415dac5"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "block-buffer 0.9.0",
 | 
			
		||||
 "cfg-if",
 | 
			
		||||
 "cpufeatures",
 | 
			
		||||
 "digest 0.9.0",
 | 
			
		||||
 "opaque-debug 0.3.0",
 | 
			
		||||
 "digest 0.10.3",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -4500,9 +4594,9 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sqlformat"
 | 
			
		||||
version = "0.1.8"
 | 
			
		||||
version = "0.2.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "b4b7922be017ee70900be125523f38bdd644f4f06a1b16e8fa5a8ee8c34bffd4"
 | 
			
		||||
checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "itertools",
 | 
			
		||||
 "nom 7.1.1",
 | 
			
		||||
@ -4511,9 +4605,9 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sqlx"
 | 
			
		||||
version = "0.5.11"
 | 
			
		||||
version = "0.6.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "fc15591eb44ffb5816a4a70a7efd5dd87bfd3aa84c4c200401c4396140525826"
 | 
			
		||||
checksum = "f8de3b03a925878ed54a954f621e64bf55a3c1bd29652d0d1a17830405350188"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "sqlx-core",
 | 
			
		||||
 "sqlx-macros",
 | 
			
		||||
@ -4521,18 +4615,22 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sqlx-core"
 | 
			
		||||
version = "0.5.11"
 | 
			
		||||
version = "0.6.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "195183bf6ff8328bb82c0511a83faf60aacf75840103388851db61d7a9854ae3"
 | 
			
		||||
checksum = "fa8241483a83a3f33aa5fff7e7d9def398ff9990b2752b6c6112b83c6d246029"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "ahash 0.7.6",
 | 
			
		||||
 "atoi",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "bitflags",
 | 
			
		||||
 "byteorder",
 | 
			
		||||
 "bytes",
 | 
			
		||||
 "crc",
 | 
			
		||||
 "crossbeam-queue",
 | 
			
		||||
 "dirs",
 | 
			
		||||
 "dotenvy",
 | 
			
		||||
 "either",
 | 
			
		||||
 "event-listener",
 | 
			
		||||
 "flume",
 | 
			
		||||
 "futures-channel",
 | 
			
		||||
 "futures-core",
 | 
			
		||||
@ -4541,16 +4639,24 @@ dependencies = [
 | 
			
		||||
 "futures-util",
 | 
			
		||||
 "hashlink",
 | 
			
		||||
 "hex",
 | 
			
		||||
 "hkdf",
 | 
			
		||||
 "hmac",
 | 
			
		||||
 "indexmap",
 | 
			
		||||
 "itoa 1.0.1",
 | 
			
		||||
 "libc",
 | 
			
		||||
 "libsqlite3-sys",
 | 
			
		||||
 "log",
 | 
			
		||||
 "md-5",
 | 
			
		||||
 "memchr",
 | 
			
		||||
 "once_cell",
 | 
			
		||||
 "paste",
 | 
			
		||||
 "percent-encoding",
 | 
			
		||||
 "rustls",
 | 
			
		||||
 "rand 0.8.5",
 | 
			
		||||
 "rustls 0.20.8",
 | 
			
		||||
 "rustls-pemfile",
 | 
			
		||||
 "serde",
 | 
			
		||||
 "serde_json",
 | 
			
		||||
 "sha1",
 | 
			
		||||
 "sha2",
 | 
			
		||||
 "smallvec",
 | 
			
		||||
 "sqlformat",
 | 
			
		||||
@ -4559,22 +4665,25 @@ dependencies = [
 | 
			
		||||
 "thiserror",
 | 
			
		||||
 "tokio-stream",
 | 
			
		||||
 "url",
 | 
			
		||||
 "webpki",
 | 
			
		||||
 "webpki-roots",
 | 
			
		||||
 "webpki-roots 0.22.6",
 | 
			
		||||
 "whoami",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sqlx-macros"
 | 
			
		||||
version = "0.5.11"
 | 
			
		||||
version = "0.6.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "eee35713129561f5e55c554bba1c378e2a7e67f81257b7311183de98c50e6f94"
 | 
			
		||||
checksum = "9966e64ae989e7e575b19d7265cb79d7fc3cbbdf179835cb0d716f294c2049c9"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "dotenv",
 | 
			
		||||
 "dotenvy",
 | 
			
		||||
 "either",
 | 
			
		||||
 "heck 0.3.3",
 | 
			
		||||
 "heck 0.4.0",
 | 
			
		||||
 "hex",
 | 
			
		||||
 "once_cell",
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "serde",
 | 
			
		||||
 "serde_json",
 | 
			
		||||
 "sha2",
 | 
			
		||||
 "sqlx-core",
 | 
			
		||||
 "sqlx-rt",
 | 
			
		||||
@ -4584,13 +4693,13 @@ dependencies = [
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "sqlx-rt"
 | 
			
		||||
version = "0.5.11"
 | 
			
		||||
version = "0.6.3"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "b555e70fbbf84e269ec3858b7a6515bcfe7a166a7cc9c636dd6efd20431678b6"
 | 
			
		||||
checksum = "804d3f245f894e61b1e6263c84b23ca675d96753b5abfd5cc8597d86806e8024"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "once_cell",
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "tokio-rustls",
 | 
			
		||||
 "tokio-rustls 0.23.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -4683,13 +4792,13 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "syn"
 | 
			
		||||
version = "1.0.89"
 | 
			
		||||
version = "1.0.109"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "ea297be220d52398dcc07ce15a209fce436d361735ac1db700cab3b6cdfb9f54"
 | 
			
		||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "proc-macro2",
 | 
			
		||||
 "quote",
 | 
			
		||||
 "unicode-xid",
 | 
			
		||||
 "unicode-ident",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -4705,7 +4814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "264c2549892aa83975386a924ef8d0b8e909674c837d37ea58b4bd8739495c6e"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "async-trait",
 | 
			
		||||
 "base64",
 | 
			
		||||
 "base64 0.13.0",
 | 
			
		||||
 "bitpacking",
 | 
			
		||||
 "byteorder",
 | 
			
		||||
 "census",
 | 
			
		||||
@ -4994,9 +5103,20 @@ version = "0.22.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "bc6844de72e57df1980054b38be3a9f4702aba4858be64dd700181a8a6d0e1b6"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "rustls",
 | 
			
		||||
 "rustls 0.19.1",
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "webpki",
 | 
			
		||||
 "webpki 0.21.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "tokio-rustls"
 | 
			
		||||
version = "0.23.4"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "rustls 0.20.8",
 | 
			
		||||
 "tokio",
 | 
			
		||||
 "webpki 0.22.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -5203,6 +5323,12 @@ version = "0.3.7"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode-ident"
 | 
			
		||||
version = "1.0.8"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode-normalization"
 | 
			
		||||
version = "0.1.19"
 | 
			
		||||
@ -5218,12 +5344,6 @@ version = "1.9.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode-xid"
 | 
			
		||||
version = "0.2.2"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "unicode_categories"
 | 
			
		||||
version = "0.1.1"
 | 
			
		||||
@ -5412,13 +5532,32 @@ dependencies = [
 | 
			
		||||
 "untrusted",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "webpki"
 | 
			
		||||
version = "0.22.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "ring",
 | 
			
		||||
 "untrusted",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "webpki-roots"
 | 
			
		||||
version = "0.21.1"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "aabe153544e473b775453675851ecc86863d2a81d786d741f6b76778f2a48940"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "webpki",
 | 
			
		||||
 "webpki 0.21.4",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "webpki-roots"
 | 
			
		||||
version = "0.22.6"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "webpki 0.22.0",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
@ -5436,6 +5575,16 @@ dependencies = [
 | 
			
		||||
 "cc",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "whoami"
 | 
			
		||||
version = "1.4.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "2c70234412ca409cc04e864e89523cb0fc37f5e1344ebed5a3ebf4192b6b9f68"
 | 
			
		||||
dependencies = [
 | 
			
		||||
 "wasm-bindgen",
 | 
			
		||||
 "web-sys",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "winapi"
 | 
			
		||||
version = "0.3.9"
 | 
			
		||||
 | 
			
		||||
@ -17,7 +17,7 @@ ron = "0.7.0"
 | 
			
		||||
tower-http = { version = "0.2.5", features = ["fs"] }
 | 
			
		||||
log = "0.4.14"
 | 
			
		||||
env_logger = "0.9.0"
 | 
			
		||||
sqlx = { version = "0.5.11", features = ["sqlite", "runtime-tokio-rustls"] }
 | 
			
		||||
sqlx = { version = "0.6.3", features = ["sqlite", "runtime-tokio-rustls"] }
 | 
			
		||||
itertools = "0.10.3"
 | 
			
		||||
colour = "0.6.0"
 | 
			
		||||
futures-util = "0.3.21"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								quickpeep_raker/.env
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								quickpeep_raker/.env
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1 @@
 | 
			
		||||
DATABASE_URL=${RAKER_DATABASE_URL}
 | 
			
		||||
@ -34,9 +34,7 @@ bytesize = {version = "1.1.0", features = ["serde"]}
 | 
			
		||||
chrono = "0.4.19"
 | 
			
		||||
 | 
			
		||||
### Storage
 | 
			
		||||
libmdbx = "0.1.1"
 | 
			
		||||
# Used for FFI. Must match the version in libmdbx.
 | 
			
		||||
mdbx-sys = "0.11.4-git.20210105"
 | 
			
		||||
sqlx = { version = "0.6.3", features = ["postgres", "runtime-tokio-rustls", "offline", "json"] }
 | 
			
		||||
# For compression of emitted packs. 0.11.1+zstd.1.5.2
 | 
			
		||||
zstd = "0.11.1"
 | 
			
		||||
 | 
			
		||||
@ -45,6 +43,7 @@ lazy_static = "1.4.0"
 | 
			
		||||
bytes = "1.1.0"
 | 
			
		||||
itertools = "0.10.3"
 | 
			
		||||
ipnetwork = "0.18.0"
 | 
			
		||||
futures-core = "0.3.28"
 | 
			
		||||
futures-util = "0.3.21"
 | 
			
		||||
tokio = { version = "1.17.0", features = ["full"] }
 | 
			
		||||
anyhow = "1.0.55"
 | 
			
		||||
@ -91,6 +90,3 @@ metrics = "0.18.1"
 | 
			
		||||
metrics-exporter-prometheus = { version = "0.9.0", default-features = false, features = ["http-listener"] }
 | 
			
		||||
metrics-process-promstyle = "0.18.0"
 | 
			
		||||
bare-metrics-recorder = "0.1.0"
 | 
			
		||||
 | 
			
		||||
[dev-dependencies]
 | 
			
		||||
tempfile = "3.3.0"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										5
									
								
								quickpeep_raker/build.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								quickpeep_raker/build.rs
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,5 @@
 | 
			
		||||
// generated by `sqlx migrate build-script`
 | 
			
		||||
fn main() {
 | 
			
		||||
    // trigger recompilation when a new migration is added
 | 
			
		||||
    println!("cargo:rerun-if-changed=migrations");
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										21
									
								
								quickpeep_raker/devdb.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								quickpeep_raker/devdb.sh
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,21 @@
 | 
			
		||||
#!/bin/sh
 | 
			
		||||
 | 
			
		||||
cname=`docker run -d --rm -e POSTGRES_PASSWORD=password -p 127.0.0.10:55432:5432 postgres:15`
 | 
			
		||||
 | 
			
		||||
onstop() {
 | 
			
		||||
  docker stop $cname
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
trap onstop EXIT
 | 
			
		||||
 | 
			
		||||
export RAKER_DATABASE_URL=postgres://postgres:password@127.0.0.10:55432/postgres
 | 
			
		||||
 | 
			
		||||
echo "Running migrations"
 | 
			
		||||
sqlx migrate run
 | 
			
		||||
echo "Preparing offline mode SQLx data"
 | 
			
		||||
#cargo sqlx prepare
 | 
			
		||||
cargo sqlx prepare -- --lib
 | 
			
		||||
#cargo sqlx prepare --merged -- --all-features --all-targets --lib
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,42 @@
 | 
			
		||||
-- All timestamps are in sec since 2023-01-01.
 | 
			
		||||
 | 
			
		||||
CREATE TYPE rakeintent AS ENUM ('Any', 'Page', 'Feed', 'SiteMap', 'Icon');
 | 
			
		||||
 | 
			
		||||
CREATE TABLE domains (
 | 
			
		||||
    domain_id SERIAL PRIMARY KEY NOT NULL,
 | 
			
		||||
    name TEXT UNIQUE NOT NULL,
 | 
			
		||||
    domain_record JSONB NOT NULL
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
CREATE TABLE urls (
 | 
			
		||||
    url_id SERIAL PRIMARY KEY NOT NULL,
 | 
			
		||||
    domain_id INTEGER NOT NULL REFERENCES domains(domain_id),
 | 
			
		||||
    url TEXT NOT NULL,
 | 
			
		||||
    intent rakeintent NOT NULL,
 | 
			
		||||
    visited_at_ts INTEGER,
 | 
			
		||||
 | 
			
		||||
    UNIQUE (domain_id, url)
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
CREATE TABLE url_queue (
 | 
			
		||||
    url_id INTEGER PRIMARY KEY NOT NULL REFERENCES urls(url_id),
 | 
			
		||||
    rake_after_ts INTEGER NOT NULL
 | 
			
		||||
);
 | 
			
		||||
-- Used for finding things to rake.
 | 
			
		||||
CREATE INDEX url_queue_rake_after_ts_idx ON url_queue(rake_after_ts);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CREATE TABLE active_domain_raffle (
 | 
			
		||||
    domain_id INTEGER PRIMARY KEY NOT NULL REFERENCES domains(domain_id),
 | 
			
		||||
    raffle INTEGER UNIQUE NOT NULL
 | 
			
		||||
);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
CREATE TABLE domain_backoffs (
 | 
			
		||||
    domain_id INTEGER PRIMARY KEY NOT NULL REFERENCES domains(domain_id),
 | 
			
		||||
    backoff_until_ts INTEGER NOT NULL,
 | 
			
		||||
    backoff_sec INTEGER NOT NULL,
 | 
			
		||||
    reason TEXT NOT NULL
 | 
			
		||||
);
 | 
			
		||||
-- Used for finding things to rake.
 | 
			
		||||
CREATE INDEX domain_backoffs_backoff_until_ts_idx ON domain_backoffs(backoff_until_ts);
 | 
			
		||||
							
								
								
									
										3
									
								
								quickpeep_raker/sqlx-data.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								quickpeep_raker/sqlx-data.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,3 @@
 | 
			
		||||
{
 | 
			
		||||
  "db": "PostgreSQL"
 | 
			
		||||
}
 | 
			
		||||
@ -1,173 +0,0 @@
 | 
			
		||||
use clap::Parser;
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
 | 
			
		||||
use std::fmt::Debug;
 | 
			
		||||
 | 
			
		||||
use env_logger::Env;
 | 
			
		||||
 | 
			
		||||
use anyhow::{bail, Context};
 | 
			
		||||
 | 
			
		||||
use colour::{dark_yellow_ln, red_ln};
 | 
			
		||||
use libmdbx::{Database, TableObject, RO};
 | 
			
		||||
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
 | 
			
		||||
use quickpeep_raker::config;
 | 
			
		||||
 | 
			
		||||
use quickpeep_raker::storage::mdbx_helper_types::MdbxBare;
 | 
			
		||||
use quickpeep_raker::storage::records::{
 | 
			
		||||
    ActiveDomainRecord, BackingOffDomainRecord, DomainRecord, OnHoldUrlRecord, QueueUrlRecord,
 | 
			
		||||
    UrlVisitedRecord,
 | 
			
		||||
};
 | 
			
		||||
use quickpeep_raker::storage::{RakerStore, RakerTxn};
 | 
			
		||||
 | 
			
		||||
/// Seeds a raker's queue with URLs
 | 
			
		||||
#[derive(Clone, Debug, Parser)]
 | 
			
		||||
pub struct Opts {
 | 
			
		||||
    #[clap(long = "config")]
 | 
			
		||||
    config: Option<PathBuf>,
 | 
			
		||||
 | 
			
		||||
    /// Table name
 | 
			
		||||
    table: String,
 | 
			
		||||
 | 
			
		||||
    /// Key name to look up
 | 
			
		||||
    key_name: String,
 | 
			
		||||
 | 
			
		||||
    /// Search for any prefix, not an exact match.
 | 
			
		||||
    #[clap(long = "prefix", short = 'p')]
 | 
			
		||||
    prefix: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
    env_logger::Builder::from_env(Env::default().default_filter_or("info,quickpeep=debug")).init();
 | 
			
		||||
 | 
			
		||||
    let opts: Opts = Opts::parse();
 | 
			
		||||
 | 
			
		||||
    let config_path = opts
 | 
			
		||||
        .config
 | 
			
		||||
        .unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
 | 
			
		||||
    let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
 | 
			
		||||
 | 
			
		||||
    if !config.raker.workbench_dir.exists() {
 | 
			
		||||
        bail!(
 | 
			
		||||
            "Workbench directory ({:?}) doesn't exist.",
 | 
			
		||||
            config.raker.workbench_dir
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
    if !config.seed_dir.exists() {
 | 
			
		||||
        bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
 | 
			
		||||
 | 
			
		||||
    let txn = store.ro_txn()?;
 | 
			
		||||
    match opts.table.as_ref() {
 | 
			
		||||
        "queue_urls" | "urls_queue" => {
 | 
			
		||||
            inspect::<MdbxBare<QueueUrlRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().queue_urls,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "active_domains" => {
 | 
			
		||||
            inspect::<MdbxBare<ActiveDomainRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().active_domains,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "active_domains_raffle" => {
 | 
			
		||||
            inspect::<MdbxBare<String>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().active_domain_raffle,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "backing_off_reinstatements" => {
 | 
			
		||||
            inspect::<MdbxBare<String>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().backing_off_reinstatements,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "backing_off_domains" => {
 | 
			
		||||
            inspect::<MdbxBare<BackingOffDomainRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().backing_off_domains,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "visited_urls" => {
 | 
			
		||||
            inspect::<MdbxBare<UrlVisitedRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().visited_urls,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "domains" => {
 | 
			
		||||
            inspect::<MdbxBare<DomainRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().domains,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        "urls_on_hold" => {
 | 
			
		||||
            inspect::<MdbxBare<OnHoldUrlRecord>>(
 | 
			
		||||
                opts.key_name.as_ref(),
 | 
			
		||||
                opts.prefix,
 | 
			
		||||
                &txn.mdbx.borrow_dbs().urls_on_hold,
 | 
			
		||||
                &txn,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        other => {
 | 
			
		||||
            dark_yellow_ln!("Unknown database {:?}", other);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
trait Inspectable {
 | 
			
		||||
    fn inspect(&self) -> String;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<T: Debug> Inspectable for MdbxBare<T> {
 | 
			
		||||
    fn inspect(&self) -> String {
 | 
			
		||||
        format!("{:?}", &self.0)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn inspect<'a, IV: Inspectable + TableObject<'a> + 'static>(
 | 
			
		||||
    key: &str,
 | 
			
		||||
    prefix: bool,
 | 
			
		||||
    database: &Database<'a>,
 | 
			
		||||
    txn: &'a RakerTxn<'a, RO>,
 | 
			
		||||
) -> anyhow::Result<()> {
 | 
			
		||||
    if prefix {
 | 
			
		||||
        let mut cur = txn.mdbx_txn.cursor(database)?;
 | 
			
		||||
        for item in cur.iter_from::<Cow<'_, [u8]>, IV>(key.as_bytes()) {
 | 
			
		||||
            let (k, v) = item?;
 | 
			
		||||
            if !k.starts_with(key.as_bytes()) {
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
            println!("• {}", std::str::from_utf8(&k).unwrap_or("<Not UTF-8>"));
 | 
			
		||||
            println!("  = {}", v.inspect());
 | 
			
		||||
        }
 | 
			
		||||
    } else {
 | 
			
		||||
        if let Some(entry) = txn.mdbx_txn.get::<IV>(database, key.as_bytes())? {
 | 
			
		||||
            println!("{}", entry.inspect());
 | 
			
		||||
        } else {
 | 
			
		||||
            red_ln!("no value");
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
@ -1,13 +1,11 @@
 | 
			
		||||
use anyhow::{bail, Context};
 | 
			
		||||
use anyhow::Context;
 | 
			
		||||
use clap::Parser;
 | 
			
		||||
use colour::{blue, yellow_ln};
 | 
			
		||||
use env_logger::Env;
 | 
			
		||||
use itertools::Itertools;
 | 
			
		||||
use libmdbx::Database;
 | 
			
		||||
use quickpeep_raker::config;
 | 
			
		||||
use quickpeep_raker::storage::mdbx_helper_types::{MdbxBare, MdbxString};
 | 
			
		||||
use quickpeep_raker::storage::records::OnHoldUrlRecord;
 | 
			
		||||
use quickpeep_raker::storage::RakerStore;
 | 
			
		||||
use sqlx::query;
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
 | 
			
		||||
@ -16,12 +14,9 @@ use std::path::PathBuf;
 | 
			
		||||
pub struct Opts {
 | 
			
		||||
    #[clap(long = "config")]
 | 
			
		||||
    config: Option<PathBuf>,
 | 
			
		||||
 | 
			
		||||
    /// Whether to show URLs instead of domains
 | 
			
		||||
    #[clap(long = "urls")]
 | 
			
		||||
    urls: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO re-introduce refcounting
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
    env_logger::Builder::from_env(
 | 
			
		||||
@ -36,19 +31,11 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
        .unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
 | 
			
		||||
    let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
 | 
			
		||||
 | 
			
		||||
    if !config.raker.workbench_dir.exists() {
 | 
			
		||||
        bail!(
 | 
			
		||||
            "Workbench directory ({:?}) doesn't exist.",
 | 
			
		||||
            config.raker.workbench_dir
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
    let store = RakerStore::open(&config.raker.database_uri).await?;
 | 
			
		||||
 | 
			
		||||
    let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
 | 
			
		||||
    let is_urls = opts.urls;
 | 
			
		||||
    let counts = count_on_hold(&store).await?;
 | 
			
		||||
 | 
			
		||||
    let counts = tokio::task::spawn_blocking(move || -> anyhow::Result<Vec<_>> {
 | 
			
		||||
        let counts = count_on_hold(&store, is_urls)?;
 | 
			
		||||
 | 
			
		||||
        let sorted_counts = counts
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .map(|(string, count)| (count, string))
 | 
			
		||||
@ -60,12 +47,8 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
    })
 | 
			
		||||
    .await??;
 | 
			
		||||
 | 
			
		||||
    blue!("№ Refs   ");
 | 
			
		||||
    if opts.urls {
 | 
			
		||||
        yellow_ln!("URL");
 | 
			
		||||
    } else {
 | 
			
		||||
        yellow_ln!("Domain");
 | 
			
		||||
    }
 | 
			
		||||
    blue!("№ URLs   ");
 | 
			
		||||
    yellow_ln!("Domain");
 | 
			
		||||
    for (count, string) in counts {
 | 
			
		||||
        println!("{:>6}   {}", count, string);
 | 
			
		||||
    }
 | 
			
		||||
@ -73,25 +56,27 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn count_on_hold(store: &RakerStore, urls: bool) -> anyhow::Result<HashMap<String, u32>> {
 | 
			
		||||
    let mut map: HashMap<String, u32> = Default::default();
 | 
			
		||||
pub async fn count_on_hold(store: &RakerStore) -> anyhow::Result<HashMap<String, u32>> {
 | 
			
		||||
    store
 | 
			
		||||
        .ro_txn(move |txn| {
 | 
			
		||||
            Box::pin(async move {
 | 
			
		||||
                let rows = query!(
 | 
			
		||||
                    r#"
 | 
			
		||||
            SELECT d.name AS "domain_name", COUNT(1) AS "url_count" FROM urls u
 | 
			
		||||
            LEFT JOIN url_queue q ON u.url_id = q.url_id
 | 
			
		||||
            JOIN domains d USING (domain_id)
 | 
			
		||||
            WHERE u.visited_at_ts IS NULL AND q.url_id IS NULL
 | 
			
		||||
            GROUP BY d.name
 | 
			
		||||
        "#
 | 
			
		||||
                )
 | 
			
		||||
                .fetch_all(&mut *txn.txn)
 | 
			
		||||
                .await?;
 | 
			
		||||
 | 
			
		||||
    let txn = store.ro_txn()?;
 | 
			
		||||
    let urls_on_hold: &Database = &txn.mdbx.borrow_dbs().urls_on_hold;
 | 
			
		||||
 | 
			
		||||
    let mut cur = txn.mdbx_txn.cursor(urls_on_hold)?;
 | 
			
		||||
 | 
			
		||||
    for row in cur.iter_start::<MdbxString, MdbxBare<OnHoldUrlRecord>>() {
 | 
			
		||||
        let (domain_then_url, record) = row?;
 | 
			
		||||
        let mut split = domain_then_url.0.as_ref().split('\n');
 | 
			
		||||
        if urls {
 | 
			
		||||
            // Skip one
 | 
			
		||||
            split.next();
 | 
			
		||||
        }
 | 
			
		||||
        let piece = split.next().context("Missing piece")?;
 | 
			
		||||
        let count = map.entry(piece.to_owned()).or_insert(0);
 | 
			
		||||
        *count += record.0.refs as u32;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(map)
 | 
			
		||||
                Ok(rows
 | 
			
		||||
                    .into_iter()
 | 
			
		||||
                    .map(|row| (row.domain_name, row.url_count.unwrap_or(0) as u32))
 | 
			
		||||
                    .collect::<HashMap<String, u32>>())
 | 
			
		||||
            })
 | 
			
		||||
        })
 | 
			
		||||
        .await
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -3,8 +3,7 @@ use clap::Parser;
 | 
			
		||||
use env_logger::Env;
 | 
			
		||||
 | 
			
		||||
use adblock::lists::RuleTypes;
 | 
			
		||||
use anyhow::{anyhow, bail, ensure, Context};
 | 
			
		||||
use chrono::Utc;
 | 
			
		||||
use anyhow::{ensure, Context};
 | 
			
		||||
use log::{debug, error, info, warn};
 | 
			
		||||
use lru::LruCache;
 | 
			
		||||
use metrics_exporter_prometheus::PrometheusBuilder;
 | 
			
		||||
@ -14,10 +13,10 @@ use signal_hook::consts::{SIGINT, SIGTERM};
 | 
			
		||||
use signal_hook::iterator::Signals;
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
			
		||||
use std::sync::{Arc, Mutex, RwLock};
 | 
			
		||||
use std::sync::{Arc, RwLock};
 | 
			
		||||
use std::time::{Duration, Instant, SystemTime};
 | 
			
		||||
use tokio::fs::File;
 | 
			
		||||
use tokio::sync::{mpsc, oneshot, Notify, Semaphore};
 | 
			
		||||
use tokio::sync::{mpsc, oneshot, Mutex, Notify, Semaphore};
 | 
			
		||||
use tokio::time::MissedTickBehavior;
 | 
			
		||||
 | 
			
		||||
use quickpeep_raker::config;
 | 
			
		||||
@ -32,7 +31,6 @@ use quickpeep_structs::rake_entries::{
 | 
			
		||||
    AnalysisAntifeatures, SCHEMA_RAKED_ICONS, SCHEMA_RAKED_PAGES, SCHEMA_RAKED_REFERENCES,
 | 
			
		||||
    SCHEMA_RAKED_REJECTIONS,
 | 
			
		||||
};
 | 
			
		||||
use quickpeep_utils::dates::date_to_quickpeep_days;
 | 
			
		||||
 | 
			
		||||
/// The ordering is slightly important on these: more specific things should come first.
 | 
			
		||||
/// This means they filter out the troublesome elements before the broader filters do.
 | 
			
		||||
@ -78,13 +76,6 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
        .unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
 | 
			
		||||
    let config = config::RakerConfig::load(&config_path).context("Failed to load config")?;
 | 
			
		||||
 | 
			
		||||
    if !config.raker.workbench_dir.exists() {
 | 
			
		||||
        bail!(
 | 
			
		||||
            "Workbench directory ({:?}) doesn't exist.",
 | 
			
		||||
            config.raker.workbench_dir
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let mut header_map = HeaderMap::new();
 | 
			
		||||
    header_map.insert(USER_AGENT, HeaderValue::from_static(RAKER_USER_AGENT));
 | 
			
		||||
 | 
			
		||||
@ -106,7 +97,7 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
        .redirect(Policy::limited(5))
 | 
			
		||||
        .build()?;
 | 
			
		||||
 | 
			
		||||
    let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
 | 
			
		||||
    let store = RakerStore::open(&config.raker.database_uri).await?;
 | 
			
		||||
 | 
			
		||||
    let mut adblock_engines = Vec::new();
 | 
			
		||||
    for (antifeature, name) in &ADBLOCK_FILTER_PATHS {
 | 
			
		||||
@ -277,12 +268,8 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
 | 
			
		||||
    // Reinstate old backoffs and re-rakable URLs
 | 
			
		||||
    store
 | 
			
		||||
        .async_rw_txn(|txn| {
 | 
			
		||||
            let today = date_to_quickpeep_days(&Utc::today())?;
 | 
			
		||||
            txn.reinstate_backoffs(SystemTime::now())?;
 | 
			
		||||
            txn.reinstate_rerakables(today)?;
 | 
			
		||||
            txn.commit()?;
 | 
			
		||||
            Ok(())
 | 
			
		||||
        .rw_txn(move |mut txn| {
 | 
			
		||||
            Box::pin(async move { txn.reinstate_backoffs(SystemTime::now()).await })
 | 
			
		||||
        })
 | 
			
		||||
        .await?;
 | 
			
		||||
 | 
			
		||||
@ -295,8 +282,9 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
            loop {
 | 
			
		||||
                tokio::select! {
 | 
			
		||||
                    _ = interval.tick() => {
 | 
			
		||||
                        let txn = store.ro_txn()?;
 | 
			
		||||
                        txn.emit_datastore_metrics()?;
 | 
			
		||||
                        store.ro_txn(move |mut txn| Box::pin(async move {
 | 
			
		||||
                            txn.emit_datastore_metrics().await
 | 
			
		||||
                        })).await?;
 | 
			
		||||
                        metrics_process_promstyle::emit_now()?;
 | 
			
		||||
                    }
 | 
			
		||||
                    _ = &mut dsmu_cancel_rx => {
 | 
			
		||||
@ -341,14 +329,19 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
 | 
			
		||||
async fn acquire_active_domain(task_context: &TaskContext) -> anyhow::Result<Option<String>> {
 | 
			
		||||
    // Acquire a domain for the task to run against
 | 
			
		||||
    let domain = {
 | 
			
		||||
        let txn = task_context.store.ro_txn()?;
 | 
			
		||||
        // TODO: don't clone teh Arc here — conv to ref.
 | 
			
		||||
        txn.acquire_random_active_domain(task_context.busy_domains.clone())?
 | 
			
		||||
    };
 | 
			
		||||
    let busy_domains = task_context.busy_domains.clone();
 | 
			
		||||
    let domain = task_context
 | 
			
		||||
        .store
 | 
			
		||||
        .ro_txn(move |mut txn| {
 | 
			
		||||
            Box::pin(async move {
 | 
			
		||||
                // TODO: don't clone teh Arc here — conv to ref.
 | 
			
		||||
                txn.acquire_random_active_domain(busy_domains).await
 | 
			
		||||
            })
 | 
			
		||||
        })
 | 
			
		||||
        .await?;
 | 
			
		||||
 | 
			
		||||
    match domain {
 | 
			
		||||
        RandomActiveDomainAcquisition::GotOne { domain, record: _ } => Ok(Some(domain)),
 | 
			
		||||
        RandomActiveDomainAcquisition::GotOne { domain } => Ok(Some(domain)),
 | 
			
		||||
        RandomActiveDomainAcquisition::AllBusy => Ok(None),
 | 
			
		||||
        RandomActiveDomainAcquisition::NoneLeft => Ok(None),
 | 
			
		||||
    }
 | 
			
		||||
@ -370,7 +363,7 @@ async fn orchestrator(task_context: TaskContext, semaphore: Arc<Semaphore>) -> a
 | 
			
		||||
        if domain_to_process.is_none() && semaphore.available_permits() == max_permits {
 | 
			
		||||
            // There's nothing to do and nothing is being processed.
 | 
			
		||||
            ensure!(
 | 
			
		||||
                task_context.busy_domains.lock().unwrap().is_empty(),
 | 
			
		||||
                task_context.busy_domains.lock().await.is_empty(),
 | 
			
		||||
                "Shutting down orchestrator but set of busy domains is not empty."
 | 
			
		||||
            );
 | 
			
		||||
        }
 | 
			
		||||
@ -378,13 +371,9 @@ async fn orchestrator(task_context: TaskContext, semaphore: Arc<Semaphore>) -> a
 | 
			
		||||
        tokio::select! {
 | 
			
		||||
            _ = tokio::time::sleep_until(next_reinstate.into()) => {
 | 
			
		||||
                // Reinstate backoffs and rerakables
 | 
			
		||||
                if let Err(err) = task_context.store.async_rw_txn(|txn| {
 | 
			
		||||
                    txn.reinstate_backoffs(SystemTime::now())?;
 | 
			
		||||
                    let today = date_to_quickpeep_days(&Utc::today())?;
 | 
			
		||||
                    txn.reinstate_rerakables(today)?;
 | 
			
		||||
                    txn.commit()?;
 | 
			
		||||
                    Ok(())
 | 
			
		||||
                }).await {
 | 
			
		||||
                if let Err(err) = task_context.store.rw_txn(move |mut txn| Box::pin(async move {
 | 
			
		||||
                    txn.reinstate_backoffs(SystemTime::now()).await
 | 
			
		||||
                })).await {
 | 
			
		||||
                    error!("Error performing periodic reinstatements: {err:?}");
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
@ -402,8 +391,7 @@ async fn orchestrator(task_context: TaskContext, semaphore: Arc<Semaphore>) -> a
 | 
			
		||||
                    }
 | 
			
		||||
                    ensure!(
 | 
			
		||||
                        task_context.busy_domains
 | 
			
		||||
                            .lock()
 | 
			
		||||
                            .map_err(|_| anyhow!("busy domains set poisoned"))?
 | 
			
		||||
                            .lock().await
 | 
			
		||||
                            .remove(&domain),
 | 
			
		||||
                        "Our domain was not busy after processing!"
 | 
			
		||||
                    );
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,8 @@ use anyhow::{bail, Context};
 | 
			
		||||
use colour::{dark_green_ln, dark_red_ln, dark_yellow, green, red, yellow_ln};
 | 
			
		||||
use reqwest::{Client, Url};
 | 
			
		||||
use std::path::PathBuf;
 | 
			
		||||
use std::sync::atomic::{AtomicU32, Ordering};
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use tokio::sync::mpsc;
 | 
			
		||||
use tokio::sync::mpsc::Receiver;
 | 
			
		||||
 | 
			
		||||
@ -42,17 +44,11 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
        .unwrap_or_else(|| PathBuf::from("quickpeep.ron"));
 | 
			
		||||
    let config = RakerConfig::load(&config_path).context("Failed to load config")?;
 | 
			
		||||
 | 
			
		||||
    if !config.raker.workbench_dir.exists() {
 | 
			
		||||
        bail!(
 | 
			
		||||
            "Workbench directory ({:?}) doesn't exist.",
 | 
			
		||||
            config.raker.workbench_dir
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
    if !config.seed_dir.exists() {
 | 
			
		||||
        bail!("Seed directory ({:?}) doesn't exist.", config.seed_dir);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let store = RakerStore::open(&config.raker.workbench_dir.join("raker.mdbx"))?;
 | 
			
		||||
    let store = RakerStore::open(&config.raker.database_uri).await?;
 | 
			
		||||
 | 
			
		||||
    import_seeds(store.clone(), &config).await?;
 | 
			
		||||
 | 
			
		||||
@ -60,7 +56,9 @@ pub async fn main() -> anyhow::Result<()> {
 | 
			
		||||
 | 
			
		||||
    eprintln!("... re-applying seeds and weeds to on-hold URLs ...");
 | 
			
		||||
    store
 | 
			
		||||
        .async_rw_txn(|txn| maintenance::reapply_seeds_and_weeds_to_on_hold_urls(txn))
 | 
			
		||||
        .rw_txn(move |mut txn| {
 | 
			
		||||
            Box::pin(async move { maintenance::reapply_seeds_and_weeds_to_on_hold_urls(txn).await })
 | 
			
		||||
        })
 | 
			
		||||
        .await?;
 | 
			
		||||
    eprintln!("... done!");
 | 
			
		||||
 | 
			
		||||
@ -137,18 +135,23 @@ async fn importer(
 | 
			
		||||
) -> anyhow::Result<SeedImportStats> {
 | 
			
		||||
    let mut buf = Vec::with_capacity(BATCH_SIZE);
 | 
			
		||||
    let mut stats = SeedImportStats::default();
 | 
			
		||||
    let client = Client::new();
 | 
			
		||||
    let client = Arc::new(Client::new());
 | 
			
		||||
    while let Some(seed) = recv.recv().await {
 | 
			
		||||
        buf.push(seed);
 | 
			
		||||
 | 
			
		||||
        if buf.len() == BATCH_SIZE {
 | 
			
		||||
            import_and_flush_batch_seeds_or_weeds(
 | 
			
		||||
                &store, &mut buf, &mut stats, &client, !are_weeds,
 | 
			
		||||
            stats = import_and_flush_batch_seeds_or_weeds(
 | 
			
		||||
                &store,
 | 
			
		||||
                buf,
 | 
			
		||||
                stats,
 | 
			
		||||
                client.clone(),
 | 
			
		||||
                !are_weeds,
 | 
			
		||||
            )
 | 
			
		||||
            .await?;
 | 
			
		||||
            buf = Vec::new();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    import_and_flush_batch_seeds_or_weeds(&store, &mut buf, &mut stats, &client, !are_weeds)
 | 
			
		||||
    stats = import_and_flush_batch_seeds_or_weeds(&store, buf, stats, client.clone(), !are_weeds)
 | 
			
		||||
        .await?;
 | 
			
		||||
 | 
			
		||||
    Ok(stats)
 | 
			
		||||
@ -156,83 +159,106 @@ async fn importer(
 | 
			
		||||
 | 
			
		||||
async fn import_and_flush_batch_seeds_or_weeds(
 | 
			
		||||
    store: &RakerStore,
 | 
			
		||||
    buf: &mut Vec<Seed>,
 | 
			
		||||
    stats: &mut SeedImportStats,
 | 
			
		||||
    client: &Client,
 | 
			
		||||
    mut buf: Vec<Seed>,
 | 
			
		||||
    mut stats: SeedImportStats,
 | 
			
		||||
    client: Arc<Client>,
 | 
			
		||||
    is_seed: bool,
 | 
			
		||||
) -> anyhow::Result<()> {
 | 
			
		||||
    let txn = store.rw_txn()?;
 | 
			
		||||
    for seed in buf.drain(..) {
 | 
			
		||||
        let as_url = Url::parse(seed.url.as_str())
 | 
			
		||||
            .with_context(|| format!("Failed to parse {:?} as URL", seed.url))?;
 | 
			
		||||
        let domain = get_reduced_domain(&as_url)
 | 
			
		||||
            .with_context(|| format!("No domain in seed URL '{as_url}'!"))?;
 | 
			
		||||
) -> anyhow::Result<SeedImportStats> {
 | 
			
		||||
    store
 | 
			
		||||
        .rw_txn(move |mut txn| {
 | 
			
		||||
            Box::pin(async move {
 | 
			
		||||
                for seed in buf.drain(..) {
 | 
			
		||||
                    let as_url = Url::parse(seed.url.as_str())
 | 
			
		||||
                        .with_context(|| format!("Failed to parse {:?} as URL", seed.url))?;
 | 
			
		||||
                    let domain = get_reduced_domain(&as_url)
 | 
			
		||||
                        .with_context(|| format!("No domain in seed URL '{as_url}'!"))?;
 | 
			
		||||
 | 
			
		||||
        let domain_record = txn.get_domain_record(domain.borrow())?;
 | 
			
		||||
        let is_domain_new = domain_record.is_none();
 | 
			
		||||
        let mut domain_record = domain_record.unwrap_or_default();
 | 
			
		||||
        if is_domain_new {
 | 
			
		||||
            stats.new_domains += 1;
 | 
			
		||||
        }
 | 
			
		||||
        let mut dirty = is_domain_new;
 | 
			
		||||
                    let domain_record = txn.get_domain_record(domain.borrow()).await?;
 | 
			
		||||
                    let is_domain_new = domain_record.is_none();
 | 
			
		||||
                    let mut domain_record = domain_record.unwrap_or_default();
 | 
			
		||||
                    if is_domain_new {
 | 
			
		||||
                        stats.new_domains += 1;
 | 
			
		||||
                    }
 | 
			
		||||
                    let mut dirty = is_domain_new;
 | 
			
		||||
 | 
			
		||||
        // Register the domain. This is a no-op if it's already active or backing off.
 | 
			
		||||
        txn.insert_active_domain_with_new_raffle_ticket(domain.clone().into_owned())?;
 | 
			
		||||
                    // Register the domain. This is a no-op if it's already active or backing off.
 | 
			
		||||
                    txn.insert_active_domain_with_new_raffle_ticket(domain.clone().into_owned())
 | 
			
		||||
                        .await?;
 | 
			
		||||
 | 
			
		||||
        let url_like = match &seed.url {
 | 
			
		||||
            UrlOrUrlPattern::Url(url_str) => {
 | 
			
		||||
                let url = Url::parse(url_str.as_str())?;
 | 
			
		||||
                if is_seed {
 | 
			
		||||
                    if txn.enqueue_url(url.as_str(), None, RakeIntent::Any)? {
 | 
			
		||||
                        stats.new_urls += 1;
 | 
			
		||||
                    } else {
 | 
			
		||||
                        stats.already_present_urls += 1;
 | 
			
		||||
                    let url_like = match &seed.url {
 | 
			
		||||
                        UrlOrUrlPattern::Url(url_str) => {
 | 
			
		||||
                            let url = Url::parse(url_str.as_str())?;
 | 
			
		||||
                            if is_seed {
 | 
			
		||||
                                if txn
 | 
			
		||||
                                    .enqueue_url(url.as_str(), None, RakeIntent::Any, false)
 | 
			
		||||
                                    .await?
 | 
			
		||||
                                {
 | 
			
		||||
                                    stats.new_urls += 1;
 | 
			
		||||
                                } else {
 | 
			
		||||
                                    stats.already_present_urls += 1;
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
 | 
			
		||||
                            // Seed/weed with empty prefix
 | 
			
		||||
                            dirty |= domain_record
 | 
			
		||||
                                .rakeable_path_prefixes
 | 
			
		||||
                                .insert(String::new(), is_seed)
 | 
			
		||||
                                != Some(is_seed);
 | 
			
		||||
 | 
			
		||||
                            url
 | 
			
		||||
                        }
 | 
			
		||||
                        UrlOrUrlPattern::UrlPrefix(prefix) => {
 | 
			
		||||
                            let prefix_as_url = Url::parse(prefix.as_str())?;
 | 
			
		||||
                            if is_seed {
 | 
			
		||||
                                if txn
 | 
			
		||||
                                    .enqueue_url(
 | 
			
		||||
                                        prefix_as_url.as_str(),
 | 
			
		||||
                                        None,
 | 
			
		||||
                                        RakeIntent::Any,
 | 
			
		||||
                                        false,
 | 
			
		||||
                                    )
 | 
			
		||||
                                    .await?
 | 
			
		||||
                                {
 | 
			
		||||
                                    stats.new_urls += 1;
 | 
			
		||||
                                } else {
 | 
			
		||||
                                    stats.already_present_urls += 1;
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
 | 
			
		||||
                            dirty |= domain_record
 | 
			
		||||
                                .rakeable_path_prefixes
 | 
			
		||||
                                .insert(prefix_as_url.path().to_string(), is_seed)
 | 
			
		||||
                                != Some(is_seed);
 | 
			
		||||
 | 
			
		||||
                            prefix_as_url
 | 
			
		||||
                        }
 | 
			
		||||
                    };
 | 
			
		||||
 | 
			
		||||
                    if dirty {
 | 
			
		||||
                        txn.put_domain_record(domain.borrow(), domain_record)
 | 
			
		||||
                            .await?;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    if is_seed {
 | 
			
		||||
                        // look at robots.txt and discover sitemaps!
 | 
			
		||||
                        if let Some(robots_txt) = get_robots_txt_for(&url_like, &client).await? {
 | 
			
		||||
                            for sitemap in robots_txt.sitemaps {
 | 
			
		||||
                                if SUPPORTED_SCHEMES.contains(&sitemap.url.scheme()) {
 | 
			
		||||
                                    txn.enqueue_url(
 | 
			
		||||
                                        sitemap.url.as_str(),
 | 
			
		||||
                                        None,
 | 
			
		||||
                                        RakeIntent::SiteMap,
 | 
			
		||||
                                        false,
 | 
			
		||||
                                    )
 | 
			
		||||
                                    .await?;
 | 
			
		||||
                                    stats.new_sitemaps += 1;
 | 
			
		||||
                                }
 | 
			
		||||
                            }
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Seed/weed with empty prefix
 | 
			
		||||
                dirty |= domain_record
 | 
			
		||||
                    .rakeable_path_prefixes
 | 
			
		||||
                    .insert(String::new(), is_seed)
 | 
			
		||||
                    != Some(is_seed);
 | 
			
		||||
 | 
			
		||||
                url
 | 
			
		||||
            }
 | 
			
		||||
            UrlOrUrlPattern::UrlPrefix(prefix) => {
 | 
			
		||||
                let prefix_as_url = Url::parse(prefix.as_str())?;
 | 
			
		||||
                if is_seed {
 | 
			
		||||
                    if txn.enqueue_url(prefix_as_url.as_str(), None, RakeIntent::Any)? {
 | 
			
		||||
                        stats.new_urls += 1;
 | 
			
		||||
                    } else {
 | 
			
		||||
                        stats.already_present_urls += 1;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                dirty |= domain_record
 | 
			
		||||
                    .rakeable_path_prefixes
 | 
			
		||||
                    .insert(prefix_as_url.path().to_string(), is_seed)
 | 
			
		||||
                    != Some(is_seed);
 | 
			
		||||
 | 
			
		||||
                prefix_as_url
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        if dirty {
 | 
			
		||||
            txn.put_domain_record(domain.borrow(), domain_record)?;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if is_seed {
 | 
			
		||||
            // look at robots.txt and discover sitemaps!
 | 
			
		||||
            if let Some(robots_txt) = get_robots_txt_for(&url_like, &client).await? {
 | 
			
		||||
                for sitemap in robots_txt.sitemaps {
 | 
			
		||||
                    if SUPPORTED_SCHEMES.contains(&sitemap.url.scheme()) {
 | 
			
		||||
                        txn.enqueue_url(sitemap.url.as_str(), None, RakeIntent::SiteMap)?;
 | 
			
		||||
                        stats.new_sitemaps += 1;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    txn.commit()?;
 | 
			
		||||
    Ok(())
 | 
			
		||||
                Ok(stats)
 | 
			
		||||
            })
 | 
			
		||||
        })
 | 
			
		||||
        .await
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -19,8 +19,9 @@ pub struct RakerOnlyConfig {
 | 
			
		||||
    /// Path to data files
 | 
			
		||||
    pub data_dir: PathBuf,
 | 
			
		||||
 | 
			
		||||
    /// Path to the raker's workbench (queue etc)
 | 
			
		||||
    pub workbench_dir: PathBuf,
 | 
			
		||||
    /// URI to connect to Postgres.
 | 
			
		||||
    /// e.g. `postgres://user:password@host:port/dbname`
 | 
			
		||||
    pub database_uri: String,
 | 
			
		||||
 | 
			
		||||
    /// Directory where new rake packs will be emitted
 | 
			
		||||
    pub emit_dir: PathBuf,
 | 
			
		||||
@ -36,17 +37,19 @@ pub struct RakerOnlyConfig {
 | 
			
		||||
pub struct RerakeTimings {
 | 
			
		||||
    /// How long, in days, between re-rakes of the same page?
 | 
			
		||||
    /// Suggested: 300
 | 
			
		||||
    pub page: u16,
 | 
			
		||||
    pub page: i32,
 | 
			
		||||
 | 
			
		||||
    /// How long, in days, between re-rakes of feeds?
 | 
			
		||||
    /// Suggested: 10
 | 
			
		||||
    pub feed: u16,
 | 
			
		||||
    pub feed: i32,
 | 
			
		||||
 | 
			
		||||
    /// How long, in days, between re-rakes of icons?
 | 
			
		||||
    /// Suggested: 365
 | 
			
		||||
    pub icon: u16,
 | 
			
		||||
    pub icon: i32,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub const DAY_SEC: i32 = 86400;
 | 
			
		||||
 | 
			
		||||
impl RakerConfig {
 | 
			
		||||
    /// Loads a config at the specified path.
 | 
			
		||||
    /// Will resolve all the paths in the RakerConfig for you.
 | 
			
		||||
@ -57,7 +60,6 @@ impl RakerConfig {
 | 
			
		||||
 | 
			
		||||
        raker_config.raker.data_dir = config_dir.join(raker_config.raker.data_dir);
 | 
			
		||||
        raker_config.seed_dir = config_dir.join(raker_config.seed_dir);
 | 
			
		||||
        raker_config.raker.workbench_dir = config_dir.join(raker_config.raker.workbench_dir);
 | 
			
		||||
        raker_config.raker.emit_dir = config_dir.join(raker_config.raker.emit_dir);
 | 
			
		||||
 | 
			
		||||
        Ok(raker_config)
 | 
			
		||||
 | 
			
		||||
@ -126,7 +126,8 @@ impl Display for PermanentFailure {
 | 
			
		||||
 | 
			
		||||
impl Error for PermanentFailure {}
 | 
			
		||||
 | 
			
		||||
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 | 
			
		||||
#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq, sqlx::Type)]
 | 
			
		||||
// supposedly we need this, but doesn't seem to work ... #[sqlx(postgres(oid = 16386))]
 | 
			
		||||
pub enum RakeIntent {
 | 
			
		||||
    Any,
 | 
			
		||||
    Page,
 | 
			
		||||
@ -140,11 +141,11 @@ impl FromStr for RakeIntent {
 | 
			
		||||
 | 
			
		||||
    fn from_str(s: &str) -> Result<Self, Self::Err> {
 | 
			
		||||
        Ok(match s.to_lowercase().as_ref() {
 | 
			
		||||
            "any" => RakeIntent::Any,
 | 
			
		||||
            "page" => RakeIntent::Page,
 | 
			
		||||
            "feed" => RakeIntent::Feed,
 | 
			
		||||
            "sitemap" => RakeIntent::SiteMap,
 | 
			
		||||
            "icon" => RakeIntent::Icon,
 | 
			
		||||
            "Any" => RakeIntent::Any,
 | 
			
		||||
            "Page" => RakeIntent::Page,
 | 
			
		||||
            "Feed" => RakeIntent::Feed,
 | 
			
		||||
            "SiteMap" => RakeIntent::SiteMap,
 | 
			
		||||
            "Icon" => RakeIntent::Icon,
 | 
			
		||||
            other => {
 | 
			
		||||
                bail!("Unrecognised intent: {:?}", other)
 | 
			
		||||
            }
 | 
			
		||||
@ -152,6 +153,18 @@ impl FromStr for RakeIntent {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Into<&'static str> for RakeIntent {
 | 
			
		||||
    fn into(self) -> &'static str {
 | 
			
		||||
        match self {
 | 
			
		||||
            RakeIntent::Any => "Any",
 | 
			
		||||
            RakeIntent::Page => "Page",
 | 
			
		||||
            RakeIntent::Feed => "Feed",
 | 
			
		||||
            RakeIntent::SiteMap => "SiteMap",
 | 
			
		||||
            RakeIntent::Icon => "Icon",
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl From<ReferenceKind> for RakeIntent {
 | 
			
		||||
    fn from(kind: ReferenceKind) -> Self {
 | 
			
		||||
        match kind {
 | 
			
		||||
 | 
			
		||||
@ -1,13 +1,12 @@
 | 
			
		||||
use crate::config::RerakeTimings;
 | 
			
		||||
use crate::config::{RerakeTimings, DAY_SEC};
 | 
			
		||||
use crate::raking::references::{clean_url, references_from_urlrakes, SUPPORTED_SCHEMES};
 | 
			
		||||
use crate::raking::{
 | 
			
		||||
    get_robots_txt_for, robots_txt_url_for, PermanentFailure, PermanentFailureReason, RakeIntent,
 | 
			
		||||
    RakeOutcome, Raker, RedirectReason, RobotsTxt, TemporaryFailure, TemporaryFailureReason,
 | 
			
		||||
};
 | 
			
		||||
use crate::storage::records::{DomainRecord, UrlVisitedRecord};
 | 
			
		||||
use crate::storage::records::DomainRecord;
 | 
			
		||||
use crate::storage::RakerStore;
 | 
			
		||||
use anyhow::{anyhow, Context};
 | 
			
		||||
use chrono::Utc;
 | 
			
		||||
use cylon::Cylon;
 | 
			
		||||
use log::{debug, warn};
 | 
			
		||||
use lru::LruCache;
 | 
			
		||||
@ -15,16 +14,16 @@ use metrics::increment_counter;
 | 
			
		||||
use quickpeep_structs::rake_entries::{
 | 
			
		||||
    IconEntry, RakedPageEntry, RakedReference, RakedReferrerEntry, ReferenceKind,
 | 
			
		||||
};
 | 
			
		||||
use quickpeep_utils::dates::date_to_quickpeep_days;
 | 
			
		||||
use quickpeep_utils::dates::QUICKPEEP_EPOCH_ST;
 | 
			
		||||
use quickpeep_utils::urls::get_reduced_domain;
 | 
			
		||||
use reqwest::{Client, Url};
 | 
			
		||||
use std::borrow::{Borrow, Cow};
 | 
			
		||||
use std::collections::HashSet;
 | 
			
		||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
			
		||||
use std::sync::{Arc, Mutex as StdMutex, RwLock};
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
use std::sync::{Arc, RwLock};
 | 
			
		||||
use std::time::{Duration, SystemTime};
 | 
			
		||||
use tokio::sync::mpsc::Sender;
 | 
			
		||||
use tokio::sync::{Notify, Semaphore};
 | 
			
		||||
use tokio::sync::{Mutex, Notify, Semaphore};
 | 
			
		||||
use tokio::time::Instant;
 | 
			
		||||
 | 
			
		||||
/// A crawl delay that is greater than 61 seconds will cause the domain to lose its place in the
 | 
			
		||||
@ -64,7 +63,7 @@ pub struct TaskContext {
 | 
			
		||||
    pub raker: Arc<Raker>,
 | 
			
		||||
 | 
			
		||||
    /// Busy domains (that are being processed by other tasks)
 | 
			
		||||
    pub busy_domains: Arc<StdMutex<HashSet<String>>>,
 | 
			
		||||
    pub busy_domains: Arc<Mutex<HashSet<String>>>,
 | 
			
		||||
 | 
			
		||||
    /// Cache of robots.txt entries for recently-made dormant sites
 | 
			
		||||
    pub robotstxt_cache: Arc<RwLock<LruCache<String, Option<Cylon>>>>,
 | 
			
		||||
@ -94,24 +93,31 @@ impl TaskContext {
 | 
			
		||||
        let mut current_robot_rules: Option<Cylon> = None;
 | 
			
		||||
        let mut wait_until: Option<Instant> = None;
 | 
			
		||||
 | 
			
		||||
        let domain_record = {
 | 
			
		||||
            let txn = self.store.ro_txn()?;
 | 
			
		||||
            let dr = txn.get_domain_record(&domain)?;
 | 
			
		||||
            match dr {
 | 
			
		||||
                None => {
 | 
			
		||||
                    return Ok(());
 | 
			
		||||
                }
 | 
			
		||||
                Some(dr) => dr,
 | 
			
		||||
        let domain2 = domain.clone();
 | 
			
		||||
        let dr = self
 | 
			
		||||
            .store
 | 
			
		||||
            .ro_txn(move |mut txn| Box::pin(async move { txn.get_domain_record(&domain2).await }))
 | 
			
		||||
            .await?;
 | 
			
		||||
        let domain_record = match dr {
 | 
			
		||||
            None => {
 | 
			
		||||
                return Ok(());
 | 
			
		||||
            }
 | 
			
		||||
            Some(dr) => dr,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        while !self.graceful_stop.load(Ordering::Relaxed) {
 | 
			
		||||
            // Get a URL to process
 | 
			
		||||
            let url = {
 | 
			
		||||
                let txn = self.store.ro_txn()?;
 | 
			
		||||
                txn.choose_url_for_domain(&domain)
 | 
			
		||||
                    .context("failed to choose URL for domain")?
 | 
			
		||||
            };
 | 
			
		||||
            let domain2 = domain.clone();
 | 
			
		||||
            let url = self
 | 
			
		||||
                .store
 | 
			
		||||
                .ro_txn(move |mut txn| {
 | 
			
		||||
                    Box::pin(async move {
 | 
			
		||||
                        txn.choose_url_for_domain(&domain2)
 | 
			
		||||
                            .await
 | 
			
		||||
                            .context("failed to choose URL for domain")
 | 
			
		||||
                    })
 | 
			
		||||
                })
 | 
			
		||||
                .await?;
 | 
			
		||||
 | 
			
		||||
            let (url_str, url_record) = if let Some(url) = url {
 | 
			
		||||
                url
 | 
			
		||||
@ -121,21 +127,22 @@ impl TaskContext {
 | 
			
		||||
                let domain = domain.to_owned();
 | 
			
		||||
                let out_of_urls = self
 | 
			
		||||
                    .store
 | 
			
		||||
                    .async_rw_txn(move |txn| {
 | 
			
		||||
                        // Double-check we're still out of URLs (another could have been added since
 | 
			
		||||
                        // we last checked!)
 | 
			
		||||
                        let out_of_urls = txn.choose_url_for_domain(&domain)?.is_none();
 | 
			
		||||
                    .rw_txn(move |mut txn| {
 | 
			
		||||
                        Box::pin(async move {
 | 
			
		||||
                            // Double-check we're still out of URLs (another could have been added since
 | 
			
		||||
                            // we last checked!)
 | 
			
		||||
                            let out_of_urls = txn.choose_url_for_domain(&domain).await?.is_none();
 | 
			
		||||
 | 
			
		||||
                        if !out_of_urls {
 | 
			
		||||
                            return Ok(false);
 | 
			
		||||
                        }
 | 
			
		||||
                            if !out_of_urls {
 | 
			
		||||
                                return Ok(false);
 | 
			
		||||
                            }
 | 
			
		||||
 | 
			
		||||
                        // Delete the active domain from the store
 | 
			
		||||
                        txn.remove_active_domain(&domain)
 | 
			
		||||
                            .context("failed to remove active domain")?;
 | 
			
		||||
 | 
			
		||||
                        txn.commit()?;
 | 
			
		||||
                        Ok(true)
 | 
			
		||||
                            // Delete the active domain from the store
 | 
			
		||||
                            txn.remove_active_domain(&domain)
 | 
			
		||||
                                .await
 | 
			
		||||
                                .context("failed to remove active domain")?;
 | 
			
		||||
                            Ok(true)
 | 
			
		||||
                        })
 | 
			
		||||
                    })
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failed to check if we're out of URLs")?;
 | 
			
		||||
@ -154,10 +161,8 @@ impl TaskContext {
 | 
			
		||||
                let domain = domain.clone();
 | 
			
		||||
                let url = url.clone();
 | 
			
		||||
                self.store
 | 
			
		||||
                    .async_rw_txn(move |txn| {
 | 
			
		||||
                        txn.dequeue_url(&domain, url.as_str())?;
 | 
			
		||||
                        txn.commit()?;
 | 
			
		||||
                        Ok(())
 | 
			
		||||
                    .rw_txn(move |mut txn| {
 | 
			
		||||
                        Box::pin(async move { txn.dequeue_url(&domain, url.as_str()).await })
 | 
			
		||||
                    })
 | 
			
		||||
                    .await?;
 | 
			
		||||
                continue;
 | 
			
		||||
@ -290,24 +295,26 @@ impl TaskContext {
 | 
			
		||||
 | 
			
		||||
                let domain = domain.clone();
 | 
			
		||||
                let url = url.clone();
 | 
			
		||||
                let backoff = delay.as_secs().try_into().unwrap_or(u32::MAX);
 | 
			
		||||
                let backoff = delay.as_secs().try_into().unwrap_or(i32::MAX);
 | 
			
		||||
 | 
			
		||||
                self.store
 | 
			
		||||
                    .async_rw_txn(move |txn| {
 | 
			
		||||
                        txn.start_backing_off(
 | 
			
		||||
                            &domain,
 | 
			
		||||
                            backoff,
 | 
			
		||||
                            url.to_string(),
 | 
			
		||||
                            TemporaryFailure {
 | 
			
		||||
                                reason: TemporaryFailureReason::ExcruciatingCrawlDelay(
 | 
			
		||||
                                    delay.as_secs(),
 | 
			
		||||
                                ),
 | 
			
		||||
                                // Don't stack this up with a backoff; it's not an actual failure!
 | 
			
		||||
                                backoff_sec: 0,
 | 
			
		||||
                            },
 | 
			
		||||
                        )?;
 | 
			
		||||
                        txn.commit()?;
 | 
			
		||||
                        Ok(())
 | 
			
		||||
                    .rw_txn(move |mut txn| {
 | 
			
		||||
                        Box::pin(async move {
 | 
			
		||||
                            txn.start_backing_off(
 | 
			
		||||
                                &domain,
 | 
			
		||||
                                backoff,
 | 
			
		||||
                                url.to_string(),
 | 
			
		||||
                                TemporaryFailure {
 | 
			
		||||
                                    reason: TemporaryFailureReason::ExcruciatingCrawlDelay(
 | 
			
		||||
                                        delay.as_secs(),
 | 
			
		||||
                                    ),
 | 
			
		||||
                                    // Don't stack this up with a backoff; it's not an actual failure!
 | 
			
		||||
                                    backoff_sec: 0,
 | 
			
		||||
                                },
 | 
			
		||||
                            )
 | 
			
		||||
                            .await?;
 | 
			
		||||
                            Ok(())
 | 
			
		||||
                        })
 | 
			
		||||
                    })
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure whilst turning long crawl delay into backoff")?;
 | 
			
		||||
@ -319,7 +326,10 @@ impl TaskContext {
 | 
			
		||||
 | 
			
		||||
    /// Processes the outcome of
 | 
			
		||||
    async fn process_outcome(&self, url: &Url, outcome: RakeOutcome) -> anyhow::Result<NextAction> {
 | 
			
		||||
        let today = date_to_quickpeep_days(&Utc::today())?;
 | 
			
		||||
        let visited_on_ts = SystemTime::now()
 | 
			
		||||
            .duration_since(*QUICKPEEP_EPOCH_ST)
 | 
			
		||||
            .unwrap()
 | 
			
		||||
            .as_secs() as i32;
 | 
			
		||||
        match outcome {
 | 
			
		||||
            RakeOutcome::RakedPage(page) => {
 | 
			
		||||
                self.submission
 | 
			
		||||
@ -335,11 +345,11 @@ impl TaskContext {
 | 
			
		||||
                    .context("Reference processor shut down; can't stream references!")?;
 | 
			
		||||
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_page(url.clone(), page.page_entry, today)
 | 
			
		||||
                    .process_page(url.clone(), page.page_entry, visited_on_ts)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure processing page for RakedPage")?;
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_refs(url.clone(), page.referrer_entry, today, false)
 | 
			
		||||
                    .process_refs(url.clone(), page.referrer_entry, visited_on_ts, false)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure processing refs for RakedPage")?;
 | 
			
		||||
 | 
			
		||||
@ -357,7 +367,7 @@ impl TaskContext {
 | 
			
		||||
                    .context("Reference processor shut down; can't stream references!")?;
 | 
			
		||||
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_refs(url.clone(), refs, today, true)
 | 
			
		||||
                    .process_refs(url.clone(), refs, visited_on_ts, true)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure processing refs for RakedFeed")?;
 | 
			
		||||
 | 
			
		||||
@ -375,7 +385,7 @@ impl TaskContext {
 | 
			
		||||
                    .context("Reference processor shut down; can't stream references!")?;
 | 
			
		||||
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_refs(url.clone(), refs, today, true)
 | 
			
		||||
                    .process_refs(url.clone(), refs, visited_on_ts, true)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure processing refs for RakedSitemap")?;
 | 
			
		||||
 | 
			
		||||
@ -395,7 +405,7 @@ impl TaskContext {
 | 
			
		||||
                    .await?;
 | 
			
		||||
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_icon(url.clone(), today)
 | 
			
		||||
                    .process_icon(url.clone(), visited_on_ts)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failure processing icon for RakedIcon")?;
 | 
			
		||||
 | 
			
		||||
@ -423,7 +433,7 @@ impl TaskContext {
 | 
			
		||||
                    .context("Reference processor shut down; can't stream references!")?;
 | 
			
		||||
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_refs(url.clone(), refs, today, false)
 | 
			
		||||
                    .process_refs(url.clone(), refs, visited_on_ts, false)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("Failure processing refs for Redirect")?;
 | 
			
		||||
 | 
			
		||||
@ -439,14 +449,15 @@ impl TaskContext {
 | 
			
		||||
                let url = url.clone();
 | 
			
		||||
 | 
			
		||||
                // TODO(feature) add 1.1× the previous backoff, if there was one.
 | 
			
		||||
                let new_backoff = failure.backoff_sec;
 | 
			
		||||
                let new_backoff = failure.backoff_sec as i32;
 | 
			
		||||
                let domain = domain.into_owned();
 | 
			
		||||
 | 
			
		||||
                self.store
 | 
			
		||||
                    .async_rw_txn(move |txn| {
 | 
			
		||||
                        txn.start_backing_off(&domain, new_backoff, url.to_string(), failure)?;
 | 
			
		||||
                        txn.commit()?;
 | 
			
		||||
                        Ok(())
 | 
			
		||||
                    .rw_txn(move |mut txn| {
 | 
			
		||||
                        Box::pin(async move {
 | 
			
		||||
                            txn.start_backing_off(&domain, new_backoff, url.to_string(), failure)
 | 
			
		||||
                                .await
 | 
			
		||||
                        })
 | 
			
		||||
                    })
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failed to store backoff")?;
 | 
			
		||||
@ -461,7 +472,7 @@ impl TaskContext {
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("Rejection processor shut down; can't stream rejection!!")?;
 | 
			
		||||
                self.as_event_processor()
 | 
			
		||||
                    .process_rejection(url.clone(), today)
 | 
			
		||||
                    .process_rejection(url.clone(), visited_on_ts)
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failed to process rejection for PermanentFailure")?;
 | 
			
		||||
 | 
			
		||||
@ -493,58 +504,58 @@ impl EventProcessor<'_> {
 | 
			
		||||
        &self,
 | 
			
		||||
        url: Url,
 | 
			
		||||
        page: RakedPageEntry,
 | 
			
		||||
        datestamp: u16,
 | 
			
		||||
        visited_on_ts: i32,
 | 
			
		||||
    ) -> anyhow::Result<()> {
 | 
			
		||||
        let rerake_on = Some(datestamp + self.rerake_timings.page);
 | 
			
		||||
        let rerake_on = Some(visited_on_ts + self.rerake_timings.page * DAY_SEC);
 | 
			
		||||
        self.store
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .async_rw_txn(move |txn| {
 | 
			
		||||
                let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                    format!("No domain for URL '{url}' for which we are processing the page!")
 | 
			
		||||
                })?;
 | 
			
		||||
                txn.mark_url_as_visited(
 | 
			
		||||
                    domain.as_ref(),
 | 
			
		||||
                    url.as_ref(),
 | 
			
		||||
                    UrlVisitedRecord {
 | 
			
		||||
                        last_visited_days: datestamp,
 | 
			
		||||
                    },
 | 
			
		||||
                    rerake_on,
 | 
			
		||||
                )?;
 | 
			
		||||
            .rw_txn(move |mut txn| {
 | 
			
		||||
                Box::pin(async move {
 | 
			
		||||
                    let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                        format!("No domain for URL '{url}' for which we are processing the page!")
 | 
			
		||||
                    })?;
 | 
			
		||||
                    txn.mark_url_as_visited(
 | 
			
		||||
                        domain.as_ref(),
 | 
			
		||||
                        url.as_ref(),
 | 
			
		||||
                        visited_on_ts,
 | 
			
		||||
                        rerake_on,
 | 
			
		||||
                    )
 | 
			
		||||
                    .await?;
 | 
			
		||||
 | 
			
		||||
                // If there's a favicon to be tried, add it to the list...
 | 
			
		||||
                let favicon_url_rel = page.document.head.effective_favicon_url();
 | 
			
		||||
                if let Ok(favicon_url) = url.join(favicon_url_rel) {
 | 
			
		||||
                    if SUPPORTED_SCHEMES.contains(&favicon_url.scheme()) {
 | 
			
		||||
                        txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon)?;
 | 
			
		||||
                    // If there's a favicon to be tried, add it to the list...
 | 
			
		||||
                    let favicon_url_rel = page.document.head.effective_favicon_url();
 | 
			
		||||
                    if let Ok(favicon_url) = url.join(favicon_url_rel) {
 | 
			
		||||
                        if SUPPORTED_SCHEMES.contains(&favicon_url.scheme()) {
 | 
			
		||||
                            txn.enqueue_url(favicon_url.as_str(), None, RakeIntent::Icon, false)
 | 
			
		||||
                                .await?;
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                txn.commit()?;
 | 
			
		||||
                Ok(())
 | 
			
		||||
                    Ok(())
 | 
			
		||||
                })
 | 
			
		||||
            })
 | 
			
		||||
            .await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub async fn process_icon(&self, url: Url, datestamp: u16) -> anyhow::Result<()> {
 | 
			
		||||
        let rerake_on = Some(datestamp + self.rerake_timings.icon);
 | 
			
		||||
    pub async fn process_icon(&self, url: Url, visited_on_ts: i32) -> anyhow::Result<()> {
 | 
			
		||||
        let rerake_on = Some(visited_on_ts + self.rerake_timings.icon * DAY_SEC);
 | 
			
		||||
 | 
			
		||||
        self.store
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .async_rw_txn(move |txn| {
 | 
			
		||||
                let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                    format!("No domain for URL '{url}' for which we are processing an icon!")
 | 
			
		||||
                })?;
 | 
			
		||||
                txn.mark_url_as_visited(
 | 
			
		||||
                    domain.as_ref(),
 | 
			
		||||
                    url.as_ref(),
 | 
			
		||||
                    UrlVisitedRecord {
 | 
			
		||||
                        last_visited_days: datestamp,
 | 
			
		||||
                    },
 | 
			
		||||
                    rerake_on,
 | 
			
		||||
                )?;
 | 
			
		||||
 | 
			
		||||
                txn.commit()?;
 | 
			
		||||
                Ok(())
 | 
			
		||||
            .rw_txn(move |mut txn| {
 | 
			
		||||
                Box::pin(async move {
 | 
			
		||||
                    let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                        format!("No domain for URL '{url}' for which we are processing an icon!")
 | 
			
		||||
                    })?;
 | 
			
		||||
                    txn.mark_url_as_visited(
 | 
			
		||||
                        domain.as_ref(),
 | 
			
		||||
                        url.as_ref(),
 | 
			
		||||
                        visited_on_ts,
 | 
			
		||||
                        rerake_on,
 | 
			
		||||
                    )
 | 
			
		||||
                    .await?;
 | 
			
		||||
                    Ok(())
 | 
			
		||||
                })
 | 
			
		||||
            })
 | 
			
		||||
            .await
 | 
			
		||||
    }
 | 
			
		||||
@ -553,93 +564,105 @@ impl EventProcessor<'_> {
 | 
			
		||||
        &self,
 | 
			
		||||
        url: Url,
 | 
			
		||||
        refs: RakedReferrerEntry,
 | 
			
		||||
        datestamp: u16,
 | 
			
		||||
        visited_on_ts: i32,
 | 
			
		||||
        rerakeable_feed: bool,
 | 
			
		||||
    ) -> anyhow::Result<()> {
 | 
			
		||||
        let rerake_on = if rerakeable_feed {
 | 
			
		||||
            Some(self.rerake_timings.feed)
 | 
			
		||||
            Some(visited_on_ts + self.rerake_timings.feed * DAY_SEC)
 | 
			
		||||
        } else {
 | 
			
		||||
            None
 | 
			
		||||
        };
 | 
			
		||||
        self.store
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .async_rw_txn(move |txn| {
 | 
			
		||||
                let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                    format!("No domain for URL '{url}' for which we are processing refs!")
 | 
			
		||||
                })?;
 | 
			
		||||
                txn.mark_url_as_visited(
 | 
			
		||||
                    domain.as_ref(),
 | 
			
		||||
                    url.as_ref(),
 | 
			
		||||
                    UrlVisitedRecord {
 | 
			
		||||
                        last_visited_days: datestamp,
 | 
			
		||||
                    },
 | 
			
		||||
                    rerake_on,
 | 
			
		||||
                )
 | 
			
		||||
                .context("failed to mark URL as visited")?;
 | 
			
		||||
 | 
			
		||||
                // track all the referred-to URLs!
 | 
			
		||||
                for reference in refs.references {
 | 
			
		||||
                    let ref_url = Url::parse(&reference.target).with_context(|| {
 | 
			
		||||
                        format!(
 | 
			
		||||
                            "failed to parse target URL of reference: {:?}",
 | 
			
		||||
                            reference.target
 | 
			
		||||
                        )
 | 
			
		||||
                    })?;
 | 
			
		||||
                    let domain = get_reduced_domain(&ref_url).with_context(|| {
 | 
			
		||||
                        format!("failed to reduce domain: {:?}", reference.target)
 | 
			
		||||
            .rw_txn(move |mut txn| {
 | 
			
		||||
                Box::pin(async move {
 | 
			
		||||
                    let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                        format!("No domain for URL '{url}' for which we are processing refs!")
 | 
			
		||||
                    })?;
 | 
			
		||||
                    txn.mark_url_as_visited(
 | 
			
		||||
                        domain.as_ref(),
 | 
			
		||||
                        url.as_ref(),
 | 
			
		||||
                        visited_on_ts,
 | 
			
		||||
                        rerake_on,
 | 
			
		||||
                    )
 | 
			
		||||
                    .await
 | 
			
		||||
                    .context("failed to mark URL as visited")?;
 | 
			
		||||
 | 
			
		||||
                    // Check if this URL is an allowed URL (hence should be enqueued)
 | 
			
		||||
                    let allowed = txn
 | 
			
		||||
                        .get_domain_record(domain.borrow())?
 | 
			
		||||
                        .map(|record: DomainRecord| record.is_url_rakeable(&ref_url))
 | 
			
		||||
                        .flatten();
 | 
			
		||||
                    // track all the referred-to URLs!
 | 
			
		||||
                    for reference in refs.references {
 | 
			
		||||
                        let ref_url = Url::parse(&reference.target).with_context(|| {
 | 
			
		||||
                            format!(
 | 
			
		||||
                                "failed to parse target URL of reference: {:?}",
 | 
			
		||||
                                reference.target
 | 
			
		||||
                            )
 | 
			
		||||
                        })?;
 | 
			
		||||
                        let domain = get_reduced_domain(&ref_url).with_context(|| {
 | 
			
		||||
                            format!("failed to reduce domain: {:?}", reference.target)
 | 
			
		||||
                        })?;
 | 
			
		||||
 | 
			
		||||
                    match allowed {
 | 
			
		||||
                        Some(true) => {
 | 
			
		||||
                            let is_fresh = txn.enqueue_url(
 | 
			
		||||
                                &reference.target,
 | 
			
		||||
                                reference.last_mod,
 | 
			
		||||
                                reference.kind.into(),
 | 
			
		||||
                            )?;
 | 
			
		||||
                            if is_fresh {
 | 
			
		||||
                                increment_counter!("qprake_queue_new_url");
 | 
			
		||||
                        // Check if this URL is an allowed URL (hence should be enqueued)
 | 
			
		||||
                        let allowed = txn
 | 
			
		||||
                            .get_domain_record(domain.borrow())
 | 
			
		||||
                            .await?
 | 
			
		||||
                            .map(|record: DomainRecord| record.is_url_rakeable(&ref_url))
 | 
			
		||||
                            .flatten();
 | 
			
		||||
 | 
			
		||||
                        match allowed {
 | 
			
		||||
                            Some(true) => {
 | 
			
		||||
                                let is_fresh = txn
 | 
			
		||||
                                    .enqueue_url(
 | 
			
		||||
                                        &reference.target,
 | 
			
		||||
                                        reference.last_mod.map(|days| {
 | 
			
		||||
                                            *QUICKPEEP_EPOCH_ST
 | 
			
		||||
                                                + Duration::from_secs(days as u64 * DAY_SEC as u64)
 | 
			
		||||
                                        }),
 | 
			
		||||
                                        reference.kind.into(),
 | 
			
		||||
                                        false,
 | 
			
		||||
                                    )
 | 
			
		||||
                                    .await?;
 | 
			
		||||
                                if is_fresh {
 | 
			
		||||
                                    increment_counter!("qprake_queue_new_url");
 | 
			
		||||
                                }
 | 
			
		||||
                                continue;
 | 
			
		||||
                            }
 | 
			
		||||
                            Some(false) => {
 | 
			
		||||
                                // Weed! Do nothing.
 | 
			
		||||
                            }
 | 
			
		||||
                            None => {
 | 
			
		||||
                                // It's neither allowed nor weeded, so put it on hold for later inspection
 | 
			
		||||
                                txn.enqueue_url(
 | 
			
		||||
                                    &reference.target,
 | 
			
		||||
                                    reference.last_mod.map(|days| {
 | 
			
		||||
                                        *QUICKPEEP_EPOCH_ST
 | 
			
		||||
                                            + Duration::from_secs(days as u64 * DAY_SEC as u64)
 | 
			
		||||
                                    }),
 | 
			
		||||
                                    reference.kind.into(),
 | 
			
		||||
                                    true,
 | 
			
		||||
                                )
 | 
			
		||||
                                .await?;
 | 
			
		||||
                            }
 | 
			
		||||
                            continue;
 | 
			
		||||
                        }
 | 
			
		||||
                        Some(false) => {
 | 
			
		||||
                            // Weed! Do nothing.
 | 
			
		||||
                        }
 | 
			
		||||
                        None => {
 | 
			
		||||
                            // It's neither allowed nor weeded, so put it on hold for later inspection
 | 
			
		||||
                            txn.put_url_on_hold(&reference.target, reference.kind.into())?;
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                txn.commit()?;
 | 
			
		||||
                Ok(())
 | 
			
		||||
                    Ok(())
 | 
			
		||||
                })
 | 
			
		||||
            })
 | 
			
		||||
            .await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub async fn process_rejection(&self, url: Url, datestamp: u16) -> anyhow::Result<()> {
 | 
			
		||||
    pub async fn process_rejection(&self, url: Url, visited_on_ts: i32) -> anyhow::Result<()> {
 | 
			
		||||
        self.store
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .async_rw_txn(move |txn| {
 | 
			
		||||
                let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                    format!("No domain for URL '{url}' for which we are processing a rejection!")
 | 
			
		||||
                })?;
 | 
			
		||||
                txn.mark_url_as_visited(
 | 
			
		||||
                    domain.as_ref(),
 | 
			
		||||
                    url.as_ref(),
 | 
			
		||||
                    UrlVisitedRecord {
 | 
			
		||||
                        last_visited_days: datestamp,
 | 
			
		||||
                    },
 | 
			
		||||
                    None,
 | 
			
		||||
                )?;
 | 
			
		||||
                txn.commit()?;
 | 
			
		||||
                Ok(())
 | 
			
		||||
            .rw_txn(move |mut txn| {
 | 
			
		||||
                Box::pin(async move {
 | 
			
		||||
                    let domain = get_reduced_domain(&url).with_context(|| {
 | 
			
		||||
                        format!(
 | 
			
		||||
                            "No domain for URL '{url}' for which we are processing a rejection!"
 | 
			
		||||
                        )
 | 
			
		||||
                    })?;
 | 
			
		||||
                    txn.mark_url_as_visited(domain.as_ref(), url.as_ref(), visited_on_ts, None)
 | 
			
		||||
                        .await?;
 | 
			
		||||
                    Ok(())
 | 
			
		||||
                })
 | 
			
		||||
            })
 | 
			
		||||
            .await
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,80 +1,41 @@
 | 
			
		||||
use crate::storage::mdbx_helper_types::{MdbxBare, MdbxString};
 | 
			
		||||
use crate::storage::records::{DomainRecord, OnHoldUrlRecord};
 | 
			
		||||
use crate::storage::records::DomainRecord;
 | 
			
		||||
use crate::storage::RakerTxn;
 | 
			
		||||
use anyhow::Context;
 | 
			
		||||
use libmdbx::{Database, WriteFlags, RW};
 | 
			
		||||
use reqwest::Url;
 | 
			
		||||
use itertools::Itertools;
 | 
			
		||||
use sqlx::query_as;
 | 
			
		||||
use sqlx::types::Json;
 | 
			
		||||
 | 
			
		||||
/// Runs one big transaction that:
 | 
			
		||||
/// - scans on-hold URLs
 | 
			
		||||
///     - moves 'allowed' ones to the queue
 | 
			
		||||
///     - deletes 'weeds'
 | 
			
		||||
///     - leaves unknown ones alone
 | 
			
		||||
///
 | 
			
		||||
/// Re-enqueues domains
 | 
			
		||||
/// Ideally should be applied after importing seeds and weeds on an existing database.
 | 
			
		||||
pub fn reapply_seeds_and_weeds_to_on_hold_urls(txn: RakerTxn<RW>) -> anyhow::Result<()> {
 | 
			
		||||
pub async fn reapply_seeds_and_weeds_to_on_hold_urls(
 | 
			
		||||
    mut txn: RakerTxn<'_, '_, true>,
 | 
			
		||||
) -> anyhow::Result<()> {
 | 
			
		||||
    struct DomainState {
 | 
			
		||||
        pub domain: String,
 | 
			
		||||
        pub domain_record: Option<DomainRecord>,
 | 
			
		||||
        pub domain_id: i32,
 | 
			
		||||
        pub domain_record: Json<DomainRecord>,
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let urls_on_hold: &Database = &txn.mdbx.borrow_dbs().urls_on_hold;
 | 
			
		||||
    let reinstatable_domains: Vec<DomainState> = query_as!(DomainState, r#"
 | 
			
		||||
        SELECT DISTINCT u.domain_id, domain_record AS "domain_record: Json<DomainRecord>" FROM url_queue
 | 
			
		||||
        JOIN urls u USING (url_id)
 | 
			
		||||
        JOIN domains USING (domain_id)
 | 
			
		||||
        LEFT JOIN domain_backoffs db ON u.domain_id = db.domain_id
 | 
			
		||||
        WHERE db.domain_id IS NULL
 | 
			
		||||
    "#).fetch_all(&mut *txn.txn)
 | 
			
		||||
        .await?;
 | 
			
		||||
 | 
			
		||||
    let mut domain_state = None;
 | 
			
		||||
 | 
			
		||||
    // Scan through the on-hold URLs
 | 
			
		||||
    let mut cur = txn.mdbx_txn.cursor(urls_on_hold)?;
 | 
			
		||||
    let mut first_iteration = true;
 | 
			
		||||
 | 
			
		||||
    while let Some((MdbxString(domain_then_url), MdbxBare(record))) = if first_iteration {
 | 
			
		||||
        first_iteration = false;
 | 
			
		||||
        cur.first::<MdbxString, MdbxBare<OnHoldUrlRecord>>()
 | 
			
		||||
    } else {
 | 
			
		||||
        cur.next::<MdbxString, MdbxBare<OnHoldUrlRecord>>()
 | 
			
		||||
    }? {
 | 
			
		||||
        let mut split = domain_then_url.as_ref().split("\n");
 | 
			
		||||
        let domain = split.next().context("No first split..?")?;
 | 
			
		||||
        let url_str = split.next().context("No URL")?;
 | 
			
		||||
 | 
			
		||||
        // Is the domain new?
 | 
			
		||||
        if domain_state
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .map(|ds: &DomainState| &ds.domain != domain)
 | 
			
		||||
            .unwrap_or(true)
 | 
			
		||||
        {
 | 
			
		||||
            // Then load the relevant records for it.
 | 
			
		||||
            domain_state = Some(DomainState {
 | 
			
		||||
                domain: domain.to_owned(),
 | 
			
		||||
                domain_record: txn.get_domain_record(domain)?,
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let url = Url::parse(url_str)?;
 | 
			
		||||
 | 
			
		||||
        let domain_state = domain_state.as_ref().unwrap();
 | 
			
		||||
 | 
			
		||||
        let is_rakeable = domain_state
 | 
			
		||||
    for domain_state in reinstatable_domains {
 | 
			
		||||
        let any_seeds = domain_state
 | 
			
		||||
            .domain_record
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .map(|dr: &DomainRecord| dr.is_url_rakeable(&url))
 | 
			
		||||
            .flatten();
 | 
			
		||||
 | 
			
		||||
        match is_rakeable {
 | 
			
		||||
            Some(true) => {
 | 
			
		||||
                // ALLOWED
 | 
			
		||||
                // Make it a queued URL
 | 
			
		||||
                txn.enqueue_url(url_str, None, record.queue_record.intent)?;
 | 
			
		||||
                cur.del(WriteFlags::empty())?;
 | 
			
		||||
            }
 | 
			
		||||
            Some(false) => {
 | 
			
		||||
                // WEED
 | 
			
		||||
                // Just delete
 | 
			
		||||
                cur.del(WriteFlags::empty())?;
 | 
			
		||||
            }
 | 
			
		||||
            None => { /* nop: neither allowed nor a weed. Keep on hold. */ }
 | 
			
		||||
            .rakeable_path_prefixes
 | 
			
		||||
            .values()
 | 
			
		||||
            .contains(&true);
 | 
			
		||||
        if !any_seeds {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    txn.commit()?;
 | 
			
		||||
        // This domain has *some* seeds, let's just reinstate it.
 | 
			
		||||
        txn.insert_active_domain_id_with_new_raffle_ticket(domain_state.domain_id)
 | 
			
		||||
            .await?;
 | 
			
		||||
    }
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -1,190 +0,0 @@
 | 
			
		||||
use anyhow::{anyhow, Context};
 | 
			
		||||
use libmdbx::{Error, TableObject, TransactionKind};
 | 
			
		||||
use serde::de::DeserializeOwned;
 | 
			
		||||
use serde::Serialize;
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
 | 
			
		||||
/// u16 in BIG byte endianness (u16 not supported by INTEGERKEY mode!)
 | 
			
		||||
#[derive(Copy, Clone, Debug)]
 | 
			
		||||
pub struct MdbxU16BE(pub u16);
 | 
			
		||||
 | 
			
		||||
impl MdbxU16BE {
 | 
			
		||||
    pub fn as_bytes(&self) -> Cow<'_, [u8]> {
 | 
			
		||||
        Cow::Owned(self.0.to_be_bytes().to_vec())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl TableObject<'_> for MdbxU16BE {
 | 
			
		||||
    fn decode(data_val: &[u8]) -> Result<Self, libmdbx::Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        if data_val.len() != 2 {
 | 
			
		||||
            return Err(libmdbx::Error::DecodeError(
 | 
			
		||||
                anyhow!("MDBX Key not 2 bytes; can't be decoded as u16").into(),
 | 
			
		||||
            ));
 | 
			
		||||
        }
 | 
			
		||||
        let mut buf = [0u8; 2];
 | 
			
		||||
        buf.copy_from_slice(&data_val);
 | 
			
		||||
        Ok(MdbxU16BE(u16::from_be_bytes(buf)))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// u32 in native byte endianness (as required by INTEGERKEY mode)
 | 
			
		||||
#[derive(Copy, Clone, Debug)]
 | 
			
		||||
pub struct MdbxU32(pub u32);
 | 
			
		||||
 | 
			
		||||
impl MdbxU32 {
 | 
			
		||||
    pub fn as_bytes(&self) -> Cow<'_, [u8]> {
 | 
			
		||||
        Cow::Owned(self.0.to_ne_bytes().to_vec())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl TableObject<'_> for MdbxU32 {
 | 
			
		||||
    fn decode(data_val: &[u8]) -> Result<Self, libmdbx::Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        if data_val.len() != 4 {
 | 
			
		||||
            return Err(libmdbx::Error::DecodeError(
 | 
			
		||||
                anyhow!("MDBX Key not 4 bytes; can't be decoded as u32").into(),
 | 
			
		||||
            ));
 | 
			
		||||
        }
 | 
			
		||||
        let mut buf = [0u8; 4];
 | 
			
		||||
        buf.copy_from_slice(&data_val);
 | 
			
		||||
        Ok(MdbxU32(u32::from_ne_bytes(buf)))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// u64 in native byte endianness (as required by INTEGERKEY mode)
 | 
			
		||||
#[derive(Copy, Clone, Debug)]
 | 
			
		||||
pub struct MdbxU64(pub u64);
 | 
			
		||||
 | 
			
		||||
impl MdbxU64 {
 | 
			
		||||
    pub fn as_bytes(&self) -> Cow<'_, [u8]> {
 | 
			
		||||
        Cow::Owned(self.0.to_ne_bytes().to_vec())
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl TableObject<'_> for MdbxU64 {
 | 
			
		||||
    fn decode(data_val: &[u8]) -> Result<Self, libmdbx::Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        if data_val.len() != 8 {
 | 
			
		||||
            return Err(libmdbx::Error::DecodeError(
 | 
			
		||||
                anyhow!("MDBX Key not 8 bytes; can't be decoded as u64").into(),
 | 
			
		||||
            ));
 | 
			
		||||
        }
 | 
			
		||||
        let mut buf = [0u8; 8];
 | 
			
		||||
        buf.copy_from_slice(&data_val);
 | 
			
		||||
        Ok(MdbxU64(u64::from_ne_bytes(buf)))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// UTF-8 String
 | 
			
		||||
#[derive(Clone, Debug)]
 | 
			
		||||
pub struct MdbxString<'txn>(pub Cow<'txn, str>);
 | 
			
		||||
 | 
			
		||||
impl MdbxString<'_> {
 | 
			
		||||
    pub fn as_bytes(&self) -> &[u8] {
 | 
			
		||||
        self.0.as_bytes()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn into_string(self) -> String {
 | 
			
		||||
        self.0.into_owned()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'a> TableObject<'_> for MdbxString<'a> {
 | 
			
		||||
    fn decode(_data_val: &[u8]) -> Result<Self, libmdbx::Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        unreachable!()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    unsafe fn decode_val<K: TransactionKind>(
 | 
			
		||||
        txnptr: *const mdbx_sys::MDBX_txn,
 | 
			
		||||
        data_val: &mdbx_sys::MDBX_val,
 | 
			
		||||
    ) -> Result<Self, Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        let bytes = MdbxBytes::decode_val::<K>(txnptr, data_val)?;
 | 
			
		||||
        let string_cow = match bytes {
 | 
			
		||||
            Cow::Borrowed(data) => {
 | 
			
		||||
                let string = std::str::from_utf8(data)
 | 
			
		||||
                    .context("Failed to decode MDBX key as string")
 | 
			
		||||
                    .map_err(|e| libmdbx::Error::DecodeError(e.into()))?;
 | 
			
		||||
                Cow::Borrowed(string)
 | 
			
		||||
            }
 | 
			
		||||
            Cow::Owned(data) => {
 | 
			
		||||
                let string = String::from_utf8(data)
 | 
			
		||||
                    .context("Failed to decode MDBX key as string")
 | 
			
		||||
                    .map_err(|e| libmdbx::Error::DecodeError(e.into()))?;
 | 
			
		||||
                Cow::Owned(string)
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
        Ok(MdbxString(string_cow))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// /// UTF-8 String
 | 
			
		||||
// /// Using Cow<'txn, str> would've needed some unsafe code (see `Cow<'txn, [u8]>` for inspiration),
 | 
			
		||||
// /// so I didn't bother for now.
 | 
			
		||||
// #[derive(Clone, Debug)]
 | 
			
		||||
// pub struct MdbxString(pub String);
 | 
			
		||||
//
 | 
			
		||||
// impl MdbxString {
 | 
			
		||||
//     pub fn as_bytes(&self) -> &[u8] {
 | 
			
		||||
//         self.0.as_bytes()
 | 
			
		||||
//     }
 | 
			
		||||
// }
 | 
			
		||||
//
 | 
			
		||||
// impl TableObject<'_> for MdbxString {
 | 
			
		||||
//     fn decode(data_val: &[u8]) -> Result<Self, libmdbx::Error> where Self: Sized {
 | 
			
		||||
//         let string = String::from_utf8(data_val.to_vec())
 | 
			
		||||
//             .context("Failed to decode MDBX key as string")
 | 
			
		||||
//             .map_err(|e| libmdbx::Error::DecodeError(e.into()))?;
 | 
			
		||||
//         Ok(MdbxString(string))
 | 
			
		||||
//     }
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
/// Any BARE payload
 | 
			
		||||
#[derive(Clone, Debug)]
 | 
			
		||||
pub struct MdbxBare<T>(pub T);
 | 
			
		||||
 | 
			
		||||
impl<T: Serialize> MdbxBare<T> {
 | 
			
		||||
    pub fn as_bytes(&self) -> Vec<u8> {
 | 
			
		||||
        serde_bare::to_vec(&self.0).expect("It's unreasonable to expect serialisation will fail")
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<T: DeserializeOwned> TableObject<'_> for MdbxBare<T> {
 | 
			
		||||
    fn decode(_data_val: &[u8]) -> Result<Self, libmdbx::Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        unreachable!()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    unsafe fn decode_val<K: TransactionKind>(
 | 
			
		||||
        txnptr: *const mdbx_sys::MDBX_txn,
 | 
			
		||||
        data_val: &mdbx_sys::MDBX_val,
 | 
			
		||||
    ) -> Result<Self, Error>
 | 
			
		||||
    where
 | 
			
		||||
        Self: Sized,
 | 
			
		||||
    {
 | 
			
		||||
        let bytes = MdbxBytes::decode_val::<K>(txnptr, data_val)?;
 | 
			
		||||
 | 
			
		||||
        let record = serde_bare::from_slice(bytes.as_ref())
 | 
			
		||||
            .context("Failed to decode MDBX key as BARE object")
 | 
			
		||||
            .map_err(|e| libmdbx::Error::DecodeError(e.into()))?;
 | 
			
		||||
 | 
			
		||||
        Ok(MdbxBare(record))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Supported natively by libmdbx.
 | 
			
		||||
pub type MdbxBytes<'txn> = Cow<'txn, [u8]>;
 | 
			
		||||
@ -1,2 +0,0 @@
 | 
			
		||||
pub const MIGRATION_KEY: &[u8] = b"MIGRATION_VERSION";
 | 
			
		||||
pub const MIGRATION_VERSION: &str = "quickpeep_raker:0.1.0";
 | 
			
		||||
@ -1,50 +1,13 @@
 | 
			
		||||
use crate::raking::{RakeIntent, TemporaryFailure};
 | 
			
		||||
use crate::raking::RakeIntent;
 | 
			
		||||
use reqwest::Url;
 | 
			
		||||
use serde::{Deserialize, Serialize};
 | 
			
		||||
use std::collections::BTreeMap;
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
 | 
			
		||||
pub struct ActiveDomainRecord {
 | 
			
		||||
    /// The raffle ticket number owned by this domain.
 | 
			
		||||
    pub raffle_ticket: u32,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct UrlVisitedRecord {
 | 
			
		||||
    /// Number of days since the QuickPeep Epoch that this page was last raked at.
 | 
			
		||||
    /// A u16 is fine here, giving 179 years worth of values. This allows compact encoding.
 | 
			
		||||
    /// We don't really care about a more granular timestamp: sitemaps and feeds usually only
 | 
			
		||||
    /// give the date of last update anyway.
 | 
			
		||||
    pub last_visited_days: u16,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct QueueUrlRecord {
 | 
			
		||||
    pub intent: RakeIntent, // TODO CONSIDER
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct OnHoldUrlRecord {
 | 
			
		||||
    /// Record that should be emitted once this is released.
 | 
			
		||||
    pub queue_record: QueueUrlRecord,
 | 
			
		||||
 | 
			
		||||
    /// Number of times this URL has been 'enqueued'; capped at 255.
 | 
			
		||||
    pub refs: u8,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Deserialize, Serialize)]
 | 
			
		||||
pub struct BackingOffDomainRecord {
 | 
			
		||||
    /// The URL that caused the backoff.
 | 
			
		||||
    pub failed_url: String,
 | 
			
		||||
    /// The reason that this backoff is in place
 | 
			
		||||
    pub failure: TemporaryFailure,
 | 
			
		||||
    /// Duration of the backoff. Used to provide increasing backoffs if the failures persist.
 | 
			
		||||
    pub backoff: u32,
 | 
			
		||||
    /// When the domain should be reinstated
 | 
			
		||||
    /// MUST match the timestamp present in the reinstatements table.
 | 
			
		||||
    pub reinstate_at: u64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Debug, Serialize, Deserialize, Default)]
 | 
			
		||||
pub struct DomainRecord {
 | 
			
		||||
    pub rakeable_path_prefixes: BTreeMap<String, bool>,
 | 
			
		||||
 | 
			
		||||
@ -1,11 +1,15 @@
 | 
			
		||||
use anyhow::Context;
 | 
			
		||||
use chrono::{Date, Duration, TimeZone, Utc};
 | 
			
		||||
use lazy_static::lazy_static;
 | 
			
		||||
use std::time::SystemTime;
 | 
			
		||||
 | 
			
		||||
lazy_static! {
 | 
			
		||||
    /// The QuickPeep Epoch is 2022-01-01, as this gives us 52 years of extra headroom compared to the
 | 
			
		||||
    /// Unix one. QuickPeep didn't exist before 2022 so we needn't worry about negative dates!
 | 
			
		||||
    pub static ref QUICKPEEP_EPOCH: Date<Utc> = Utc.ymd(2022, 1, 1);
 | 
			
		||||
    /// The QuickPeep Epoch is 2023-01-01, as this gives us 53 years of extra headroom compared to the
 | 
			
		||||
    /// Unix one. This QuickPeep database format didn't exist before 2023 so we needn't worry about negative dates!
 | 
			
		||||
    pub static ref QUICKPEEP_EPOCH: Date<Utc> = Utc.ymd(2023, 1, 1);
 | 
			
		||||
 | 
			
		||||
    /// 2023-01-01
 | 
			
		||||
    pub static ref QUICKPEEP_EPOCH_ST: SystemTime = SystemTime::UNIX_EPOCH + std::time::Duration::from_secs(1672531200);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn date_from_quickpeep_days(days: u16) -> Date<Utc> {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user