Initial commit

This commit is contained in:
Pascal Engélibert 2025-03-29 22:15:07 +01:00
commit b35120be22
9 changed files with 677 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

350
Cargo.lock generated Normal file
View file

@ -0,0 +1,350 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "addr2line"
version = "0.24.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
dependencies = [
"gimli",
]
[[package]]
name = "adler2"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "backtrace"
version = "0.3.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
dependencies = [
"addr2line",
"cfg-if",
"libc",
"miniz_oxide",
"object",
"rustc-demangle",
"windows-targets",
]
[[package]]
name = "bytes"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "daemonize"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e"
dependencies = [
"libc",
]
[[package]]
name = "gimli"
version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "libc"
version = "0.2.171"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "mesozoa"
version = "0.1.0"
dependencies = [
"realm_io",
"realm_syscall",
"regex",
"static_cell",
"tokio",
]
[[package]]
name = "miniz_oxide"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [
"adler2",
]
[[package]]
name = "mio"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
dependencies = [
"libc",
"wasi",
"windows-sys",
]
[[package]]
name = "object"
version = "0.36.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
dependencies = [
"memchr",
]
[[package]]
name = "pin-project-lite"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
[[package]]
name = "portable-atomic"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e"
[[package]]
name = "proc-macro2"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
dependencies = [
"proc-macro2",
]
[[package]]
name = "realm_io"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2558ac548920bb63a08d21496481b24818501cb1a7bbd7c1c3b6d9bc5d1e4a41"
dependencies = [
"libc",
"socket2",
"tokio",
]
[[package]]
name = "realm_syscall"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e59aa61541fb76e4216da9eb43cdebd63d4ba2874e802aee31c77d57c703d17c"
dependencies = [
"daemonize",
"libc",
"socket2",
]
[[package]]
name = "regex"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustc-demangle"
version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "socket2"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "static_cell"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89b0684884a883431282db1e4343f34afc2ff6996fe1f4a1664519b66e14c1e"
dependencies = [
"portable-atomic",
]
[[package]]
name = "syn"
version = "2.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "tokio"
version = "1.44.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a"
dependencies = [
"backtrace",
"bytes",
"libc",
"mio",
"pin-project-lite",
"socket2",
"tokio-macros",
"windows-sys",
]
[[package]]
name = "tokio-macros"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

11
Cargo.toml Normal file
View file

@ -0,0 +1,11 @@
[package]
name = "mesozoa"
version = "0.1.0"
edition = "2024"
[dependencies]
realm_io = { version = "0.5" }
realm_syscall = "0.1"
regex = { version = "1.11", default-features = false, features = ["perf", "std"] }
static_cell = { version = "2.1.0", features = ["nightly"] }
tokio = { version = "1", features = ["io-util", "macros", "rt", "rt-multi-thread", "time"] }

5
README.md Normal file
View file

@ -0,0 +1,5 @@
Mesozoa
Why not Anubis? Because it provides no build instructions and only supports Docker.
Why not use Realm entirely? Because its hook system is useless and only allows filtering.

9
rustfmt.toml Normal file
View file

@ -0,0 +1,9 @@
hard_tabs = true
newline_style = "Unix"
imports_granularity = "Crate"
unstable_features = true
format_code_in_doc_comments = true
format_macro_bodies = true
format_macro_matchers = true
format_strings = true

17
src/challenge.html Normal file
View file

@ -0,0 +1,17 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<meta name="robots" content="noindex"/>
<title>Antispam working...</title>
</head>
<body>
<h1>Fighting crawlers</h1>
<script type="text/javascript">
/*setTimeout(function() {
document.cookie = "mesozoa=1234; max-age=3600";
window.location.reload();
}, 1000);*/
</script>
</body>
</html>

58
src/http.rs Normal file
View file

@ -0,0 +1,58 @@
/// Iterates over the CRLF-terminated lines of an HTTP header and stops at the end of the header.
///
/// Each item is one line without its trailing `\r\n`. Iteration ends at the first empty
/// line (the blank line that closes the header section), so nothing that follows the header
/// is ever yielded. A trailing line that is not terminated by `\r\n` (for example because
/// the packet was truncated) is not yielded either.
pub struct HeaderLineIterator<'a> {
	/// Raw bytes being scanned.
	packet: &'a [u8],
	/// Offset of the first byte of the line currently being scanned.
	start: usize,
	/// Whether the previously scanned byte was `\r`.
	cr: bool,
	/// Offset of the next byte to scan.
	i: usize,
}

impl<'a> HeaderLineIterator<'a> {
	/// Creates an iterator positioned at the beginning of `packet`.
	pub fn new(packet: &'a [u8]) -> Self {
		Self {
			packet,
			start: 0,
			cr: false,
			i: 0,
		}
	}
}

impl<'a> Iterator for HeaderLineIterator<'a> {
	type Item = &'a [u8];

	/// Returns the next header line, or `None` once the blank line ending the header (or the
	/// end of the packet) has been reached.
	fn next(&mut self) -> Option<Self::Item> {
		// Copy the slice reference out so the returned line borrows the packet, not `self`.
		let packet = self.packet;
		while self.i < packet.len() {
			let c = packet[self.i];
			self.i += 1;
			if c == b'\n' && self.cr {
				self.cr = false;
				// `self.i` is now just past the `\n`; drop the two terminator bytes.
				let line = &packet[self.start..self.i - 2];
				self.start = self.i;
				if line.is_empty() {
					// Blank line: the header is over. Jump to the end of the packet so that
					// every later call keeps returning `None`.
					self.i = packet.len();
					return None;
				}
				return Some(line);
			}
			// Only a `\r` immediately followed by `\n` terminates a line.
			self.cr = c == b'\r';
		}
		None
	}
}
/// Work-in-progress parser for a `Cookie:` request header line.
///
/// Returns `None` straight away when `line` does not begin with the exact bytes `Cookie:`.
/// The scan over the remainder of the line is still a skeleton that extracts nothing, so the
/// function currently returns `None` for every input.
pub fn parse_cookies<'a>(line: &'a [u8]) -> Option<&'a [u8]> {
	if !line.starts_with(b"Cookie:") {
		return None;
	}
	let waiting_for_name = true;
	// Walk the bytes that follow the 7-byte `Cookie:` prefix, keeping their offsets in `line`.
	for (_offset, &byte) in line.iter().enumerate().skip(7) {
		if byte != b' ' && waiting_for_name {
			// TODO: cookie name/value extraction is not implemented yet.
		}
	}
	None
}

95
src/main.rs Normal file
View file

@ -0,0 +1,95 @@
mod http;
mod policy;
use http::HeaderLineIterator;
use policy::{CompiledPolicies, Policy};
use regex::bytes::Regex;
use std::{
io::{BufReader, Write},
net::SocketAddr,
time::Duration,
};
use tokio::{io::{ReadBuf, AsyncWriteExt}, time::timeout};
/// Contents of `challenge.html`, embedded at compile time; `main` sends it as the body of the challenge response.
static CHALLENGE_BODY: &str = include_str!("challenge.html");
/// Stores `$val` in a `static_cell::StaticCell<$t>` declared inside the expansion and
/// evaluates to whatever `STATIC_CELL.uninit().write(..)` returns.
///
/// NOTE(review): per the `static_cell` documentation the result should be a `&'static mut $t`,
/// and `uninit()` should panic if the same expansion site is executed a second time — confirm
/// against the crate docs before using this inside a loop.
macro_rules! mk_static {
($t:ty, $val:expr) => {{
static STATIC_CELL: static_cell::StaticCell<$t> = static_cell::StaticCell::new();
// NOTE(review): the purpose of `deny(unused_attributes)` on this `let` is not evident from this file — confirm before removing.
#[deny(unused_attributes)]
let x = STATIC_CELL.uninit().write(($val));
x
}};
}
/// Entry point: listens on `127.0.0.1:8000`, peeks at the beginning of every incoming
/// connection, prints the `mesozoa` cookie if the request carries one, and answers with the
/// challenge page.
///
/// Policy matching is not wired in yet: the sample policies are compiled at start-up but
/// every request currently receives the challenge response.
///
/// # Panics
///
/// Panics at start-up if the listen address, the listening socket or the cookie regex
/// cannot be set up. Network failures on an individual connection are logged and only end
/// that connection's task.
#[tokio::main]
async fn main() {
	let listen_addr = "127.0.0.1:8000".parse().unwrap();
	let policy_groups = &[&[Policy {
		name: String::from("Block"),
		filter: policy::Filter::FirstLineMatch(String::from("GET /block")),
		action: policy::Action::Drop,
		priority: 0,
	}]];
	// Built once and leaked into a static so that every connection task can share it.
	let challenge_response = &*mk_static!(String, format!("HTTP/1.1 200\r\ncontent-type: text/html\r\ncontent-length: {}\r\n\r\n{}", CHALLENGE_BODY.len(), CHALLENGE_BODY));
	let policy_groups: Vec<CompiledPolicies> = policy_groups.into_iter().map(|policies| CompiledPolicies::new(*policies)).collect();
	let socket = realm_syscall::new_tcp_socket(&listen_addr).unwrap();
	socket.set_reuse_address(true).ok();
	socket.bind(&listen_addr.into()).unwrap();
	socket.listen(1024).unwrap();
	let listener = tokio::net::TcpListener::from_std(socket.into()).unwrap();
	let cookie_regex = Regex::new(r"^Cookie: *(?:[^;=]+=[^;=]* *; *)*mesozoa *= *([0-9a-zA-Z]{4})").unwrap();
	loop {
		let (mut client_stream, _client_addr) = match listener.accept().await {
			Ok(connection) => connection,
			Err(err) => {
				// Report the failure instead of dropping it silently, then keep serving.
				println!("accept error: {err}");
				continue;
			}
		};
		//client_stream.set_nodelay(true).ok();
		let cookie_regex = cookie_regex.clone();
		tokio::spawn(async move {
			let mut buf = [0u8; 1024];
			let mut buf_reader = ReadBuf::new(&mut buf);
			let peeked = timeout(
				Duration::from_millis(100),
				std::future::poll_fn(|cx| client_stream.poll_peek(cx, &mut buf_reader)),
			)
			.await;
			match peeked {
				Ok(Ok(_)) => {}
				Ok(Err(err)) => {
					// The peek itself failed (e.g. connection reset): nothing to parse.
					println!("peek error: {err}");
					return;
				}
				Err(_) => {
					println!("peek timeout");
					return;
				}
			}
			// Only look at the bytes the peek actually produced, not the zero padding.
			let mut header_line_iter = HeaderLineIterator::new(buf_reader.filled());
			let Some(_first_line) = header_line_iter.next() else {
				println!("Not HTTP, or too long line");
				return;
			};
			// TODO matching
			// for test we will challenge everything!
			if let Some(captures) = header_line_iter.find_map(|line| cookie_regex.captures(line)) {
				if let Some(cookie) = captures.get(1) {
					let mut stdout = std::io::stdout();
					stdout.write_all(cookie.as_bytes()).unwrap();
					stdout.flush().unwrap();
					println!();
				} else {
					println!("cookie header, but no cookie")
				}
			} else {
				println!("no cookie");
			}
			// A client that went away must not panic the task: log and give up on it.
			if let Err(err) = client_stream.writable().await {
				println!("client not writable: {err}");
				return;
			}
			if let Err(err) = client_stream.write_all(challenge_response.as_bytes()).await {
				println!("write error: {err}");
			}
		});
	}
}

131
src/policy.rs Normal file
View file

@ -0,0 +1,131 @@
use regex::{Regex, RegexSet};
/// Action attached to a [`Policy`].
///
/// NOTE(review): no code in this file acts on an `Action` yet, so the variant notes below are
/// inferred from the names only — confirm once policy evaluation is implemented.
#[derive(Clone, Debug)]
pub enum Action {
/// Presumably lets the request through unchallenged.
Allow,
/// Presumably answers with the challenge page.
Challenge,
/// Presumably rejects the request or closes the connection.
Drop,
}
/// Declarative request filter, turned into a [`CompiledFilter`] by `Filter::compile`.
///
/// NOTE(review): evaluation of filters is not implemented in this file yet; the boolean
/// semantics of `And`/`Or`/`Not` noted below are inferred from the names.
#[derive(Clone, Debug)]
pub enum Filter {
/// Constant filter carrying a fixed boolean.
Bool(bool),
/// Regex pattern that `compile` collects into the first-line regex list.
FirstLineMatch(String),
/// Regex pattern that `compile` collects into the header-line regex list.
HeaderLineMatch(String),
/// Group of sub-filters; presumably matches when all of them match.
And(Vec<Filter>),
/// Group of sub-filters; presumably matches when at least one of them matches.
Or(Vec<Filter>),
/// Wrapped sub-filter; presumably matches when the inner filter does not.
Not(Box<Filter>),
}
impl Filter {
	/// Converts this filter tree into a [`CompiledFilter`] tree of the same shape.
	///
	/// Every `FirstLineMatch` / `HeaderLineMatch` pattern met during the (in-order, depth-first)
	/// walk is appended to `first_line_regexes` / `header_line_regexes` respectively, and the
	/// compiled node stores the index the pattern received in that list.
	fn compile<'a>(
		&'a self,
		first_line_regexes: &mut Vec<&'a str>,
		header_line_regexes: &mut Vec<&'a str>,
	) -> CompiledFilter {
		match self {
			Filter::Bool(value) => CompiledFilter::Bool(*value),
			Filter::FirstLineMatch(pattern) => {
				// The index is the list length before the pattern is appended.
				let index = first_line_regexes.len();
				first_line_regexes.push(pattern);
				CompiledFilter::FirstLineMatch(index)
			}
			Filter::HeaderLineMatch(pattern) => {
				let index = header_line_regexes.len();
				header_line_regexes.push(pattern);
				CompiledFilter::HeaderLineMatch(index)
			}
			Filter::Not(inner) => {
				let compiled = inner.compile(first_line_regexes, header_line_regexes);
				CompiledFilter::Not(Box::new(compiled))
			}
			Filter::And(children) => {
				let mut compiled = Vec::with_capacity(children.len());
				for child in children {
					compiled.push(child.compile(first_line_regexes, header_line_regexes));
				}
				CompiledFilter::And(compiled)
			}
			Filter::Or(children) => {
				let mut compiled = Vec::with_capacity(children.len());
				for child in children {
					compiled.push(child.compile(first_line_regexes, header_line_regexes));
				}
				CompiledFilter::Or(compiled)
			}
		}
	}
}
/// A named rule pairing a [`Filter`] with an [`Action`].
#[derive(Clone, Debug)]
pub struct Policy {
/// Name of the policy.
pub name: String,
/// Filter associated with this policy.
pub filter: Filter,
/// Action associated with this policy.
pub action: Action,
/// Priority of the policy.
/// NOTE(review): the visible code does not define whether lower or higher values win — confirm.
pub priority: i32,
}
/// Compiled counterpart of `Filter`, as produced by `Filter::compile`.
///
/// The tree has the same shape as the source filter, but regex patterns are replaced by
/// their index in the pattern list they were collected into.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompiledFilter {
	/// Constant filter carrying a fixed boolean.
	Bool(bool),
	/// Index of the pattern in the first-line regex list.
	FirstLineMatch(usize),
	/// Index of the pattern in the header-line regex list.
	HeaderLineMatch(usize),
	/// Compiled sub-filters of an `And` group.
	And(Vec<CompiledFilter>),
	/// Compiled sub-filters of an `Or` group.
	Or(Vec<CompiledFilter>),
	/// Compiled sub-filter of a `Not`.
	Not(Box<CompiledFilter>),
}
/// A [`Policy`] whose filter has been compiled; built by `CompiledPolicies::new`.
pub struct CompiledPolicy {
/// Name copied from the source policy.
pub name: String,
/// Compiled form of the source policy's filter.
pub filter: CompiledFilter,
/// Priority copied from the source policy.
pub priority: i32,
/// Action copied from the source policy.
pub action: Action,
}
/// A group of policies compiled together so that their regex patterns share `RegexSet`s.
pub struct CompiledPolicies {
/// Set built from the first-line patterns of the group, or `None` when there are none.
pub first_line_regex_set: Option<RegexSet>,
/// Set built from the header-line patterns of the group, or `None` when there are none.
pub header_line_regex_set: Option<RegexSet>,
/// Compiled policies, in the order they were supplied to `new`.
pub policies: Vec<CompiledPolicy>,
}
impl CompiledPolicies {
	/// Compiles `policies` into one group.
	///
	/// Each policy's filter is compiled in turn; the regex patterns met along the way are
	/// gathered into two lists (first line / header lines), and each non-empty list becomes a
	/// `RegexSet`.
	///
	/// # Panics
	///
	/// Panics if `RegexSet::new` rejects one of the collected patterns.
	pub fn new<'a>(policies: impl IntoIterator<Item = &'a Policy>) -> Self {
		let mut first_line_patterns = Vec::new();
		let mut header_line_patterns = Vec::new();
		let compiled: Vec<CompiledPolicy> = policies
			.into_iter()
			.map(|policy| CompiledPolicy {
				name: policy.name.clone(),
				filter: policy
					.filter
					.compile(&mut first_line_patterns, &mut header_line_patterns),
				priority: policy.priority,
				action: policy.action.clone(),
			})
			.collect();
		CompiledPolicies {
			// An empty pattern list yields no set at all rather than an empty `RegexSet`.
			first_line_regex_set: (!first_line_patterns.is_empty())
				.then(|| RegexSet::new(&first_line_patterns).unwrap()),
			header_line_regex_set: (!header_line_patterns.is_empty())
				.then(|| RegexSet::new(&header_line_patterns).unwrap()),
			policies: compiled,
		}
	}

	/// Work-in-progress policy evaluation over the lines of a request header.
	///
	/// Takes the first item of `header_lines` as the request's first line and fails with
	/// [`PolicyEvaluationError::NoFirstLine`] when there is none. No matching is performed yet,
	/// so every other call returns `Ok(None)`.
	///
	/// NOTE(review): the lines are byte slices while `first_line_regex_set` is the `&str`-based
	/// `regex::RegexSet`; matching will presumably need `regex::bytes::RegexSet` — confirm.
	pub fn evaluate<'a>(&self, mut header_lines: impl Iterator<Item=&'a [u8]>) -> Result<Option<&CompiledPolicy>, PolicyEvaluationError> {
		let Some(_first_line) = header_lines.next() else {
			return Err(PolicyEvaluationError::NoFirstLine);
		};
		// Scaffolding for the upcoming best-match selection.
		let best_policy = None;
		let _best_priority = i32::MAX;
		if let Some(_first_line_regex_set) = self.first_line_regex_set.as_ref() {
			//let matches = first_line_regex_set.matches(first_line);
		}
		Ok(best_policy)
	}
}
/// Error returned by `CompiledPolicies::evaluate`.
///
/// Implements [`std::error::Error`] so it can be propagated with `?` into boxed or
/// wrapping error types.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PolicyEvaluationError {
	/// First HTTP line is too long or absent
	NoFirstLine,
}

impl std::fmt::Display for PolicyEvaluationError {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		match self {
			Self::NoFirstLine => f.write_str("first HTTP line is too long or absent"),
		}
	}
}

impl std::error::Error for PolicyEvaluationError {}