From b35120be22f25a0c30cd808d149de74080edc0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pascal=20Eng=C3=A9libert?= Date: Sat, 29 Mar 2025 22:15:07 +0100 Subject: [PATCH] Initial commit --- .gitignore | 1 + Cargo.lock | 350 +++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 11 ++ README.md | 5 + rustfmt.toml | 9 ++ src/challenge.html | 17 +++ src/http.rs | 58 ++++++++ src/main.rs | 95 ++++++++++++ src/policy.rs | 131 +++++++++++++++++ 9 files changed, 677 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 rustfmt.toml create mode 100644 src/challenge.html create mode 100644 src/http.rs create mode 100644 src/main.rs create mode 100644 src/policy.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..70b04b0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,350 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "bytes" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "daemonize" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab8bfdaacb3c887a54d41bdf48d3af8873b3f5566469f8ba21b92057509f116e" +dependencies = [ + "libc", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "libc" +version = "0.2.171" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mesozoa" +version = "0.1.0" +dependencies = [ + "realm_io", + "realm_syscall", + "regex", + "static_cell", + "tokio", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi", + "windows-sys", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "portable-atomic" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" + +[[package]] +name = "proc-macro2" +version = "1.0.94" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "realm_io" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2558ac548920bb63a08d21496481b24818501cb1a7bbd7c1c3b6d9bc5d1e4a41" +dependencies = [ + "libc", + "socket2", + "tokio", +] + +[[package]] +name = "realm_syscall" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e59aa61541fb76e4216da9eb43cdebd63d4ba2874e802aee31c77d57c703d17c" +dependencies = [ + "daemonize", + "libc", + "socket2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "socket2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "static_cell" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89b0684884a883431282db1e4343f34afc2ff6996fe1f4a1664519b66e14c1e" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "syn" +version = "2.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tokio" +version = "1.44.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +dependencies = [ + "backtrace", + "bytes", + "libc", + "mio", + "pin-project-lite", + "socket2", + "tokio-macros", + "windows-sys", +] + +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..c724636 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "mesozoa" +version = "0.1.0" +edition = "2024" + +[dependencies] +realm_io = { version = "0.5" } +realm_syscall = "0.1" +regex = { version = "1.11", default-features = false, features = ["perf", "std"] } +static_cell = { version = "2.1.0", features = ["nightly"] } +tokio = { version = "1", features = ["io-util", "macros", "rt", "rt-multi-thread", "time"] } diff --git a/README.md b/README.md new file mode 100644 index 0000000..1aa517d --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ +Mesozoa + +Why not Anubis? Because it provides no build instructions and only supports Docker. + +Why not using Realm completely? Because the hook system is useless and only allows filtering. \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..26b2841 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,9 @@ +hard_tabs = true +newline_style = "Unix" +imports_granularity = "Crate" + +unstable_features = true +format_code_in_doc_comments = true +format_macro_bodies = true +format_macro_matchers = true +format_strings = true diff --git a/src/challenge.html b/src/challenge.html new file mode 100644 index 0000000..d6b5883 --- /dev/null +++ b/src/challenge.html @@ -0,0 +1,17 @@ + + + + + + Antispam working... + + +

Fighting crawlers

+ + + diff --git a/src/http.rs b/src/http.rs new file mode 100644 index 0000000..dfaa555 --- /dev/null +++ b/src/http.rs @@ -0,0 +1,58 @@ +/// Iterates lines of HTTP header and stops at end of header +pub struct HeaderLineIterator<'a> { + packet: &'a [u8], + start: usize, + cr: bool, + i: usize, +} + +impl<'a> HeaderLineIterator<'a> { + pub fn new(packet: &'a [u8]) -> Self { + Self { + packet, + start: 0, + cr: false, + i: 0, + } + } +} + +impl<'a> Iterator for HeaderLineIterator<'a> { + type Item = &'a [u8]; + fn next(&mut self) -> Option { + while self.i < self.packet.len() { + let c = self.packet[self.i]; + if c == b'\r' { + self.cr = true; + } else if c == b'\n' && self.cr { + self.cr = false; + self.i += 1; + let start = self.start; + self.start = self.i; + return Some(&self.packet[start..self.i - 2]); + } else { + self.cr = false; + } + self.i += 1; + } + None + } +} + +pub fn parse_cookies<'a>(line: &'a [u8]) -> Option<&'a [u8]> { + if line.get(0..7) != Some(b"Cookie:") { + return None; + } + let mut waiting_for_name = true; + let mut iter = line.iter().enumerate().skip(7); + while let Some((i, c)) = iter.next() { + if *c == b' ' { + continue + } + if waiting_for_name { + + } + //iter.advance_by(5); + } + None +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..1678f65 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,95 @@ +mod http; +mod policy; + +use http::HeaderLineIterator; +use policy::{CompiledPolicies, Policy}; + +use regex::bytes::Regex; +use std::{ + io::{BufReader, Write}, + net::SocketAddr, + time::Duration, +}; +use tokio::{io::{ReadBuf, AsyncWriteExt}, time::timeout}; + +static CHALLENGE_BODY: &str = include_str!("challenge.html"); + +macro_rules! mk_static { + ($t:ty, $val:expr) => {{ + static STATIC_CELL: static_cell::StaticCell<$t> = static_cell::StaticCell::new(); + #[deny(unused_attributes)] + let x = STATIC_CELL.uninit().write(($val)); + x + }}; +} + +#[tokio::main] +async fn main() { + let listen_addr = "127.0.0.1:8000".parse().unwrap(); + let policy_groups = &[&[Policy { + name: String::from("Block"), + filter: policy::Filter::FirstLineMatch(String::from("GET /block")), + action: policy::Action::Drop, + priority: 0, + }]]; + + let challenge_response = &*mk_static!(String, format!("HTTP/1.1 200\r\ncontent-type: text/html\r\ncontent-length: {}\r\n\r\n{}", CHALLENGE_BODY.len(), CHALLENGE_BODY)); + + let policy_groups: Vec = policy_groups.into_iter().map(|policies| CompiledPolicies::new(*policies)).collect(); + + let socket = realm_syscall::new_tcp_socket(&listen_addr).unwrap(); + + socket.set_reuse_address(true).ok(); + + socket.bind(&listen_addr.into()).unwrap(); + socket.listen(1024).unwrap(); + + let listener = tokio::net::TcpListener::from_std(socket.into()).unwrap(); + + let cookie_regex = Regex::new(r"^Cookie: *(?:[^;=]+=[^;=]* *; *)*mesozoa *= *([0-9a-zA-Z]{4})").unwrap(); + + + loop { + let Ok((mut client_stream, client_addr)) = listener.accept().await else { + continue; + }; + //client_stream.set_nodelay(true).ok(); + + let cookie_regex = cookie_regex.clone(); + tokio::spawn(async move { + let mut buf = [0u8; 1024]; + let mut buf_reader = ReadBuf::new(&mut buf); + if let Err(_) = timeout( + Duration::from_millis(100), + std::future::poll_fn(|cx| client_stream.poll_peek(cx, &mut buf_reader)), + ) + .await + { + println!("peek timeout"); + return; + } + + let mut header_line_iter = HeaderLineIterator::new(&buf); + let Some(first_line) = header_line_iter.next() else { + println!("Not HTTP, or too long line"); + return; + }; + // TODO matching + // for test we will challenge everything! + if let Some(captures) = header_line_iter.find_map(|line| cookie_regex.captures(line)) { + if let Some(cookie) = captures.get(1) { + let mut stdout = std::io::stdout(); + stdout.write_all(cookie.as_bytes()).unwrap(); + stdout.flush().unwrap(); + println!(""); + } else { + println!("cookie header, but no cookie") + } + } else { + println!("no cookie"); + } + client_stream.writable().await.unwrap(); + client_stream.write_all(challenge_response.as_bytes()).await.unwrap(); + }); + } +} diff --git a/src/policy.rs b/src/policy.rs new file mode 100644 index 0000000..520d417 --- /dev/null +++ b/src/policy.rs @@ -0,0 +1,131 @@ +use regex::{Regex, RegexSet}; + +#[derive(Clone, Debug)] +pub enum Action { + Allow, + Challenge, + Drop, +} + +#[derive(Clone, Debug)] +pub enum Filter { + Bool(bool), + FirstLineMatch(String), + HeaderLineMatch(String), + And(Vec), + Or(Vec), + Not(Box), +} + +impl Filter { + fn compile<'a>( + &'a self, + first_line_regexes: &mut Vec<&'a str>, + header_line_regexes: &mut Vec<&'a str>, + ) -> CompiledFilter { + match self { + Filter::Bool(v) => CompiledFilter::Bool(*v), + Filter::And(filters) => CompiledFilter::And( + filters + .iter() + .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) + .collect(), + ), + Filter::Or(filters) => CompiledFilter::Or( + filters + .iter() + .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) + .collect(), + ), + Filter::Not(filter) => CompiledFilter::Not(Box::new( + filter.compile(first_line_regexes, header_line_regexes), + )), + Filter::FirstLineMatch(regex) => { + let filter = CompiledFilter::FirstLineMatch(first_line_regexes.len()); + first_line_regexes.push(regex); + filter + } + Filter::HeaderLineMatch(regex) => { + let filter = CompiledFilter::HeaderLineMatch(header_line_regexes.len()); + header_line_regexes.push(regex); + filter + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Policy { + pub name: String, + pub filter: Filter, + pub action: Action, + pub priority: i32, +} + +pub enum CompiledFilter { + Bool(bool), + FirstLineMatch(usize), + HeaderLineMatch(usize), + And(Vec), + Or(Vec), + Not(Box), +} + +pub struct CompiledPolicy { + pub name: String, + pub filter: CompiledFilter, + pub priority: i32, + pub action: Action, +} + +pub struct CompiledPolicies { + pub first_line_regex_set: Option, + pub header_line_regex_set: Option, + pub policies: Vec, +} + +impl CompiledPolicies { + pub fn new<'a>(policies: impl IntoIterator) -> Self { + let mut first_line_regexes = Vec::new(); + let mut header_line_regexes = Vec::new(); + let mut compiled_policies = Vec::new(); + + for policy in policies { + let compiled_policy = CompiledPolicy { + name: policy.name.clone(), + filter: policy + .filter + .compile(&mut first_line_regexes, &mut header_line_regexes), + priority: policy.priority, + action: policy.action.clone(), + }; + compiled_policies.push(compiled_policy); + } + + CompiledPolicies { + first_line_regex_set: if first_line_regexes.is_empty() {None} else {Some(RegexSet::new(&first_line_regexes).unwrap())}, + header_line_regex_set: if header_line_regexes.is_empty() {None} else {Some(RegexSet::new(&header_line_regexes).unwrap())}, + policies: compiled_policies, + } + } + + pub fn evaluate<'a>(&self, mut header_lines: impl Iterator) -> Result, PolicyEvaluationError> { + let mut best_policy = None; + let mut best_priority = i32::MAX; + + let first_line = header_lines.next().ok_or(PolicyEvaluationError::NoFirstLine)?; + + if let Some(first_line_regex_set) = &self.first_line_regex_set { + //let matches = first_line_regex_set.matches(first_line); + + } + + Ok(best_policy) + } +} + +#[derive(Debug)] +pub enum PolicyEvaluationError { + /// First HTTP line is too long or absent + NoFirstLine, +}