diff --git a/Cargo.lock b/Cargo.lock index 5a68860..0bde92c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,27 +44,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "argp" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7409aa6f1dd8464eac2e56cf538e1e5f7f79678caa32f198d214a3db8d5075c1" -dependencies = [ - "argp_derive", -] - -[[package]] -name = "argp_derive" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d9b949411282939e3f7d8923127e3f18aa474b46da4e8bb0ddf2cb8c81f963a" -dependencies = [ - "proc-macro2", - "pulldown-cmark", - "quote", - "syn", -] - [[package]] name = "arraydeque" version = "0.5.1" @@ -92,12 +71,6 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "bitflags" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" - [[package]] name = "block-buffer" version = "0.11.0-rc.4" @@ -156,15 +129,6 @@ dependencies = [ "crypto-common", ] -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -235,7 +199,6 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" name = "mesozoa" version = "0.1.0" dependencies = [ - "argp", "base64", "rand", "realm_io", @@ -314,18 +277,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "pulldown-cmark" -version = "0.9.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" -dependencies = [ - "bitflags", - "getopts", - "memchr", - "unicase", -] - [[package]] name = "quote" version = "1.0.40" @@ -533,24 +484,12 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" -[[package]] -name = "unicase" -version = "2.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" - [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" -[[package]] -name = "unicode-width" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" - [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index fb871eb..c119268 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,13 +2,8 @@ name = "mesozoa" version = "0.1.0" edition = "2024" -description = "PoW anti-crawler middle-proxy" -authors = ["tuxmain "] -repository = "https://git.txmn.tk/tuxmain/mesozoa" -license = "AGPL-3.0-only" [dependencies] -argp = "0.4.0" base64 = "0.22.1" rand = "0.8.5" realm_io = { version = "0.5" } @@ -22,6 +17,3 @@ static_cell = { version = "2.1.0", features = ["nightly"] } subtle = { version = "2.6.1", default-features = false, features = ["const-generics", "nightly", "std"] } tokio = { version = "1", features = ["io-util", "macros", "rt", "rt-multi-thread", "time"] } #to-arraystring = "0.2.2" - -[profile.release] -lto = true diff --git a/README.md b/README.md index 9c0f4a5..5740f5d 100644 --- a/README.md +++ b/README.md @@ -14,12 +14,11 @@ And because it looked like a fun little project. [Install rustup](https://rustup.rs) and a nightly Rust toolchain. + # Build executable at ./target/release/mesozoa cargo build --release Must be used behind a reverse proxy providing `X-Forwarded-For`. - ./target/release/mesozoa -c example-config.yaml - ## Challenge protocol ### Challenge generation @@ -82,8 +81,6 @@ Configuration file can be modified and kept secret, of course. [Support me via LiberaPay](https://liberapay.com/tuxmain/donate) -No LLM was used to write this program. - GNU AGPL v3, CopyLeft 2025 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. diff --git a/src/challenge.rs b/src/challenge.rs index 333b33a..d43d4e7 100644 --- a/src/challenge.rs +++ b/src/challenge.rs @@ -1,9 +1,9 @@ -use crate::{MAC_LEN, SALT_LEN, SECRET_LEN}; - use base64::Engine; use sha3::Digest; use subtle::ConstantTimeEq; +use crate::{CHALLENGE_TIMEOUT, MAC_LEN, SALT_LEN, SECRET_LEN}; + pub fn check_challenge(seed: &[u8], proof: &[u8], target_zeros: u32) -> bool { let mut hasher = sha2::Sha256::default(); hasher.update(proof); @@ -47,25 +47,24 @@ pub fn verify_challenge_cookie( secret: &[u8; SECRET_LEN], user_agent: &[u8], ip: &[u8], - challenge_timeout: u64, ) -> bool { let Ok(cookie_bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(cookie) else { - // invalid base64 + dbg!("invalid base64"); return false; }; if cookie_bytes.len() != SALT_LEN + 8 + MAC_LEN { - // bad length + dbg!("invalid len"); return false; } let timestamp: [u8; 8] = cookie_bytes[SALT_LEN..SALT_LEN + 8].try_into().unwrap(); let timestamp_time = u64::from_be_bytes(timestamp); - if timestamp_time.wrapping_add(challenge_timeout) + if timestamp_time.wrapping_add(CHALLENGE_TIMEOUT) < std::time::SystemTime::UNIX_EPOCH .elapsed() .unwrap() .as_secs() { - // challenge timeout + dbg!("invalid time"); return false; } let salt: [u8; SALT_LEN] = cookie_bytes[0..SALT_LEN].try_into().unwrap(); diff --git a/src/cli.rs b/src/cli.rs deleted file mode 100644 index 66b820a..0000000 --- a/src/cli.rs +++ /dev/null @@ -1,10 +0,0 @@ -/// PoW anti-crawler middle-proxy -/// -/// https://git.txmn.tk/tuxmain/mesozoa/ -/// Distributed under license GNU AGPL v3 without any warranty. -#[derive(argp::FromArgs)] -pub struct Cli { - /// Path to config file - #[argp(option, short = 'c')] - pub config: String, -} diff --git a/src/config.rs b/src/config.rs deleted file mode 100644 index 4d04d49..0000000 --- a/src/config.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::policy::{Action, Policy}; - -use std::{io::Read, net::SocketAddr, str::FromStr}; - -pub struct Config { - pub listen_addr: SocketAddr, - pub pass_addr: SocketAddr, - pub default_action: Action, - pub challenge_timeout: u64, - pub policy_groups: Vec>, -} - -impl Config { - pub fn from_file(path: &str) -> Self { - let mut config_file = std::fs::File::open(path).expect("Cannot open config file"); - let mut config_str = String::new(); - config_file - .read_to_string(&mut config_str) - .expect("Cannot read config file"); - let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config"); - let config_doc = &config_yaml[0]; - - Config { - listen_addr: config_doc["listen"] - .as_str() - .expect("Missing listen address in config") - .parse() - .expect("Invalid listen address"), - pass_addr: config_doc["pass"] - .as_str() - .expect("Missing pass address in config") - .parse() - .expect("Invalid pass address"), - challenge_timeout: config_doc["challenge-timeout"] - .as_i64() - .expect("Missing challenge timeout in config") as u64, - default_action: Action::from_str( - config_doc["default-action"] - .as_str() - .expect("Missing default action in config"), - ) - .expect("Invalid default action"), - policy_groups: config_doc["policy-groups"] - .as_vec() - .expect("Missing policies in config") - .iter() - .map(|policy_group| { - policy_group - .as_vec() - .expect("Missing policies in config") - .iter() - .map(|policy| Policy { - name: policy["name"] - .as_str() - .expect("Expected policy name string") - .to_string(), - first_line_regex: policy["first-line"] - .as_str() - .expect("Expected policy first line regex string") - .to_string(), - action: Action::from_str( - policy["action"] - .as_str() - .expect("Expected policy action string"), - ) - .expect("Invalid policy action"), - }) - .collect() - }) - .collect(), - } - } -} diff --git a/src/main.rs b/src/main.rs index 52bc769..522a25b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,19 @@ mod challenge; -mod cli; -mod config; mod http; mod policy; use http::HeaderLineIterator; -use policy::CompiledPolicies; +use policy::{CompiledPolicies, Policy}; use rand::Rng; use realm_syscall::socket2::TcpKeepalive; use regex::bytes::Regex; -use std::{net::SocketAddr, time::Duration}; +use std::{ + io::{Read, Write}, + net::SocketAddr, + str::FromStr, + time::Duration, +}; use tokio::{ io::{AsyncWriteExt, ReadBuf}, net::{TcpSocket, TcpStream}, @@ -20,6 +23,7 @@ use tokio::{ const SALT_LEN: usize = 16; const SECRET_LEN: usize = 32; const MAC_LEN: usize = 32; +const CHALLENGE_TIMEOUT: u64 = 3600; const TARGET_ZEROS: u32 = 15; static CHALLENGE_BODY: &str = include_str!("challenge.html"); @@ -35,9 +39,35 @@ macro_rules! mk_static { #[tokio::main] async fn main() { - let cli: cli::Cli = argp::parse_args_or_exit(argp::DEFAULT); - - let config = config::Config::from_file(&cli.config); + let mut config_file = + std::fs::File::open("example-config.yaml").expect("Cannot open config file"); + let mut config_str = String::new(); + config_file + .read_to_string(&mut config_str) + .expect("Cannot read config file"); + let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config"); + let config_doc = &config_yaml[0]; + let listen_addr: SocketAddr = config_doc["listen"] + .as_str() + .expect("Missing listen address in config") + .parse() + .expect("Invalid listen address"); + let pass_addr: SocketAddr = config_doc["pass"] + .as_str() + .expect("Missing pass address in config") + .parse() + .expect("Invalid pass address"); + let default_action = policy::Action::from_str( + config_doc["default-action"] + .as_str() + .expect("Missing default action in config"), + ) + .expect("Invalid default action"); + let policy_groups: Vec> = config_doc["policy-groups"].as_vec().expect("Missing policies in config").into_iter().map(|policy_group| policy_group.as_vec().expect("Missing policies in config").into_iter().map(|policy| Policy { + name: policy["name"].as_str().expect("Expected policy name string").to_string(), + first_line_regex: policy["first-line"].as_str().expect("Expected policy first line regex string").to_string(), + action: policy::Action::from_str(policy["action"].as_str().expect("Expected policy action string")).expect("Invalid policy action"), + }).collect()).collect(); let mut rng = rand::thread_rng(); @@ -45,18 +75,17 @@ async fn main() { let policy_groups = &*mk_static!( Vec, - config - .policy_groups + policy_groups .into_iter() .map(CompiledPolicies::new) .collect() ); - let socket = realm_syscall::new_tcp_socket(&config.listen_addr).unwrap(); + let socket = realm_syscall::new_tcp_socket(&listen_addr).unwrap(); socket.set_reuse_address(true).ok(); - socket.bind(&config.listen_addr.into()).unwrap(); + socket.bind(&listen_addr.into()).unwrap(); socket.listen(1024).unwrap(); let listener = tokio::net::TcpListener::from_std(socket.into()).unwrap(); @@ -100,19 +129,25 @@ async fn main() { .await .is_err() { - // Peek timeout + println!("peek timeout"); return; } + let mut stdout = std::io::stdout(); + stdout.write_all(&buf).unwrap(); + stdout.flush().unwrap(); + println!(); + let mut header_line_iter = HeaderLineIterator::new(&buf); let Some(first_line) = header_line_iter.next() else { - // Not HTTP, or too long line + println!("Not HTTP, or too long line"); return; }; - let mut action = config.default_action; + let mut action = default_action; for policy_group in policy_groups.iter() { if let Some(policy) = policy_group.evaluate(first_line) { + println!("Applying policy {}", policy.name); action = policy.action; break; } @@ -121,7 +156,7 @@ async fn main() { match action { policy::Action::Drop => {} policy::Action::Allow => { - do_proxy(config.pass_addr, client_stream).await; + do_proxy(pass_addr, client_stream).await; } policy::Action::Challenge => { let mut req_challenge = None; @@ -152,14 +187,17 @@ async fn main() { &secret, req_user_agent, req_ip, - config.challenge_timeout, ); - allow = valid_challenge - && challenge::check_challenge(req_challenge, req_proof, TARGET_ZEROS); + allow = dbg!(valid_challenge) + && dbg!(challenge::check_challenge( + req_challenge, + req_proof, + TARGET_ZEROS + )); } if allow { - do_proxy(config.pass_addr, client_stream).await; + do_proxy(pass_addr, client_stream).await; } else { let salt: [u8; SALT_LEN] = rand::thread_rng().r#gen(); diff --git a/src/policy.bak.rs b/src/policy.bak.rs new file mode 100644 index 0000000..bdecc1d --- /dev/null +++ b/src/policy.bak.rs @@ -0,0 +1,198 @@ +use regex::bytes::{Regex, RegexSet, SetMatches}; + +#[derive(Clone, Debug)] +pub enum Action { + Allow, + Challenge, + Drop, +} + +#[derive(Clone, Debug)] +pub enum Filter { + Bool(bool), + FirstLineMatch(String), + HeaderLineMatch(String), + And(Vec), + Or(Vec), + Not(Box), +} + +impl Filter { + fn compile<'a>( + &'a self, + first_line_regexes: &mut Vec<&'a str>, + header_line_regexes: &mut Vec<&'a str>, + ) -> CompiledFilter { + match self { + Filter::Bool(v) => CompiledFilter::Bool(*v), + Filter::And(filters) => CompiledFilter::And( + filters + .iter() + .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) + .collect(), + ), + Filter::Or(filters) => CompiledFilter::Or( + filters + .iter() + .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) + .collect(), + ), + Filter::Not(filter) => CompiledFilter::Not(Box::new( + filter.compile(first_line_regexes, header_line_regexes), + )), + Filter::FirstLineMatch(regex) => { + let filter = CompiledFilter::FirstLineMatch(first_line_regexes.len()); + first_line_regexes.push(regex); + filter + } + Filter::HeaderLineMatch(regex) => { + let filter = CompiledFilter::HeaderLineMatch(header_line_regexes.len()); + header_line_regexes.push(regex); + filter + } + } + } +} + +#[derive(Clone, Debug)] +pub struct Policy { + pub name: String, + pub filter: Filter, + pub action: Action, + pub priority: i32, +} + +pub enum CompiledFilter { + Bool(bool), + FirstLineMatch(usize), + HeaderLineMatch(usize), + And(Vec), + Or(Vec), + Not(Box), +} + +/*impl CompiledFilter { + fn evaluate(&self, matches: &SetMatches) -> bool { + match self { + Self::And(filters) => filters.iter().all(Self::evaluate), + Self::Or(filters) => filters.iter().any(Self::evaluate), + Self::Bool(b) => *b, + Self::Not(filter) => !filter.evaluate(matches), + Self::FirstLineMatch(regex_id) => matches.matched(regex_id), + Self::HeaderLineMatch(regex_id) => matches.matched(regex_id), + } + } +}*/ + +pub struct CompiledPolicy { + pub name: String, + pub filter: CompiledFilter, + pub priority: i32, + pub action: Action, +} + +pub struct CompiledPolicies { + pub first_line_regex_set: Option, + pub header_line_regex_set: Option, + pub policies: Vec, +} + +pub enum RegexOrRegexSet { + Many(RegexSet), + One(Regex), + None, +} + +impl TryInto for Vec<&str> { + type Error = regex::Error; + fn try_into(self) -> Result { + Ok(match self.len() { + 0 => RegexOrRegexSet::None, + 1 => RegexOrRegexSet::One(Regex::new(self[0])?), + _ => RegexOrRegexSet::Many(RegexSet::new(self)?) + }) + } +} + +impl CompiledPolicies { + pub fn new<'a>(policies: impl IntoIterator) -> Self { + let mut first_line_regexes = Vec::new(); + let mut header_line_regexes = Vec::new(); + let mut compiled_policies = Vec::new(); + + for policy in policies { + let compiled_policy = CompiledPolicy { + name: policy.name.clone(), + filter: policy + .filter + .compile(&mut first_line_regexes, &mut header_line_regexes), + priority: policy.priority, + action: policy.action.clone(), + }; + compiled_policies.push(compiled_policy); + } + + CompiledPolicies { + first_line_regex_set: if first_line_regexes.is_empty() { + None + } else { + Some(RegexSet::new(&first_line_regexes).unwrap()) + }, + header_line_regex_set: if header_line_regexes.is_empty() { + None + } else { + Some(RegexSet::new(&header_line_regexes).unwrap()) + }, + policies: compiled_policies, + } + } + + /*pub fn evaluate<'a>( + &self, + mut header_lines: impl Iterator, + ) -> Result, PolicyEvaluationError> { + let mut best_policy: Option<&CompiledPolicy> = None; + + let first_line = header_lines + .next() + .ok_or(PolicyEvaluationError::NoFirstLine)?; + + if let Some(first_line_regex_set) = &self.first_line_regex_set { + let matches = first_line_regex_set.matches(first_line); + for policy in self.policies.iter() { + + } + let policy = &self.policies[matched]; + if let Some(best_policy) = &mut best_policy { + if policy.priority < best_policy.priority { + *best_policy = policy; + } + } else { + best_policy = Some(policy); + } + } + + if let Some(header_line_regex_set) = &self.header_line_regex_set { + for header_line in header_lines { + for matched in header_line_regex_set.matches(header_line) { + let policy = &self.policies[matched]; + if let Some(best_policy) = &mut best_policy { + if policy.priority < best_policy.priority { + *best_policy = policy; + } + } else { + best_policy = Some(policy); + } + } + } + } + + Ok(best_policy) + }*/ +} + +#[derive(Debug)] +pub enum PolicyEvaluationError { + /// First HTTP line is too long or absent + NoFirstLine, +} diff --git a/src/policy.rs b/src/policy.rs index cec93dd..1c796e0 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -21,8 +21,6 @@ impl std::str::FromStr for Action { #[derive(Clone, Debug)] pub struct Policy { - // Will be used when we add log - #[allow(unused)] pub name: String, pub first_line_regex: String, pub action: Action,