diff --git a/Cargo.lock b/Cargo.lock index 0bde92c..5a68860 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -44,6 +44,27 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "argp" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7409aa6f1dd8464eac2e56cf538e1e5f7f79678caa32f198d214a3db8d5075c1" +dependencies = [ + "argp_derive", +] + +[[package]] +name = "argp_derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d9b949411282939e3f7d8923127e3f18aa474b46da4e8bb0ddf2cb8c81f963a" +dependencies = [ + "proc-macro2", + "pulldown-cmark", + "quote", + "syn", +] + [[package]] name = "arraydeque" version = "0.5.1" @@ -71,6 +92,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + [[package]] name = "block-buffer" version = "0.11.0-rc.4" @@ -129,6 +156,15 @@ dependencies = [ "crypto-common", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.2.15" @@ -199,6 +235,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" name = "mesozoa" version = "0.1.0" dependencies = [ + "argp", "base64", "rand", "realm_io", @@ -277,6 +314,18 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags", + "getopts", + "memchr", + "unicase", +] + [[package]] name = "quote" version = "1.0.40" @@ -484,12 +533,24 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +[[package]] +name = "unicase" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" + [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index c119268..fb871eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,8 +2,13 @@ name = "mesozoa" version = "0.1.0" edition = "2024" +description = "PoW anti-crawler middle-proxy" +authors = ["tuxmain "] +repository = "https://git.txmn.tk/tuxmain/mesozoa" +license = "AGPL-3.0-only" [dependencies] +argp = "0.4.0" base64 = "0.22.1" rand = "0.8.5" realm_io = { version = "0.5" } @@ -17,3 +22,6 @@ static_cell = { version = "2.1.0", features = ["nightly"] } subtle = { version = "2.6.1", default-features = false, features = ["const-generics", "nightly", "std"] } tokio = { version = "1", features = ["io-util", "macros", "rt", "rt-multi-thread", "time"] } #to-arraystring = "0.2.2" + +[profile.release] +lto = true diff --git a/README.md b/README.md index 5740f5d..9c0f4a5 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,12 @@ And because it looked like a fun little project. [Install rustup](https://rustup.rs) and a nightly Rust toolchain. - # Build executable at ./target/release/mesozoa cargo build --release Must be used behind a reverse proxy providing `X-Forwarded-For`. + ./target/release/mesozoa -c example-config.yaml + ## Challenge protocol ### Challenge generation @@ -81,6 +82,8 @@ Configuration file can be modified and kept secret, of course. [Support me via LiberaPay](https://liberapay.com/tuxmain/donate) +No LLM was used to write this program. + GNU AGPL v3, CopyLeft 2025 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/) This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. diff --git a/src/challenge.rs b/src/challenge.rs index d43d4e7..333b33a 100644 --- a/src/challenge.rs +++ b/src/challenge.rs @@ -1,9 +1,9 @@ +use crate::{MAC_LEN, SALT_LEN, SECRET_LEN}; + use base64::Engine; use sha3::Digest; use subtle::ConstantTimeEq; -use crate::{CHALLENGE_TIMEOUT, MAC_LEN, SALT_LEN, SECRET_LEN}; - pub fn check_challenge(seed: &[u8], proof: &[u8], target_zeros: u32) -> bool { let mut hasher = sha2::Sha256::default(); hasher.update(proof); @@ -47,24 +47,25 @@ pub fn verify_challenge_cookie( secret: &[u8; SECRET_LEN], user_agent: &[u8], ip: &[u8], + challenge_timeout: u64, ) -> bool { let Ok(cookie_bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(cookie) else { - dbg!("invalid base64"); + // invalid base64 return false; }; if cookie_bytes.len() != SALT_LEN + 8 + MAC_LEN { - dbg!("invalid len"); + // bad length return false; } let timestamp: [u8; 8] = cookie_bytes[SALT_LEN..SALT_LEN + 8].try_into().unwrap(); let timestamp_time = u64::from_be_bytes(timestamp); - if timestamp_time.wrapping_add(CHALLENGE_TIMEOUT) + if timestamp_time.wrapping_add(challenge_timeout) < std::time::SystemTime::UNIX_EPOCH .elapsed() .unwrap() .as_secs() { - dbg!("invalid time"); + // challenge timeout return false; } let salt: [u8; SALT_LEN] = cookie_bytes[0..SALT_LEN].try_into().unwrap(); diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..66b820a --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,10 @@ +/// PoW anti-crawler middle-proxy +/// +/// https://git.txmn.tk/tuxmain/mesozoa/ +/// Distributed under license GNU AGPL v3 without any warranty. +#[derive(argp::FromArgs)] +pub struct Cli { + /// Path to config file + #[argp(option, short = 'c')] + pub config: String, +} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..4d04d49 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,73 @@ +use crate::policy::{Action, Policy}; + +use std::{io::Read, net::SocketAddr, str::FromStr}; + +pub struct Config { + pub listen_addr: SocketAddr, + pub pass_addr: SocketAddr, + pub default_action: Action, + pub challenge_timeout: u64, + pub policy_groups: Vec>, +} + +impl Config { + pub fn from_file(path: &str) -> Self { + let mut config_file = std::fs::File::open(path).expect("Cannot open config file"); + let mut config_str = String::new(); + config_file + .read_to_string(&mut config_str) + .expect("Cannot read config file"); + let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config"); + let config_doc = &config_yaml[0]; + + Config { + listen_addr: config_doc["listen"] + .as_str() + .expect("Missing listen address in config") + .parse() + .expect("Invalid listen address"), + pass_addr: config_doc["pass"] + .as_str() + .expect("Missing pass address in config") + .parse() + .expect("Invalid pass address"), + challenge_timeout: config_doc["challenge-timeout"] + .as_i64() + .expect("Missing challenge timeout in config") as u64, + default_action: Action::from_str( + config_doc["default-action"] + .as_str() + .expect("Missing default action in config"), + ) + .expect("Invalid default action"), + policy_groups: config_doc["policy-groups"] + .as_vec() + .expect("Missing policies in config") + .iter() + .map(|policy_group| { + policy_group + .as_vec() + .expect("Missing policies in config") + .iter() + .map(|policy| Policy { + name: policy["name"] + .as_str() + .expect("Expected policy name string") + .to_string(), + first_line_regex: policy["first-line"] + .as_str() + .expect("Expected policy first line regex string") + .to_string(), + action: Action::from_str( + policy["action"] + .as_str() + .expect("Expected policy action string"), + ) + .expect("Invalid policy action"), + }) + .collect() + }) + .collect(), + } + } +} diff --git a/src/main.rs b/src/main.rs index 522a25b..52bc769 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,19 +1,16 @@ mod challenge; +mod cli; +mod config; mod http; mod policy; use http::HeaderLineIterator; -use policy::{CompiledPolicies, Policy}; +use policy::CompiledPolicies; use rand::Rng; use realm_syscall::socket2::TcpKeepalive; use regex::bytes::Regex; -use std::{ - io::{Read, Write}, - net::SocketAddr, - str::FromStr, - time::Duration, -}; +use std::{net::SocketAddr, time::Duration}; use tokio::{ io::{AsyncWriteExt, ReadBuf}, net::{TcpSocket, TcpStream}, @@ -23,7 +20,6 @@ use tokio::{ const SALT_LEN: usize = 16; const SECRET_LEN: usize = 32; const MAC_LEN: usize = 32; -const CHALLENGE_TIMEOUT: u64 = 3600; const TARGET_ZEROS: u32 = 15; static CHALLENGE_BODY: &str = include_str!("challenge.html"); @@ -39,35 +35,9 @@ macro_rules! mk_static { #[tokio::main] async fn main() { - let mut config_file = - std::fs::File::open("example-config.yaml").expect("Cannot open config file"); - let mut config_str = String::new(); - config_file - .read_to_string(&mut config_str) - .expect("Cannot read config file"); - let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config"); - let config_doc = &config_yaml[0]; - let listen_addr: SocketAddr = config_doc["listen"] - .as_str() - .expect("Missing listen address in config") - .parse() - .expect("Invalid listen address"); - let pass_addr: SocketAddr = config_doc["pass"] - .as_str() - .expect("Missing pass address in config") - .parse() - .expect("Invalid pass address"); - let default_action = policy::Action::from_str( - config_doc["default-action"] - .as_str() - .expect("Missing default action in config"), - ) - .expect("Invalid default action"); - let policy_groups: Vec> = config_doc["policy-groups"].as_vec().expect("Missing policies in config").into_iter().map(|policy_group| policy_group.as_vec().expect("Missing policies in config").into_iter().map(|policy| Policy { - name: policy["name"].as_str().expect("Expected policy name string").to_string(), - first_line_regex: policy["first-line"].as_str().expect("Expected policy first line regex string").to_string(), - action: policy::Action::from_str(policy["action"].as_str().expect("Expected policy action string")).expect("Invalid policy action"), - }).collect()).collect(); + let cli: cli::Cli = argp::parse_args_or_exit(argp::DEFAULT); + + let config = config::Config::from_file(&cli.config); let mut rng = rand::thread_rng(); @@ -75,17 +45,18 @@ async fn main() { let policy_groups = &*mk_static!( Vec, - policy_groups + config + .policy_groups .into_iter() .map(CompiledPolicies::new) .collect() ); - let socket = realm_syscall::new_tcp_socket(&listen_addr).unwrap(); + let socket = realm_syscall::new_tcp_socket(&config.listen_addr).unwrap(); socket.set_reuse_address(true).ok(); - socket.bind(&listen_addr.into()).unwrap(); + socket.bind(&config.listen_addr.into()).unwrap(); socket.listen(1024).unwrap(); let listener = tokio::net::TcpListener::from_std(socket.into()).unwrap(); @@ -129,25 +100,19 @@ async fn main() { .await .is_err() { - println!("peek timeout"); + // Peek timeout return; } - let mut stdout = std::io::stdout(); - stdout.write_all(&buf).unwrap(); - stdout.flush().unwrap(); - println!(); - let mut header_line_iter = HeaderLineIterator::new(&buf); let Some(first_line) = header_line_iter.next() else { - println!("Not HTTP, or too long line"); + // Not HTTP, or too long line return; }; - let mut action = default_action; + let mut action = config.default_action; for policy_group in policy_groups.iter() { if let Some(policy) = policy_group.evaluate(first_line) { - println!("Applying policy {}", policy.name); action = policy.action; break; } @@ -156,7 +121,7 @@ async fn main() { match action { policy::Action::Drop => {} policy::Action::Allow => { - do_proxy(pass_addr, client_stream).await; + do_proxy(config.pass_addr, client_stream).await; } policy::Action::Challenge => { let mut req_challenge = None; @@ -187,17 +152,14 @@ async fn main() { &secret, req_user_agent, req_ip, + config.challenge_timeout, ); - allow = dbg!(valid_challenge) - && dbg!(challenge::check_challenge( - req_challenge, - req_proof, - TARGET_ZEROS - )); + allow = valid_challenge + && challenge::check_challenge(req_challenge, req_proof, TARGET_ZEROS); } if allow { - do_proxy(pass_addr, client_stream).await; + do_proxy(config.pass_addr, client_stream).await; } else { let salt: [u8; SALT_LEN] = rand::thread_rng().r#gen(); diff --git a/src/policy.bak.rs b/src/policy.bak.rs deleted file mode 100644 index bdecc1d..0000000 --- a/src/policy.bak.rs +++ /dev/null @@ -1,198 +0,0 @@ -use regex::bytes::{Regex, RegexSet, SetMatches}; - -#[derive(Clone, Debug)] -pub enum Action { - Allow, - Challenge, - Drop, -} - -#[derive(Clone, Debug)] -pub enum Filter { - Bool(bool), - FirstLineMatch(String), - HeaderLineMatch(String), - And(Vec), - Or(Vec), - Not(Box), -} - -impl Filter { - fn compile<'a>( - &'a self, - first_line_regexes: &mut Vec<&'a str>, - header_line_regexes: &mut Vec<&'a str>, - ) -> CompiledFilter { - match self { - Filter::Bool(v) => CompiledFilter::Bool(*v), - Filter::And(filters) => CompiledFilter::And( - filters - .iter() - .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) - .collect(), - ), - Filter::Or(filters) => CompiledFilter::Or( - filters - .iter() - .map(|filter| filter.compile(first_line_regexes, header_line_regexes)) - .collect(), - ), - Filter::Not(filter) => CompiledFilter::Not(Box::new( - filter.compile(first_line_regexes, header_line_regexes), - )), - Filter::FirstLineMatch(regex) => { - let filter = CompiledFilter::FirstLineMatch(first_line_regexes.len()); - first_line_regexes.push(regex); - filter - } - Filter::HeaderLineMatch(regex) => { - let filter = CompiledFilter::HeaderLineMatch(header_line_regexes.len()); - header_line_regexes.push(regex); - filter - } - } - } -} - -#[derive(Clone, Debug)] -pub struct Policy { - pub name: String, - pub filter: Filter, - pub action: Action, - pub priority: i32, -} - -pub enum CompiledFilter { - Bool(bool), - FirstLineMatch(usize), - HeaderLineMatch(usize), - And(Vec), - Or(Vec), - Not(Box), -} - -/*impl CompiledFilter { - fn evaluate(&self, matches: &SetMatches) -> bool { - match self { - Self::And(filters) => filters.iter().all(Self::evaluate), - Self::Or(filters) => filters.iter().any(Self::evaluate), - Self::Bool(b) => *b, - Self::Not(filter) => !filter.evaluate(matches), - Self::FirstLineMatch(regex_id) => matches.matched(regex_id), - Self::HeaderLineMatch(regex_id) => matches.matched(regex_id), - } - } -}*/ - -pub struct CompiledPolicy { - pub name: String, - pub filter: CompiledFilter, - pub priority: i32, - pub action: Action, -} - -pub struct CompiledPolicies { - pub first_line_regex_set: Option, - pub header_line_regex_set: Option, - pub policies: Vec, -} - -pub enum RegexOrRegexSet { - Many(RegexSet), - One(Regex), - None, -} - -impl TryInto for Vec<&str> { - type Error = regex::Error; - fn try_into(self) -> Result { - Ok(match self.len() { - 0 => RegexOrRegexSet::None, - 1 => RegexOrRegexSet::One(Regex::new(self[0])?), - _ => RegexOrRegexSet::Many(RegexSet::new(self)?) - }) - } -} - -impl CompiledPolicies { - pub fn new<'a>(policies: impl IntoIterator) -> Self { - let mut first_line_regexes = Vec::new(); - let mut header_line_regexes = Vec::new(); - let mut compiled_policies = Vec::new(); - - for policy in policies { - let compiled_policy = CompiledPolicy { - name: policy.name.clone(), - filter: policy - .filter - .compile(&mut first_line_regexes, &mut header_line_regexes), - priority: policy.priority, - action: policy.action.clone(), - }; - compiled_policies.push(compiled_policy); - } - - CompiledPolicies { - first_line_regex_set: if first_line_regexes.is_empty() { - None - } else { - Some(RegexSet::new(&first_line_regexes).unwrap()) - }, - header_line_regex_set: if header_line_regexes.is_empty() { - None - } else { - Some(RegexSet::new(&header_line_regexes).unwrap()) - }, - policies: compiled_policies, - } - } - - /*pub fn evaluate<'a>( - &self, - mut header_lines: impl Iterator, - ) -> Result, PolicyEvaluationError> { - let mut best_policy: Option<&CompiledPolicy> = None; - - let first_line = header_lines - .next() - .ok_or(PolicyEvaluationError::NoFirstLine)?; - - if let Some(first_line_regex_set) = &self.first_line_regex_set { - let matches = first_line_regex_set.matches(first_line); - for policy in self.policies.iter() { - - } - let policy = &self.policies[matched]; - if let Some(best_policy) = &mut best_policy { - if policy.priority < best_policy.priority { - *best_policy = policy; - } - } else { - best_policy = Some(policy); - } - } - - if let Some(header_line_regex_set) = &self.header_line_regex_set { - for header_line in header_lines { - for matched in header_line_regex_set.matches(header_line) { - let policy = &self.policies[matched]; - if let Some(best_policy) = &mut best_policy { - if policy.priority < best_policy.priority { - *best_policy = policy; - } - } else { - best_policy = Some(policy); - } - } - } - } - - Ok(best_policy) - }*/ -} - -#[derive(Debug)] -pub enum PolicyEvaluationError { - /// First HTTP line is too long or absent - NoFirstLine, -} diff --git a/src/policy.rs b/src/policy.rs index 1c796e0..cec93dd 100644 --- a/src/policy.rs +++ b/src/policy.rs @@ -21,6 +21,8 @@ impl std::str::FromStr for Action { #[derive(Clone, Debug)] pub struct Policy { + // Will be used when we add log + #[allow(unused)] pub name: String, pub first_line_regex: String, pub action: Action,