Compare commits

...

3 commits

Author SHA1 Message Date
557cfa4a55 LTO, Cargo.toml info 2025-04-06 16:32:30 +02:00
e9cf447586 Remove debug prints 2025-04-06 16:24:58 +02:00
f5e4a421bb CLI 2025-04-06 16:20:05 +02:00
9 changed files with 184 additions and 262 deletions

61
Cargo.lock generated
View file

@ -44,6 +44,27 @@ version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "argp"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7409aa6f1dd8464eac2e56cf538e1e5f7f79678caa32f198d214a3db8d5075c1"
dependencies = [
"argp_derive",
]
[[package]]
name = "argp_derive"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d9b949411282939e3f7d8923127e3f18aa474b46da4e8bb0ddf2cb8c81f963a"
dependencies = [
"proc-macro2",
"pulldown-cmark",
"quote",
"syn",
]
[[package]]
name = "arraydeque"
version = "0.5.1"
@ -71,6 +92,12 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bitflags"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "block-buffer"
version = "0.11.0-rc.4"
@ -129,6 +156,15 @@ dependencies = [
"crypto-common",
]
[[package]]
name = "getopts"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5"
dependencies = [
"unicode-width",
]
[[package]]
name = "getrandom"
version = "0.2.15"
@ -199,6 +235,7 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
name = "mesozoa"
version = "0.1.0"
dependencies = [
"argp",
"base64",
"rand",
"realm_io",
@ -277,6 +314,18 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "pulldown-cmark"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
dependencies = [
"bitflags",
"getopts",
"memchr",
"unicase",
]
[[package]]
name = "quote"
version = "1.0.40"
@ -484,12 +533,24 @@ version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "unicase"
version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
[[package]]
name = "unicode-ident"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "version_check"
version = "0.9.5"

View file

@ -2,8 +2,13 @@
name = "mesozoa"
version = "0.1.0"
edition = "2024"
description = "PoW anti-crawler middle-proxy"
authors = ["tuxmain <tuxmain ât zettascript ðøt org>"]
repository = "https://git.txmn.tk/tuxmain/mesozoa"
license = "AGPL-3.0-only"
[dependencies]
argp = "0.4.0"
base64 = "0.22.1"
rand = "0.8.5"
realm_io = { version = "0.5" }
@ -17,3 +22,6 @@ static_cell = { version = "2.1.0", features = ["nightly"] }
subtle = { version = "2.6.1", default-features = false, features = ["const-generics", "nightly", "std"] }
tokio = { version = "1", features = ["io-util", "macros", "rt", "rt-multi-thread", "time"] }
#to-arraystring = "0.2.2"
[profile.release]
lto = true

View file

@ -14,11 +14,12 @@ And because it looked like a fun little project.
[Install rustup](https://rustup.rs) and a nightly Rust toolchain.
# Build executable at ./target/release/mesozoa
cargo build --release
Must be used behind a reverse proxy providing `X-Forwarded-For`.
./target/release/mesozoa -c example-config.yaml
## Challenge protocol
### Challenge generation
@ -81,6 +82,8 @@ Configuration file can be modified and kept secret, of course.
[Support me via LiberaPay](https://liberapay.com/tuxmain/donate)
No LLM was used to write this program.
GNU AGPL v3, CopyLeft 2025 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

View file

@ -1,9 +1,9 @@
use crate::{MAC_LEN, SALT_LEN, SECRET_LEN};
use base64::Engine;
use sha3::Digest;
use subtle::ConstantTimeEq;
use crate::{CHALLENGE_TIMEOUT, MAC_LEN, SALT_LEN, SECRET_LEN};
pub fn check_challenge(seed: &[u8], proof: &[u8], target_zeros: u32) -> bool {
let mut hasher = sha2::Sha256::default();
hasher.update(proof);
@ -47,24 +47,25 @@ pub fn verify_challenge_cookie(
secret: &[u8; SECRET_LEN],
user_agent: &[u8],
ip: &[u8],
challenge_timeout: u64,
) -> bool {
let Ok(cookie_bytes) = base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(cookie) else {
dbg!("invalid base64");
// invalid base64
return false;
};
if cookie_bytes.len() != SALT_LEN + 8 + MAC_LEN {
dbg!("invalid len");
// bad length
return false;
}
let timestamp: [u8; 8] = cookie_bytes[SALT_LEN..SALT_LEN + 8].try_into().unwrap();
let timestamp_time = u64::from_be_bytes(timestamp);
if timestamp_time.wrapping_add(CHALLENGE_TIMEOUT)
if timestamp_time.wrapping_add(challenge_timeout)
< std::time::SystemTime::UNIX_EPOCH
.elapsed()
.unwrap()
.as_secs()
{
dbg!("invalid time");
// challenge timeout
return false;
}
let salt: [u8; SALT_LEN] = cookie_bytes[0..SALT_LEN].try_into().unwrap();

10
src/cli.rs Normal file
View file

@ -0,0 +1,10 @@
// Command-line arguments of the program, parsed in `main` with `argp::parse_args_or_exit`.
// NOTE(review): with `#[derive(argp::FromArgs)]` the `///` doc comments below presumably
// double as the generated help text, so treat them as user-facing strings — confirm
// against the argp documentation before rewording them.
/// PoW anti-crawler middle-proxy
///
/// https://git.txmn.tk/tuxmain/mesozoa/
/// Distributed under license GNU AGPL v3 without any warranty.
#[derive(argp::FromArgs)]
pub struct Cli {
/// Path to config file
// Given with `-c`; `main` passes this value to `config::Config::from_file`.
#[argp(option, short = 'c')]
pub config: String,
}

73
src/config.rs Normal file
View file

@ -0,0 +1,73 @@
use crate::policy::{Action, Policy};
use std::{io::Read, net::SocketAddr, str::FromStr};
/// Runtime configuration, filled in by `Config::from_file` from a YAML document.
pub struct Config {
/// Address the proxy listens on (YAML key `listen`).
pub listen_addr: SocketAddr,
/// Address accepted connections are proxied to (YAML key `pass`).
pub pass_addr: SocketAddr,
/// Action applied when no policy group yields a matching policy (YAML key `default-action`).
pub default_action: Action,
/// Value added to a challenge cookie's timestamp before comparing it with the current
/// Unix time in seconds (YAML key `challenge-timeout`).
pub challenge_timeout: u64,
/// Groups of policies (YAML key `policy-groups`), kept in the order they appear in the file.
pub policy_groups: Vec<Vec<Policy>>,
}
impl Config {
    /// Loads and validates the configuration stored in the YAML file at `path`.
    ///
    /// Only the first YAML document of the file is used. The keys read are `listen`,
    /// `pass`, `challenge-timeout`, `default-action` and `policy-groups` (a list of
    /// lists of policies, each with `name`, `first-line` and `action`).
    ///
    /// # Panics
    ///
    /// Panics with a descriptive message when the file cannot be opened or read, is not
    /// valid YAML, contains no document, or when a key is missing or has an invalid
    /// value. A negative `challenge-timeout` is rejected instead of being wrapped into
    /// a huge `u64`.
    pub fn from_file(path: &str) -> Self {
        let mut config_file = std::fs::File::open(path).expect("Cannot open config file");
        let mut config_str = String::new();
        config_file
            .read_to_string(&mut config_str)
            .expect("Cannot read config file");
        let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config");
        // Indexing with `[0]` would panic with a bare index-out-of-bounds on an empty file.
        let config_doc = config_yaml
            .first()
            .expect("Config file contains no YAML document");

        Config {
            listen_addr: config_doc["listen"]
                .as_str()
                .expect("Missing listen address in config")
                .parse()
                .expect("Invalid listen address"),
            pass_addr: config_doc["pass"]
                .as_str()
                .expect("Missing pass address in config")
                .parse()
                .expect("Invalid pass address"),
            // Checked conversion: a plain `as u64` cast would turn e.g. `-1` into
            // `u64::MAX` and silently produce an enormous timeout.
            challenge_timeout: u64::try_from(
                config_doc["challenge-timeout"]
                    .as_i64()
                    .expect("Missing challenge timeout in config"),
            )
            .expect("Challenge timeout must not be negative"),
            default_action: Action::from_str(
                config_doc["default-action"]
                    .as_str()
                    .expect("Missing default action in config"),
            )
            .expect("Invalid default action"),
            policy_groups: config_doc["policy-groups"]
                .as_vec()
                .expect("Missing policies in config")
                .iter()
                .map(|policy_group| {
                    policy_group
                        .as_vec()
                        .expect("Missing policies in config")
                        .iter()
                        .map(|policy| Policy {
                            name: policy["name"]
                                .as_str()
                                .expect("Expected policy name string")
                                .to_string(),
                            first_line_regex: policy["first-line"]
                                .as_str()
                                .expect("Expected policy first line regex string")
                                .to_string(),
                            action: Action::from_str(
                                policy["action"]
                                    .as_str()
                                    .expect("Expected policy action string"),
                            )
                            .expect("Invalid policy action"),
                        })
                        .collect()
                })
                .collect(),
        }
    }
}

View file

@ -1,19 +1,16 @@
mod challenge;
mod cli;
mod config;
mod http;
mod policy;
use http::HeaderLineIterator;
use policy::{CompiledPolicies, Policy};
use policy::CompiledPolicies;
use rand::Rng;
use realm_syscall::socket2::TcpKeepalive;
use regex::bytes::Regex;
use std::{
io::{Read, Write},
net::SocketAddr,
str::FromStr,
time::Duration,
};
use std::{net::SocketAddr, time::Duration};
use tokio::{
io::{AsyncWriteExt, ReadBuf},
net::{TcpSocket, TcpStream},
@ -23,7 +20,6 @@ use tokio::{
const SALT_LEN: usize = 16;
const SECRET_LEN: usize = 32;
const MAC_LEN: usize = 32;
const CHALLENGE_TIMEOUT: u64 = 3600;
const TARGET_ZEROS: u32 = 15;
static CHALLENGE_BODY: &str = include_str!("challenge.html");
@ -39,35 +35,9 @@ macro_rules! mk_static {
#[tokio::main]
async fn main() {
let mut config_file =
std::fs::File::open("example-config.yaml").expect("Cannot open config file");
let mut config_str = String::new();
config_file
.read_to_string(&mut config_str)
.expect("Cannot read config file");
let config_yaml = saphyr::Yaml::load_from_str(&config_str).expect("Error parsing config");
let config_doc = &config_yaml[0];
let listen_addr: SocketAddr = config_doc["listen"]
.as_str()
.expect("Missing listen address in config")
.parse()
.expect("Invalid listen address");
let pass_addr: SocketAddr = config_doc["pass"]
.as_str()
.expect("Missing pass address in config")
.parse()
.expect("Invalid pass address");
let default_action = policy::Action::from_str(
config_doc["default-action"]
.as_str()
.expect("Missing default action in config"),
)
.expect("Invalid default action");
let policy_groups: Vec<Vec<Policy>> = config_doc["policy-groups"].as_vec().expect("Missing policies in config").into_iter().map(|policy_group| policy_group.as_vec().expect("Missing policies in config").into_iter().map(|policy| Policy {
name: policy["name"].as_str().expect("Expected policy name string").to_string(),
first_line_regex: policy["first-line"].as_str().expect("Expected policy first line regex string").to_string(),
action: policy::Action::from_str(policy["action"].as_str().expect("Expected policy action string")).expect("Invalid policy action"),
}).collect()).collect();
let cli: cli::Cli = argp::parse_args_or_exit(argp::DEFAULT);
let config = config::Config::from_file(&cli.config);
let mut rng = rand::thread_rng();
@ -75,17 +45,18 @@ async fn main() {
let policy_groups = &*mk_static!(
Vec<CompiledPolicies>,
policy_groups
config
.policy_groups
.into_iter()
.map(CompiledPolicies::new)
.collect()
);
let socket = realm_syscall::new_tcp_socket(&listen_addr).unwrap();
let socket = realm_syscall::new_tcp_socket(&config.listen_addr).unwrap();
socket.set_reuse_address(true).ok();
socket.bind(&listen_addr.into()).unwrap();
socket.bind(&config.listen_addr.into()).unwrap();
socket.listen(1024).unwrap();
let listener = tokio::net::TcpListener::from_std(socket.into()).unwrap();
@ -129,25 +100,19 @@ async fn main() {
.await
.is_err()
{
println!("peek timeout");
// Peek timeout
return;
}
let mut stdout = std::io::stdout();
stdout.write_all(&buf).unwrap();
stdout.flush().unwrap();
println!();
let mut header_line_iter = HeaderLineIterator::new(&buf);
let Some(first_line) = header_line_iter.next() else {
println!("Not HTTP, or too long line");
// Not HTTP, or too long line
return;
};
let mut action = default_action;
let mut action = config.default_action;
for policy_group in policy_groups.iter() {
if let Some(policy) = policy_group.evaluate(first_line) {
println!("Applying policy {}", policy.name);
action = policy.action;
break;
}
@ -156,7 +121,7 @@ async fn main() {
match action {
policy::Action::Drop => {}
policy::Action::Allow => {
do_proxy(pass_addr, client_stream).await;
do_proxy(config.pass_addr, client_stream).await;
}
policy::Action::Challenge => {
let mut req_challenge = None;
@ -187,17 +152,14 @@ async fn main() {
&secret,
req_user_agent,
req_ip,
config.challenge_timeout,
);
allow = dbg!(valid_challenge)
&& dbg!(challenge::check_challenge(
req_challenge,
req_proof,
TARGET_ZEROS
));
allow = valid_challenge
&& challenge::check_challenge(req_challenge, req_proof, TARGET_ZEROS);
}
if allow {
do_proxy(pass_addr, client_stream).await;
do_proxy(config.pass_addr, client_stream).await;
} else {
let salt: [u8; SALT_LEN] = rand::thread_rng().r#gen();

View file

@ -1,198 +0,0 @@
use regex::bytes::{Regex, RegexSet, SetMatches};
/// Action attached to a policy.
///
/// The enum is fieldless, so it is `Copy` and comparable; existing `.clone()` calls keep
/// working.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Action {
    /// Presumably lets the request through — TODO confirm against the dispatch code.
    Allow,
    /// Presumably requires the client to solve a challenge first — TODO confirm.
    Challenge,
    /// Presumably discards the request — TODO confirm.
    Drop,
}
/// Boolean expression tree stored in a `Policy` and turned into a `CompiledFilter` by
/// `Filter::compile`.
#[derive(Clone, Debug)]
pub enum Filter {
/// Constant value, compiled to `CompiledFilter::Bool` unchanged.
Bool(bool),
/// Regex pattern text that `compile` appends to the first-line pattern list.
FirstLineMatch(String),
/// Regex pattern text that `compile` appends to the header-line pattern list.
HeaderLineMatch(String),
/// Nested filters combined as a conjunction.
And(Vec<Filter>),
/// Nested filters combined as a disjunction.
Or(Vec<Filter>),
/// Negation of the nested filter.
Not(Box<Filter>),
}
impl Filter {
    /// Converts this filter tree into a [`CompiledFilter`] tree of the same shape.
    ///
    /// Every regex leaf appends its pattern text to `first_line_regexes` or
    /// `header_line_regexes` and is replaced by the index the pattern received in that
    /// list. Children are visited in declaration order, so indices follow a depth-first,
    /// left-to-right traversal.
    fn compile<'a>(
        &'a self,
        first_line_regexes: &mut Vec<&'a str>,
        header_line_regexes: &mut Vec<&'a str>,
    ) -> CompiledFilter {
        match self {
            Filter::Bool(value) => CompiledFilter::Bool(*value),
            Filter::FirstLineMatch(pattern) => {
                first_line_regexes.push(pattern);
                // The pattern just pushed sits in the last slot.
                CompiledFilter::FirstLineMatch(first_line_regexes.len() - 1)
            }
            Filter::HeaderLineMatch(pattern) => {
                header_line_regexes.push(pattern);
                CompiledFilter::HeaderLineMatch(header_line_regexes.len() - 1)
            }
            Filter::Not(inner) => {
                let compiled = inner.compile(first_line_regexes, header_line_regexes);
                CompiledFilter::Not(Box::new(compiled))
            }
            Filter::And(children) => {
                let mut compiled = Vec::with_capacity(children.len());
                for child in children {
                    compiled.push(child.compile(first_line_regexes, header_line_regexes));
                }
                CompiledFilter::And(compiled)
            }
            Filter::Or(children) => {
                let mut compiled = Vec::with_capacity(children.len());
                for child in children {
                    compiled.push(child.compile(first_line_regexes, header_line_regexes));
                }
                CompiledFilter::Or(compiled)
            }
        }
    }
}
/// A named rule: a `Filter` together with the `Action` attached to it.
#[derive(Clone, Debug)]
pub struct Policy {
/// Name of the policy; copied into the `CompiledPolicy` by `CompiledPolicies::new`.
pub name: String,
/// Filter expression; compiled by `CompiledPolicies::new`.
pub filter: Filter,
/// Action attached to this policy.
pub action: Action,
/// Priority of the policy.
// NOTE(review): the commented-out `evaluate` draft prefers the smaller value when
// several policies match — confirm before relying on it.
pub priority: i32,
}
/// Compiled form of `Filter`: same tree shape, with regex leaves replaced by indices.
///
/// Derives `Debug`, `Clone` and equality so the tree can be logged, copied and compared.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum CompiledFilter {
    /// Constant value.
    Bool(bool),
    /// Index of the pattern in the first-line pattern list.
    FirstLineMatch(usize),
    /// Index of the pattern in the header-line pattern list.
    HeaderLineMatch(usize),
    /// Nested filters combined as a conjunction.
    And(Vec<CompiledFilter>),
    /// Nested filters combined as a disjunction.
    Or(Vec<CompiledFilter>),
    /// Negation of the nested filter.
    Not(Box<CompiledFilter>),
}
/*impl CompiledFilter {
fn evaluate(&self, matches: &SetMatches) -> bool {
match self {
Self::And(filters) => filters.iter().all(Self::evaluate),
Self::Or(filters) => filters.iter().any(Self::evaluate),
Self::Bool(b) => *b,
Self::Not(filter) => !filter.evaluate(matches),
Self::FirstLineMatch(regex_id) => matches.matched(regex_id),
Self::HeaderLineMatch(regex_id) => matches.matched(regex_id),
}
}
}*/
/// A `Policy` whose filter has been compiled; built by `CompiledPolicies::new`.
pub struct CompiledPolicy {
/// Name cloned from the source policy.
pub name: String,
/// Compiled filter; its regex leaves hold indices into the pattern lists of the owning
/// `CompiledPolicies`.
pub filter: CompiledFilter,
/// Priority copied from the source policy.
pub priority: i32,
/// Action cloned from the source policy.
pub action: Action,
}
/// A set of compiled policies sharing their regex sets; built by `CompiledPolicies::new`.
pub struct CompiledPolicies {
/// Set of all first-line patterns, or `None` when no policy registered one.
pub first_line_regex_set: Option<RegexSet>,
/// Set of all header-line patterns, or `None` when no policy registered one.
pub header_line_regex_set: Option<RegexSet>,
/// Compiled policies, in the order the source policies were supplied.
pub policies: Vec<CompiledPolicy>,
}
/// Matcher for a list of patterns, specialised on how many patterns there are.
pub enum RegexOrRegexSet {
/// Two or more patterns, compiled together as a `RegexSet`.
Many(RegexSet),
/// Exactly one pattern, compiled as a plain `Regex`.
One(Regex),
/// No pattern at all.
None,
}
/// Builds the cheapest matcher for a list of pattern strings.
///
/// Implemented as `TryFrom` on the local type rather than `TryInto` on `Vec<&str>`: the
/// standard blanket impl still provides `Vec<&str>::try_into()` for existing callers.
impl TryFrom<Vec<&str>> for RegexOrRegexSet {
    type Error = regex::Error;

    /// Returns `None` for an empty list, `One` for a single pattern and `Many` otherwise.
    ///
    /// # Errors
    ///
    /// Returns the `regex::Error` produced when a pattern fails to compile.
    fn try_from(patterns: Vec<&str>) -> Result<Self, Self::Error> {
        Ok(match patterns.len() {
            0 => Self::None,
            1 => Self::One(Regex::new(patterns[0])?),
            _ => Self::Many(RegexSet::new(patterns)?),
        })
    }
}
impl CompiledPolicies {
pub fn new<'a>(policies: impl IntoIterator<Item = &'a Policy>) -> Self {
let mut first_line_regexes = Vec::new();
let mut header_line_regexes = Vec::new();
let mut compiled_policies = Vec::new();
for policy in policies {
let compiled_policy = CompiledPolicy {
name: policy.name.clone(),
filter: policy
.filter
.compile(&mut first_line_regexes, &mut header_line_regexes),
priority: policy.priority,
action: policy.action.clone(),
};
compiled_policies.push(compiled_policy);
}
CompiledPolicies {
first_line_regex_set: if first_line_regexes.is_empty() {
None
} else {
Some(RegexSet::new(&first_line_regexes).unwrap())
},
header_line_regex_set: if header_line_regexes.is_empty() {
None
} else {
Some(RegexSet::new(&header_line_regexes).unwrap())
},
policies: compiled_policies,
}
}
/*pub fn evaluate<'a>(
&self,
mut header_lines: impl Iterator<Item = &'a [u8]>,
) -> Result<Option<&CompiledPolicy>, PolicyEvaluationError> {
let mut best_policy: Option<&CompiledPolicy> = None;
let first_line = header_lines
.next()
.ok_or(PolicyEvaluationError::NoFirstLine)?;
if let Some(first_line_regex_set) = &self.first_line_regex_set {
let matches = first_line_regex_set.matches(first_line);
for policy in self.policies.iter() {
}
let policy = &self.policies[matched];
if let Some(best_policy) = &mut best_policy {
if policy.priority < best_policy.priority {
*best_policy = policy;
}
} else {
best_policy = Some(policy);
}
}
if let Some(header_line_regex_set) = &self.header_line_regex_set {
for header_line in header_lines {
for matched in header_line_regex_set.matches(header_line) {
let policy = &self.policies[matched];
if let Some(best_policy) = &mut best_policy {
if policy.priority < best_policy.priority {
*best_policy = policy;
}
} else {
best_policy = Some(policy);
}
}
}
}
Ok(best_policy)
}*/
}
/// Error raised while evaluating policies against a request.
///
/// Implements `Display` and `std::error::Error` so it can be propagated with `?` into
/// `Box<dyn Error>` and printed for users.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PolicyEvaluationError {
    /// First HTTP line is too long or absent
    NoFirstLine,
}

impl std::fmt::Display for PolicyEvaluationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NoFirstLine => f.write_str("first HTTP line is too long or absent"),
        }
    }
}

impl std::error::Error for PolicyEvaluationError {}

View file

@ -21,6 +21,8 @@ impl std::str::FromStr for Action {
#[derive(Clone, Debug)]
pub struct Policy {
// Will be used when we add log
#[allow(unused)]
pub name: String,
pub first_line_regex: String,
pub action: Action,