From 44c310ac11ef2a5c96d06c4d73e8ae28e0a74ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pascal=20Eng=C3=A9libert?= Date: Sun, 15 Mar 2026 20:49:27 +0100 Subject: [PATCH] Readme, Box leak, async mutex, repo URL parsing --- Cargo.lock | 2 ++ Cargo.toml | 2 ++ README.md | 37 +++++++++++++++++++++++++---- src/api_client.rs | 23 ++++++++++++++---- src/main.rs | 4 ++-- src/queue.rs | 2 +- src/server.rs | 57 +++++++++++++++++++++++++++++---------------- templates/home.html | 8 ++++++- 8 files changed, 103 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 11d0aea..9cca425 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -282,6 +282,7 @@ version = "0.1.0" dependencies = [ "argp", "askama", + "async-lock", "base64-turbo", "boml", "form_urlencoded", @@ -298,6 +299,7 @@ dependencies = [ "trillium-native-tls", "trillium-router", "trillium-smol", + "url", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1f41a67..fa7e214 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ edition = "2024" argp = "0.4.0" # Templates askama = "0.12.1" +async-lock = "3.4.2" base64-turbo = "0.1.3" # TOML parser boml = "1.0.2" @@ -31,3 +32,4 @@ trillium-native-tls = "0.4.0" trillium-router = "0.4.1" trillium-smol = "0.4.2" #trillium-static-compiled = "0.5.2" +url = "2.5.8" diff --git a/README.md b/README.md index b108ed9..96fd675 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,17 @@ +# Blindforge + +(The name may not be definitive.) + +Git repository anonymizer for double-blind scientific review. Inspired by [Anonymous GitHub](https://anonymous.4open.science/) but not tied to a centralized platform. It will allow mirroring public or private Git repositories from diverse platforms (Forgejo, and maybe Gitea, GitLab, GitHub...), removing any information leaking the authors' identities or affiliations. + +Early development: not usable yet. 
+ ## TODO -* [ ] Fetch repo from API -* [ ] Repo caching -* [ ] Submission form +* [x] Fetch repo from API +* [x] Repo caching +* [x] Submission form +* [x] Config * [ ] Repo serving * [ ] Replace words * [ ] Abuse report @@ -13,4 +22,24 @@ * [ ] Manual removal * [ ] Security tests (zip bomb) * [ ] Allow download -* [ ] Config +* [ ] Markdown rendering + +## Design choices + +**Language**: It would have been simpler to use good old PHP and SQL. But I have more fun with Rust. + +**Database**: No database, because none needed. Persistent data are structured in directories and files. Metadata are written to files in a simple custom binary format. Advantages: simpler installation, easier portability, no dependency on a third-party nonstandard format (such as embedded databases other than SQLite). + +**Rust crates**: I did not select the most famous crates (e.g. clap, toml, tokio). They are great and quite feature-complete, but also very big and we don't use most of their features. I instead carefully selected smaller crates with fewer dependencies but still satisfying our needs and with good (or better) performance. + +**AI**: No generative AI was used. Any use of generative AI by contributors must be declared explicitly. AI-generated content that has not been reviewed carefully by the contributor will not be accepted. Using AI to contribute something that the contributor does not have the skills to review correctly is strongly discouraged. + +**TLS**: Cryptographic libraries such as Ring, AWS-LC or Graviola are often more performant than OpenSSL. However, they are less often available as a shared library in Linux distributions. Embedding them statically takes storage and memory (each program has its own copy of a different crypto library, and the OS cannot optimize), and makes the program's maintainer responsible for security updates of the embedded library. As this project is very small, it is better to use your distribution's cryptographic implementation. 
Note: only the API client uses TLS. The user is responsible for setting up an HTTPS reverse proxy of their choice. + +## License + +GNU AGPL v3, CopyLeft 2026 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/) + +This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License. +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. +You should have received a copy of the GNU Affero General Public License along with this program. If not, see https://www.gnu.org/licenses/. diff --git a/src/api_client.rs b/src/api_client.rs index 1d75928..6c98f70 100644 --- a/src/api_client.rs +++ b/src/api_client.rs @@ -107,6 +107,7 @@ pub enum FetchRepoError { CannotCreateDir(std::io::Error), TooManyEntries, Client(ClientError), + UrlParsing, } impl From for FetchRepoError { @@ -133,6 +134,12 @@ impl From for FetchRepoError { } } +impl From for FetchRepoError { + fn from(_value: url::ParseError) -> Self { + Self::UrlParsing + } +} + #[derive(Default)] pub struct RepoIndex { files: Vec, @@ -145,17 +152,25 @@ pub struct RepoIndexFile { pub async fn fetch_repo_tree_index_at_commit( client: &mut Client, - url: &str, - owner: &str, - repo: &str, + repo_url: &str, commit_hash: &str, token: Option<&str>, ) -> Result { + let parsed = url::Url::parse(repo_url)?; + let mut base = parsed.clone(); + base.set_fragment(None); + let base_url = base.as_str(); + // Is the URL always /owner/repo? 
+ let mut segments = parsed.path_segments().ok_or(FetchRepoError::UrlParsing)?; + let owner = segments.next().ok_or(FetchRepoError::UrlParsing)?; + let repo = segments.next().ok_or(FetchRepoError::UrlParsing)?; + let repo = repo.strip_suffix(".git").unwrap_or(repo); + let mut repo_index = RepoIndex::default(); let mut count: u32 = 0; for page in 1..MAX_PAGE { let res = - fetch_repo_tree_at_commit_page(client, url, owner, repo, commit_hash, token, page) + fetch_repo_tree_at_commit_page(client, base_url, owner, repo, commit_hash, token, page) .await?; count = count.saturating_add(res.total_count); if count > MAX_ENTRIES || res.total_count > MAX_ENTRIES { diff --git a/src/main.rs b/src/main.rs index 4632ace..a0aa504 100644 --- a/src/main.rs +++ b/src/main.rs @@ -42,9 +42,9 @@ fn main() { simplelog::ColorChoice::Auto, ) .unwrap(); - let config = Arc::new(config::Config::from_toml( + let config: &'static _ = Box::leak(Box::new(config::Config::from_toml( &std::fs::read(cli.config).expect("Cannot read config file"), - )); + ))); std::fs::DirBuilder::new() .recursive(true) .create(PathBuf::from(&config.data_dir).join(SUBDIR_REPOS)) diff --git a/src/queue.rs b/src/queue.rs index 8b13789..83fb866 100644 --- a/src/queue.rs +++ b/src/queue.rs @@ -1 +1 @@ - +pub async fn queue_handler() {} diff --git a/src/server.rs b/src/server.rs index 23be7e3..a1cad7f 100644 --- a/src/server.rs +++ b/src/server.rs @@ -1,13 +1,9 @@ use crate::{cache, config::Config, repo::ReadRepoMetadataError}; use askama::Template; +use async_lock::Mutex; use log::error; -use std::{ - collections::HashMap, - io::ErrorKind, - path::PathBuf, - sync::{Arc, Mutex}, -}; +use std::{collections::HashMap, io::ErrorKind, path::PathBuf}; use trillium::{Conn, Handler}; use trillium_router::{Router, RouterConnExt}; @@ -15,14 +11,18 @@ pub async fn hello_world(conn: Conn) -> Conn { conn.ok("hello world!") } -pub fn make_router(config: Arc) -> impl Handler { +pub fn make_router(config: &'static Config) -> impl Handler { 
let mut hl_registry = giallo::Registry::builtin().unwrap(); hl_registry.link_grammars(); - let hl_registry = Arc::new(hl_registry); + let hl_registry: &'static _ = Box::leak(Box::new(hl_registry)); - let mut metadata_cache = Arc::new(Mutex::new( - cache::Cache::>::default(), - )); + let metadata_cache: &'static _ = Box::leak(Box::new(Mutex::new(cache::Cache::< + String, + HashMap, + >::default()))); + let client: &'static _ = Box::leak(Box::new(async_lock::Mutex::new( + crate::api_client::make_client(), + ))); ( trillium_caching_headers::CachingHeaders::new(), @@ -31,25 +31,42 @@ pub fn make_router(config: Arc) -> impl Handler { .get("/", |conn: Conn| async move { conn.ok(crate::templates::Home {}.render().unwrap()) }) - .post("/fetch", |mut conn: Conn| async move { + .post("/fetch", move |mut conn: Conn| async move { if let Ok(request_body) = conn.request_body().await.with_max_len(8192).await { let mut repo_url = None; + let mut commit_hash = None; for (key, val) in form_urlencoded::parse(request_body.as_bytes()) { - if key == "repo-url" { - repo_url = Some(val); + match key.as_ref() { + "repo-url" => { + repo_url = Some(val); + } + "commit" => { + commit_hash = Some(val); + } + _ => {} } } - let Some(repo_url) = repo_url else { + let (Some(repo_url), Some(commit_hash)) = (repo_url, commit_hash) else { return conn.ok("Missing arg"); }; + + let mut client = client.lock().await; + let repo_index = crate::api_client::fetch_repo_tree_index_at_commit( + &mut client, + &repo_url, + &commit_hash, + None, + ) + .await + .expect("todo handle error"); + crate::api_client::fetch_repo_files(config, &mut client, &repo_index, None) + .await + .expect("todo handle error"); } //let planet = conn.param("planet").unwrap(); conn.ok(crate::templates::Home {}.render().unwrap()) }) .get("/r/:hash/*", move |conn: Conn| { - let hl_registry = hl_registry.clone(); - let config = config.clone(); - let metadata_cache = metadata_cache.clone(); async move { let Some(repo_hash_str) = 
conn.param("hash") else { return conn.with_status(401); @@ -66,7 +83,7 @@ pub fn make_router(config: Arc) -> impl Handler { .join(crate::SUBDIR_REPOS) .join(repo_hash_str); let repo_metadata = - match crate::repo::RepoMetadata::read_from_file(&config, &repo_dir) { + match crate::repo::RepoMetadata::read_from_file(config, &repo_dir) { Ok(v) => v, Err(e) => { if let ReadRepoMetadataError::CannotOpenFile(e) = &e { @@ -94,7 +111,7 @@ pub fn make_router(config: Arc) -> impl Handler { // TODO replace mutex with better thing (less contention or async mutex) metadata_cache .lock() - .unwrap() + .await .fetch(repo_hash_str.to_string(), cache_fetch); let hl_options = giallo::HighlightOptions::new( diff --git a/templates/home.html b/templates/home.html index 1e64cab..4fbfa8d 100644 --- a/templates/home.html +++ b/templates/home.html @@ -5,9 +5,15 @@ Blindforge +

Import any repository from any Forgejo instance using the repository's main URL. The full commit hash is also required, so that the exact version is fetched.

+

The token is optional. It is only needed if the repository is private. In that case, create an API token with read access to the repository. The token can be revoked at any time and only grants the permissions you select.

-
+
+ +
+ +