wip: refactoring the cache logic
This commit is contained in:
parent
6030bebac5
commit
f5197d0869
6 changed files with 261 additions and 22 deletions
|
|
@ -24,7 +24,7 @@ http3-s2n = [
|
||||||
sticky-cookie = ["base64", "sha2", "chrono"]
|
sticky-cookie = ["base64", "sha2", "chrono"]
|
||||||
native-tls-backend = ["hyper-tls"]
|
native-tls-backend = ["hyper-tls"]
|
||||||
rustls-backend = []
|
rustls-backend = []
|
||||||
cache = [] #"http-cache-semantics", "lru"]
|
cache = ["http-cache-semantics", "lru"]
|
||||||
native-roots = [] #"hyper-rustls/native-tokio"]
|
native-roots = [] #"hyper-rustls/native-tokio"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
|
@ -66,7 +66,7 @@ hyper-tls = { version = "0.6.0", features = ["alpn"], optional = true }
|
||||||
|
|
||||||
# tls and cert management for server
|
# tls and cert management for server
|
||||||
hot_reload = "0.1.4"
|
hot_reload = "0.1.4"
|
||||||
rustls = { version = "0.21.9", default-features = false }
|
rustls = { version = "0.21.10", default-features = false }
|
||||||
tokio-rustls = { version = "0.24.1", features = ["early-data"] }
|
tokio-rustls = { version = "0.24.1", features = ["early-data"] }
|
||||||
webpki = "0.22.4"
|
webpki = "0.22.4"
|
||||||
x509-parser = "0.15.1"
|
x509-parser = "0.15.1"
|
||||||
|
|
@ -88,8 +88,8 @@ s2n-quic-rustls = { version = "0.32.0", optional = true }
|
||||||
socket2 = { version = "0.5.5", features = ["all"], optional = true }
|
socket2 = { version = "0.5.5", features = ["all"], optional = true }
|
||||||
|
|
||||||
# # cache
|
# # cache
|
||||||
# http-cache-semantics = { path = "../submodules/rusty-http-cache-semantics/", optional = true }
|
http-cache-semantics = { path = "../submodules/rusty-http-cache-semantics/", optional = true }
|
||||||
# lru = { version = "0.12.1", optional = true }
|
lru = { version = "0.12.1", optional = true }
|
||||||
|
|
||||||
# cookie handling for sticky cookie
|
# cookie handling for sticky cookie
|
||||||
chrono = { version = "0.4.31", default-features = false, features = [
|
chrono = { version = "0.4.31", default-features = false, features = [
|
||||||
|
|
|
||||||
|
|
@ -84,6 +84,15 @@ pub enum RpxyError {
|
||||||
#[error("Failed to fetch from upstream: {0}")]
|
#[error("Failed to fetch from upstream: {0}")]
|
||||||
FailedToFetchFromUpstream(String),
|
FailedToFetchFromUpstream(String),
|
||||||
|
|
||||||
|
// Cache errors,
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
#[error("Invalid null request and/or response")]
|
||||||
|
NullRequestOrResponse,
|
||||||
|
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
#[error("Failed to write byte buffer")]
|
||||||
|
FailedToWriteByteBufferForCache,
|
||||||
|
|
||||||
// Upstream connection setting errors
|
// Upstream connection setting errors
|
||||||
#[error("Unsupported upstream option")]
|
#[error("Unsupported upstream option")]
|
||||||
UnsupportedUpstreamOption,
|
UnsupportedUpstreamOption,
|
||||||
|
|
|
||||||
161
rpxy-lib/src/forwarder/cache.rs
Normal file
161
rpxy-lib/src/forwarder/cache.rs
Normal file
|
|
@ -0,0 +1,161 @@
|
||||||
|
use crate::{error::*, globals::Globals, log::*};
|
||||||
|
use http::{Request, Response};
|
||||||
|
use http_cache_semantics::CachePolicy;
|
||||||
|
use lru::LruCache;
|
||||||
|
use std::{
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
sync::{atomic::AtomicUsize, Arc, Mutex},
|
||||||
|
};
|
||||||
|
use tokio::{fs, sync::RwLock};
|
||||||
|
|
||||||
|
/* ---------------------------------------------- */
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct RpxyCache {
|
||||||
|
/// Lru cache storing http message caching policy
|
||||||
|
inner: LruCacheManager,
|
||||||
|
/// Managing cache file objects through RwLock's lock mechanism for file lock
|
||||||
|
file_store: FileStore,
|
||||||
|
/// Async runtime
|
||||||
|
runtime_handle: tokio::runtime::Handle,
|
||||||
|
/// Maximum size of each cache file object
|
||||||
|
max_each_size: usize,
|
||||||
|
/// Maximum size of cache object on memory
|
||||||
|
max_each_size_on_memory: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RpxyCache {
|
||||||
|
/// Generate cache storage
|
||||||
|
pub async fn new(globals: &Globals) -> Option<Self> {
|
||||||
|
if !globals.proxy_config.cache_enabled {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let path = globals.proxy_config.cache_dir.as_ref().unwrap();
|
||||||
|
let file_store = FileStore::new(path, &globals.runtime_handle).await;
|
||||||
|
let inner = LruCacheManager::new(globals.proxy_config.cache_max_entry);
|
||||||
|
|
||||||
|
let max_each_size = globals.proxy_config.cache_max_each_size;
|
||||||
|
let mut max_each_size_on_memory = globals.proxy_config.cache_max_each_size_on_memory;
|
||||||
|
if max_each_size < max_each_size_on_memory {
|
||||||
|
warn!(
|
||||||
|
"Maximum size of on memory cache per entry must be smaller than or equal to the maximum of each file cache"
|
||||||
|
);
|
||||||
|
max_each_size_on_memory = max_each_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
file_store,
|
||||||
|
inner,
|
||||||
|
runtime_handle: globals.runtime_handle.clone(),
|
||||||
|
max_each_size,
|
||||||
|
max_each_size_on_memory,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------- */
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
/// Cache file manager outer that is responsible to handle `RwLock`
|
||||||
|
struct FileStore {
|
||||||
|
inner: Arc<RwLock<FileStoreInner>>,
|
||||||
|
}
|
||||||
|
impl FileStore {
|
||||||
|
/// Build manager
|
||||||
|
async fn new(path: impl AsRef<Path>, runtime_handle: &tokio::runtime::Handle) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(RwLock::new(FileStoreInner::new(path, runtime_handle).await)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
/// Manager inner for cache on file system
|
||||||
|
struct FileStoreInner {
|
||||||
|
/// Directory of temporary files
|
||||||
|
cache_dir: PathBuf,
|
||||||
|
/// Counter of current cached files
|
||||||
|
cnt: usize,
|
||||||
|
/// Async runtime
|
||||||
|
runtime_handle: tokio::runtime::Handle,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileStoreInner {
|
||||||
|
/// Build new cache file manager.
|
||||||
|
/// This first creates cache file dir if not exists, and cleans up the file inside the directory.
|
||||||
|
/// TODO: Persistent cache is really difficult. `sqlite` or something like that is needed.
|
||||||
|
async fn new(path: impl AsRef<Path>, runtime_handle: &tokio::runtime::Handle) -> Self {
|
||||||
|
let path_buf = path.as_ref().to_path_buf();
|
||||||
|
if let Err(e) = fs::remove_dir_all(path).await {
|
||||||
|
warn!("Failed to clean up the cache dir: {e}");
|
||||||
|
};
|
||||||
|
fs::create_dir_all(&path_buf).await.unwrap();
|
||||||
|
Self {
|
||||||
|
cache_dir: path_buf.clone(),
|
||||||
|
cnt: 0,
|
||||||
|
runtime_handle: runtime_handle.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------- */
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
/// Cache target in hybrid manner of on-memory and file system
|
||||||
|
pub enum CacheFileOrOnMemory {
|
||||||
|
/// Pointer to the temporary cache file
|
||||||
|
File(PathBuf),
|
||||||
|
/// Cached body itself
|
||||||
|
OnMemory(Vec<u8>),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
/// Cache object definition
|
||||||
|
struct CacheObject {
|
||||||
|
/// Cache policy to determine if the stored cache can be used as a response to a new incoming request
|
||||||
|
pub policy: CachePolicy,
|
||||||
|
/// Cache target: on-memory object or temporary file
|
||||||
|
pub target: CacheFileOrOnMemory,
|
||||||
|
/// SHA256 hash of target to strongly bind the cache metadata (this object) and file target
|
||||||
|
pub hash: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------- */
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
/// Lru cache manager that is responsible to handle `Mutex` as an outer of `LruCache`
|
||||||
|
struct LruCacheManager {
|
||||||
|
inner: Arc<Mutex<LruCache<String, CacheObject>>>, // TODO: keyはstring urlでいいのか疑問。全requestに対してcheckすることになりそう
|
||||||
|
cnt: Arc<AtomicUsize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl LruCacheManager {
|
||||||
|
/// Build LruCache
|
||||||
|
fn new(cache_max_entry: usize) -> Self {
|
||||||
|
Self {
|
||||||
|
inner: Arc::new(Mutex::new(LruCache::new(
|
||||||
|
std::num::NonZeroUsize::new(cache_max_entry).unwrap(),
|
||||||
|
))),
|
||||||
|
cnt: Arc::new(AtomicUsize::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------- */
|
||||||
|
pub fn get_policy_if_cacheable<B1, B2>(
|
||||||
|
req: Option<&Request<B1>>,
|
||||||
|
res: Option<&Response<B2>>,
|
||||||
|
) -> RpxyResult<Option<CachePolicy>>
|
||||||
|
// where
|
||||||
|
// B1: core::fmt::Debug,
|
||||||
|
{
|
||||||
|
// deduce cache policy from req and res
|
||||||
|
let (Some(req), Some(res)) = (req, res) else {
|
||||||
|
return Err(RpxyError::NullRequestOrResponse);
|
||||||
|
};
|
||||||
|
|
||||||
|
let new_policy = CachePolicy::new(req, res);
|
||||||
|
if new_policy.is_storable() {
|
||||||
|
// debug!("Response is cacheable: {:?}\n{:?}", req, res.headers());
|
||||||
|
Ok(Some(new_policy))
|
||||||
|
} else {
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,13 +9,20 @@ use crate::{
|
||||||
};
|
};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use http::{Request, Response, Version};
|
use http::{Request, Response, Version};
|
||||||
use hyper::body::Body;
|
use hyper::body::{Body, Incoming};
|
||||||
use hyper_util::client::legacy::{
|
use hyper_util::client::legacy::{
|
||||||
connect::{Connect, HttpConnector},
|
connect::{Connect, HttpConnector},
|
||||||
Client,
|
Client,
|
||||||
};
|
};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
use super::cache::{get_policy_if_cacheable, RpxyCache};
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
use crate::hyper_ext::body::{full, BoxBody};
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
use http_body_util::BodyExt;
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
/// Definition of the forwarder that simply forward requests from downstream client to upstream app servers.
|
/// Definition of the forwarder that simply forward requests from downstream client to upstream app servers.
|
||||||
pub trait ForwardRequest<B1, B2> {
|
pub trait ForwardRequest<B1, B2> {
|
||||||
|
|
@ -25,27 +32,71 @@ pub trait ForwardRequest<B1, B2> {
|
||||||
|
|
||||||
/// Forwarder http client struct responsible to cache handling
|
/// Forwarder http client struct responsible to cache handling
|
||||||
pub struct Forwarder<C, B> {
|
pub struct Forwarder<C, B> {
|
||||||
// #[cfg(feature = "cache")]
|
#[cfg(feature = "cache")]
|
||||||
// cache: Option<RpxyCache>,
|
cache: Option<RpxyCache>,
|
||||||
inner: Client<C, B>,
|
inner: Client<C, B>,
|
||||||
inner_h2: Client<C, B>, // `h2c` or http/2-only client is defined separately
|
inner_h2: Client<C, B>, // `h2c` or http/2-only client is defined separately
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl<C, B1, B2> ForwardRequest<B1, IncomingOr<B2>> for Forwarder<C, B1>
|
impl<C, B1> ForwardRequest<B1, IncomingOr<BoxBody>> for Forwarder<C, B1>
|
||||||
where
|
where
|
||||||
C: Send + Sync + Connect + Clone + 'static,
|
C: Send + Sync + Connect + Clone + 'static,
|
||||||
B1: Body + Send + Sync + Unpin + 'static,
|
B1: Body + Send + Sync + Unpin + 'static,
|
||||||
<B1 as Body>::Data: Send,
|
<B1 as Body>::Data: Send,
|
||||||
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
|
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
|
||||||
B2: Body,
|
|
||||||
{
|
{
|
||||||
type Error = RpxyError;
|
type Error = RpxyError;
|
||||||
|
|
||||||
async fn request(&self, req: Request<B1>) -> Result<Response<IncomingOr<B2>>, Self::Error> {
|
async fn request(&self, req: Request<B1>) -> Result<Response<IncomingOr<BoxBody>>, Self::Error> {
|
||||||
// TODO: cache handling
|
// TODO: cache handling
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
{
|
||||||
|
let mut synth_req = None;
|
||||||
|
if self.cache.is_some() {
|
||||||
|
// if let Some(cached_response) = self.cache.as_ref().unwrap().get(&req).await {
|
||||||
|
// // if found, return it as response.
|
||||||
|
// info!("Cache hit - Return from cache");
|
||||||
|
// return Ok(cached_response);
|
||||||
|
// };
|
||||||
|
|
||||||
self.request_directly(req).await
|
// Synthetic request copy used just for caching (cannot clone request object...)
|
||||||
|
synth_req = Some(build_synth_req_for_cache(&req));
|
||||||
|
}
|
||||||
|
let res = self.request_directly(req).await;
|
||||||
|
|
||||||
|
if self.cache.is_none() {
|
||||||
|
return res.map(wrap_incoming_body_response::<BoxBody>);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check cacheability and store it if cacheable
|
||||||
|
let Ok(Some(cache_policy)) = get_policy_if_cacheable(synth_req.as_ref(), res.as_ref().ok()) else {
|
||||||
|
return res.map(wrap_incoming_body_response::<BoxBody>);
|
||||||
|
};
|
||||||
|
let (parts, body) = res.unwrap().into_parts();
|
||||||
|
let Ok(bytes) = body.collect().await.map(|v| v.to_bytes()) else {
|
||||||
|
return Err(RpxyError::FailedToWriteByteBufferForCache);
|
||||||
|
};
|
||||||
|
|
||||||
|
// if let Err(cache_err) = self
|
||||||
|
// .cache
|
||||||
|
// .as_ref()
|
||||||
|
// .unwrap()
|
||||||
|
// .put(synth_req.unwrap().uri(), &bytes, &cache_policy)
|
||||||
|
// .await
|
||||||
|
// {
|
||||||
|
// error!("{:?}", cache_err);
|
||||||
|
// };
|
||||||
|
|
||||||
|
// response with cached body
|
||||||
|
Ok(Response::from_parts(parts, IncomingOr::Right(full(bytes))))
|
||||||
|
}
|
||||||
|
|
||||||
|
// No cache handling
|
||||||
|
#[cfg(not(feature = "cache"))]
|
||||||
|
{
|
||||||
|
self.request_directly(req).await.map(wrap_incoming_body_response::<B2>)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -56,13 +107,15 @@ where
|
||||||
<B1 as Body>::Data: Send,
|
<B1 as Body>::Data: Send,
|
||||||
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
|
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
|
||||||
{
|
{
|
||||||
async fn request_directly<B2: Body>(&self, req: Request<B1>) -> RpxyResult<Response<IncomingOr<B2>>> {
|
async fn request_directly(&self, req: Request<B1>) -> RpxyResult<Response<Incoming>> {
|
||||||
|
// TODO: This 'match' condition is always evaluated at every 'request' invocation. So, it is inefficient.
|
||||||
|
// Needs to be reconsidered. Currently, this is a kind of work around.
|
||||||
|
// This possibly relates to https://github.com/hyperium/hyper/issues/2417.
|
||||||
match req.version() {
|
match req.version() {
|
||||||
Version::HTTP_2 => self.inner_h2.request(req).await, // handles `h2c` requests
|
Version::HTTP_2 => self.inner_h2.request(req).await, // handles `h2c` requests
|
||||||
_ => self.inner.request(req).await,
|
_ => self.inner.request(req).await,
|
||||||
}
|
}
|
||||||
.map_err(|e| RpxyError::FailedToFetchFromUpstream(e.to_string()))
|
.map_err(|e| RpxyError::FailedToFetchFromUpstream(e.to_string()))
|
||||||
.map(wrap_incoming_body_response::<B2>)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -90,7 +143,9 @@ Please enable native-tls-backend or rustls-backend feature to enable TLS support
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
inner,
|
inner,
|
||||||
inner_h2: inner.clone(),
|
inner_h2,
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
cache: RpxyCache::new(_globals).await,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -130,13 +185,12 @@ where
|
||||||
.http2_only(true)
|
.http2_only(true)
|
||||||
.build::<_, B1>(connector_h2);
|
.build::<_, B1>(connector_h2);
|
||||||
|
|
||||||
// #[cfg(feature = "cache")]
|
Ok(Self {
|
||||||
// {
|
inner,
|
||||||
// let cache = RpxyCache::new(_globals).await;
|
inner_h2,
|
||||||
// Self { inner, inner_h2, cache }
|
#[cfg(feature = "cache")]
|
||||||
// }
|
cache: RpxyCache::new(_globals).await,
|
||||||
// #[cfg(not(feature = "cache"))]
|
})
|
||||||
Ok(Self { inner, inner_h2 })
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -172,3 +226,17 @@ where
|
||||||
// let inner_h2 = Client::builder().http2_only(true).build::<_, Body>(connector_h2);
|
// let inner_h2 = Client::builder().http2_only(true).build::<_, Body>(connector_h2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "cache")]
|
||||||
|
/// Build synthetic request to cache
|
||||||
|
fn build_synth_req_for_cache<T>(req: &Request<T>) -> Request<()> {
|
||||||
|
let mut builder = Request::builder()
|
||||||
|
.method(req.method())
|
||||||
|
.uri(req.uri())
|
||||||
|
.version(req.version());
|
||||||
|
// TODO: omit extensions. is this approach correct?
|
||||||
|
for (header_key, header_value) in req.headers() {
|
||||||
|
builder = builder.header(header_key, header_value);
|
||||||
|
}
|
||||||
|
builder.body(()).unwrap()
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
mod cache;
|
||||||
mod client;
|
mod client;
|
||||||
|
|
||||||
use crate::hyper_ext::body::{IncomingLike, IncomingOr};
|
use crate::hyper_ext::body::{IncomingLike, IncomingOr};
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
Subproject commit 3cd09170305753309d86e88b9427827cca0de0dd
|
Subproject commit 88d23c2f5a3ac36295dff4a804968c43932ba46b
|
||||||
Loading…
Add table
Add a link
Reference in a new issue