Compare commits

..

No commits in common. "main" and "v0.1.0" have entirely different histories.

5 changed files with 56 additions and 151 deletions

View file

@ -1,34 +1,21 @@
[package]
name = "median-accumulator"
version = "0.4.0"
version = "0.1.0"
edition = "2021"
authors = ["tuxmain <tuxmain@zettascript.org>"]
license = "AGPL-3.0-only"
repository = "https://git.txmn.tk/tuxmain/median-accumulator"
documentation = "https://docs.rs/median-accumulator/"
description = "Simple, fast, space-efficient, generic accumulator for computing median"
categories = ["algorithms", "data-structures", "no-std"]
keywords = ["median"]
description = "Simple, fast, space-efficient accumulator for computing median"
categories = ["algorithms"]
[dependencies]
# `default-features` (hyphen) is the supported key; the underscore spelling
# `default_features` is deprecated by Cargo and rejected in the 2024 edition.
cc-traits = { version = "2.0.0", default-features = false }
# Optional: only pulled in when the `smallvec` feature is enabled.
smallvec = { version = "^1.6", optional = true }
[features]
std = ["cc-traits/alloc", "cc-traits/std"]
smallvec = ["dep:smallvec", "cc-traits/smallvec"]
default = ["std"]
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
medianheap = "0.4.1"
criterion = { version = "0.4.0", features = ["html_reports"] }
medianheap = "0.3.0"
rand = "0.8.5"
smallvec = "^1.6"
[[bench]]
name = "comparison"
harness = false
[package.metadata.docs.rs]
features = ["std"]

View file

@ -7,17 +7,17 @@ Simple, space-efficient algorithm to compute the median of an accumulation of el
* **Space-efficient**: `O(D)` space, D being the number of _different_ samples, not the _total_ number of samples
* **Time-efficient**: push is `O(log(N))`
* **Generic**: `T: Clone + Ord`
* **No unsafe**
* **no_std** (optional): supports generic collections
* **Tested**
* **No unsafe**, no deps except `std`
Faster than other implementations if lots of samples have the same value. If this is not your case, you should use another implementation.
Faster than other implementations if there are samples having the same value. If this is not your case, you should use another implementation.
## Use
```rust
use median_accumulator::*;
let mut acc = vec::MedianAcc::new();
let mut acc = MedianAcc::new();
assert_eq!(acc.get_median(), None);
acc.push(7);
@ -30,21 +30,9 @@ assert_eq!(acc.get_median(), Some(MedianResult::One(7)));
If you ever encounter an `unreachable` panic, please file an issue or send me an e-mail.
## no_std
Example with [smallvec](https://crates.io/crates/smallvec): (`smallvec` feature required)
```rust
use median_accumulator::*;
let mut acc = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
```
For collections other than `Vec` or `SmallVec`, you must implement the [cc-traits](https://crates.io/crates/cc-traits) traits and `InsertIndex` for your collection type.
## License
CopyLeft 2022-2024 Pascal Engélibert [(why copyleft?)](https://txmn.tk/blog/why-copyleft/)
CopyLeft 2022 Pascal Engélibert
This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, version 3 of the License.

View file

@ -1,34 +1,54 @@
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
use rand::Rng;
static ITERS: u32 = 10_000;
fn compare_crates(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let mut group = c.benchmark_group("Comparison");
for redundancy in [1, 5, 10, 20, 40] {
let samples: Vec<u32> = (0..ITERS)
.map(|_| rng.gen_range(0..ITERS / redundancy))
.collect();
for len in [10, 50, 100, 500, 1000] {
let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len / 5)).collect();
group.bench_with_input(
BenchmarkId::new("median_accumulator", redundancy),
BenchmarkId::new("median_accumulator 1:5", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = median_accumulator::vec::MedianAcc::new();
let mut median = median_accumulator::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s));
black_box(median.get_median());
median.get_median()
})
},
);
group.bench_with_input(
BenchmarkId::new("medianheap", redundancy),
BenchmarkId::new("medianheap 1:5", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = medianheap::MedianHeap::new();
samples.iter().for_each(|s| median.push(*s));
black_box(median.median());
median.median()
})
},
);
let samples: Vec<u32> = (0..len).map(|_| rng.gen_range(0..len)).collect();
group.bench_with_input(
BenchmarkId::new("median_accumulator 1:1", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = median_accumulator::MedianAcc::new();
samples.iter().for_each(|s| median.push(*s));
median.get_median()
})
},
);
group.bench_with_input(
BenchmarkId::new("medianheap 1:1", len),
&samples,
|b, _i| {
b.iter(|| {
let mut median = medianheap::MedianHeap::new();
samples.iter().for_each(|s| median.push(*s));
median.median()
})
},
);

View file

@ -1,7 +1,7 @@
//! ```rust
//! use median_accumulator::*;
//!
//! let mut acc = vec::MedianAcc::new();
//! let mut acc = MedianAcc::new();
//!
//! assert_eq!(acc.get_median(), None);
//! acc.push(7);
@ -14,29 +14,14 @@
//!
//! In doc comments, _N_ represents the number of samples, _D_ represents the number of different values taken by the samples.
#![cfg_attr(not(feature = "std"), no_std)]
mod traits;
pub use traits::*;
use core::{cmp::Ordering, ops::DerefMut};
use std::cmp::Ordering;
/// Accumulator for computing median
#[derive(Clone, Debug, Default)]
pub struct MedianAcc<
T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> {
samples: V,
pub struct MedianAcc<T: Clone + Ord> {
samples: Vec<(T, u32)>,
median_index: Option<usize>,
median_subindex: u32,
_t: core::marker::PhantomData<T>,
}
#[cfg(feature = "std")]
pub mod vec {
pub type MedianAcc<T> = crate::MedianAcc<T, Vec<(T, u32)>>;
}
/// Computed median
@ -49,41 +34,17 @@ pub enum MedianResult<T: Clone + Ord> {
Two(T, T),
}
impl<
T: Clone + Ord,
V: DerefMut<Target = [(T, u32)]> + cc_traits::VecMut<(T, u32)> + InsertIndex,
> MedianAcc<T, V>
{
impl<T: Clone + Ord> MedianAcc<T> {
/// Create an empty accumulator
///
/// _O(1)_
///
/// If using `std::vec::Vec`, does not allocate until the first push.
pub fn new() -> Self
where
V: Default,
{
/// Does not allocate until the first push.
pub fn new() -> Self {
Self {
samples: Default::default(),
samples: Vec::new(),
median_index: None,
median_subindex: 0,
_t: Default::default(),
}
}
/// Create an empty accumulator from an existing (empty) collection
///
/// _O(1)_
///
/// Useful when using fixed-length collections or to avoid allocations.
pub fn new_from(collection: V) -> Self {
assert!(collection.is_empty(), "the collection must be empty");
Self {
samples: collection,
median_index: None,
median_subindex: 0,
_t: Default::default(),
}
}
@ -124,7 +85,7 @@ impl<
}
}
Err(sample_index) => {
self.samples.insert_index(sample_index, (sample, 1));
self.samples.insert(sample_index, (sample, 1));
if *median_index >= sample_index {
if self.median_subindex == 0 {
self.median_subindex =
@ -144,7 +105,7 @@ impl<
}
}
} else {
self.samples.push_back((sample, 1));
self.samples.push((sample, 1));
self.median_index = Some(0);
}
}
@ -187,22 +148,19 @@ impl<
}
/// Clear the data
pub fn clear(&mut self)
where
V: cc_traits::Clear,
{
pub fn clear(&mut self) {
self.samples.clear();
self.median_index = None;
self.median_subindex = 0;
}
/// Access the underlying collection
/// Access the underlying vec
///
/// Just in case you need finer allocation management.
///
/// # Safety
/// Leaving the vector in an invalid state may cause invalid results or panics (but no UB).
pub unsafe fn get_samples_mut(&mut self) -> &mut V {
pub unsafe fn get_samples_mut(&mut self) -> &mut Vec<(T, u32)> {
&mut self.samples
}
}
@ -213,7 +171,6 @@ mod tests {
use rand::Rng;
#[cfg(feature = "std")]
fn naive_median<T: Clone + Ord>(samples: &mut [T]) -> Option<MedianResult<T>> {
if samples.is_empty() {
None
@ -233,7 +190,6 @@ mod tests {
}
}
#[cfg(feature = "std")]
#[test]
fn correctness() {
let mut rng = rand::thread_rng();
@ -242,25 +198,7 @@ mod tests {
let len: usize = rng.gen_range(0..100);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = vec::MedianAcc::new();
for sample in samples.iter() {
median.push(*sample);
}
assert_eq!(median.get_median(), naive_median(&mut samples));
}
}
#[cfg(feature = "smallvec")]
#[test]
fn correctness_smallvec() {
let mut rng = rand::thread_rng();
for _ in 0..100_000 {
let len: usize = rng.gen_range(0..64);
let mut samples: Vec<i32> = (0..len).map(|_| rng.gen_range(-100..100)).collect();
let mut median = MedianAcc::<i32, smallvec::SmallVec<[(i32, u32); 64]>>::new();
let mut median = MedianAcc::new();
for sample in samples.iter() {
median.push(*sample);
}

View file

@ -1,28 +0,0 @@
/// Collection where an item can be inserted at a given index.
///
/// Implementors must also implement `cc_traits::Collection`; the inserted
/// element has that collection's `Item` type.
pub trait InsertIndex: cc_traits::Collection {
/// Value returned by [`InsertIndex::insert_index`].
///
/// The implementations provided in this file use `()`.
type Output;
/// Inserts `element` at position `index`.
///
/// NOTE(review): the behavior for an out-of-range `index` is left to the
/// implementor (presumably a panic for `Vec`-like collections; confirm).
fn insert_index(
&mut self,
index: usize,
element: <Self as cc_traits::Collection>::Item,
) -> Self::Output;
}
#[cfg(feature = "std")]
impl<T> InsertIndex for Vec<T> {
    /// `Vec::insert` returns nothing, so this implementation does not either.
    type Output = ();

    /// Inserts `element` at position `index` by delegating to the inherent
    /// [`Vec::insert`].
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        Vec::insert(self, index, element);
    }
}
#[cfg(feature = "smallvec")]
impl<T, A: smallvec::Array<Item = T>> InsertIndex for smallvec::SmallVec<A> {
    /// `SmallVec::insert` returns nothing, so this implementation does not either.
    type Output = ();

    /// Inserts `element` at position `index` by delegating to the inherent
    /// `SmallVec::insert`.
    fn insert_index(&mut self, index: usize, element: T) -> Self::Output {
        smallvec::SmallVec::insert(self, index, element);
    }
}