feat: initialize this project (#1)
First PR to OSS cache
eddyxu authored Dec 2, 2024
1 parent a1a4562 commit 37cecc1
Showing 10 changed files with 712 additions and 3 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/rust.yml
@@ -0,0 +1,52 @@
name: Rust
on:
  push:
    branches:
      - main
  pull_request:

env:
  # This env var is used by Swatinem/rust-cache@v2 for the cache
  # key, so we set it to make sure it is always consistent.
  CARGO_TERM_COLOR: always
  # Disable full debug symbol generation to speed up CI build and keep memory down
  # "1" means line tables only, which is useful for panic tracebacks.
  RUSTFLAGS: "-C debuginfo=1"
  RUST_BACKTRACE: "1"
  # according to: https://matklad.github.io/2021/09/04/fast-rust-builds.html
  # CI builds are faster with incremental disabled.
  CARGO_INCREMENTAL: "0"
  CARGO_BUILD_JOBS: "1"

jobs:
  lint:
    runs-on: ubuntu-24.04
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v4
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: 1.83
          components: rustfmt, clippy
      - uses: Swatinem/rust-cache@v2
      - name: Check formatting
        run: cargo fmt -- --check
      - name: Check clippy
        run: cargo clippy --tests --benches -- -D warnings
  test:
    strategy:
      matrix:
        machine:
          - ubuntu-24.04
          # - ubuntu-2404-4x-arm64
          - macos-14
    runs-on: ${{ matrix.machine }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions-rust-lang/setup-rust-toolchain@v1
        with:
          toolchain: 1.83
      - uses: rui314/setup-mold@v1
      - uses: Swatinem/rust-cache@v2
      - name: Run tests
        run: cargo test
6 changes: 5 additions & 1 deletion .gitignore
@@ -18,4 +18,8 @@ Cargo.lock
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
#.idea/

# Added by cargo

/target
30 changes: 30 additions & 0 deletions Cargo.toml
@@ -0,0 +1,30 @@
[package]
name = "ocra"
version = "0.1.0"
authors = ["[email protected]"]
description = "OCRA: A Rust implementation of Cache in arrow-rs' ObjectStore interface"
edition = "2021"
license-file = "LICENSE"
keywords = ["cache", "object-store", "arrow"]
categories = ["caching"]

[dependencies]
async-trait = "0.1"
bytes = "~1.9"
futures = "0.3"
log = "~0.4"
moka = { version = "~0.12", features = ["future"] }
num_cpus = "1.16"
object_store = "~0.11"
sysinfo = "~0.32"
tokio = { version = "1", features = ["sync"] }

[dev-dependencies]
criterion = { version = "~0.5", features = ["async_tokio"] }
tempfile = "~3.14"
tokio = { version = "1", features = ["full"] }
rand = "~0.8"

[[bench]]
name = "memory"
harness = false
5 changes: 3 additions & 2 deletions README.md
@@ -1,2 +1,3 @@
# ocra
OCRA: Object-store Cache in Rust for All
# OCRA

**OCRA**: (**A**) (**R**)ust (**C**)ache implementation using _arrow-rs_ [(**O**)bjectStore](https://docs.rs/object_store/latest/object_store/) trait.
85 changes: 85 additions & 0 deletions benches/memory.rs
@@ -0,0 +1,85 @@
//! Benchmark for in-memory page cache.
//!
//!
use std::{fs::File, io::Write, sync::Arc};

use criterion::{criterion_group, criterion_main, Criterion};
use object_store::{path::Path, ObjectStore};
use rand::Rng;

use ocra::{memory::InMemoryCache, paging::PageCache};

fn memory_cache_bench(c: &mut Criterion) {
    let rt = tokio::runtime::Runtime::new().unwrap();
    let mut rng = rand::thread_rng();

    const FILE_SIZE: usize = 1024 * 1024 * 1024;
    // TODO: support other object stores later
    let store: Arc<dyn ObjectStore> = Arc::new(object_store::local::LocalFileSystem::new());
    let temp_file = tempfile::NamedTempFile::new().unwrap().into_temp_path();
    {
        let mut writer = File::create(temp_file.to_str().unwrap()).unwrap();
        let mut buf = vec![0_u8; 128 * 1024];

        for _ in 0..FILE_SIZE / (128 * 1024) {
            rng.fill(&mut buf[..]);
            writer.write_all(&buf).unwrap();
        }
    }

    for page_size in &[1024 * 1024, 8 * 1024 * 1024] {
        let cache = Arc::new(InMemoryCache::new(FILE_SIZE + 32 * 1024, *page_size));
        let location = Path::from(temp_file.to_str().unwrap());

        // Warm up the cache
        println!("Starting warm up cache with page size: {}", page_size);
        rt.block_on(async {
            let loc = location.clone();
            for i in 0..FILE_SIZE / page_size {
                let data = cache
                    .get_with(&loc, i as u32, {
                        let store = store.clone();
                        let location = loc.clone();
                        async move {
                            store
                                .get_range(&location, i * page_size..(i + 1) * page_size)
                                .await
                        }
                    })
                    .await
                    .unwrap();
                assert!(!data.is_empty());
            }
        });
        println!("Warm up cache done");

        c.bench_function(
            format!("memory_cache,warm,page_size={}", page_size).as_str(),
            |b| {
                b.to_async(&rt).iter(|| {
                    let mut rng = rand::thread_rng();
                    let cache = cache.clone();
                    let loc = location.clone();
                    async move {
                        let page_id = rng.gen_range(0..FILE_SIZE / page_size);

                        let _data = cache
                            .get_with(&loc, page_id as u32, async {
                                panic!("Should not be called page_id={}", page_id)
                            })
                            .await
                            .unwrap();
                    }
                })
            },
        );
    }
}

criterion_group!(
    name=benches;
    config = Criterion::default().significance_level(0.1).sample_size(10);
    targets = memory_cache_bench);

criterion_main!(benches);
40 changes: 40 additions & 0 deletions src/lib.rs
@@ -0,0 +1,40 @@
//! **OCRA**: (**A**) (**R**)ust (**C**)ache implementation for *arrow-rs*
//! [(**O**)bjectStore](object_store::ObjectStore).
//!
//! It offers a few `ObjectStore` implementations that work with
//! caches.
//!
//! For example, you can use [`ReadThroughCache`] to wrap an existing
//! `ObjectStore` instance with a [`PageCache`](paging::PageCache).
//!
//! ```no_run
//! # use std::sync::Arc;
//! # use tokio::runtime::Runtime;
//! use object_store::{ObjectStore, local::LocalFileSystem, path::Path};
//! use ocra::{ReadThroughCache, memory::InMemoryCache};
//!
//! # let mut rt = Runtime::new().unwrap();
//! # rt.block_on(async {
//! let fs = Arc::new(LocalFileSystem::new());
//! // Use 75% of system memory for cache
//! let memory_cache = Arc::new(
//!     InMemoryCache::with_sys_memory(0.75).build());
//! let cached_store: Arc<dyn ObjectStore> =
//!     Arc::new(ReadThroughCache::new(fs, memory_cache));
//!
//! // Now you can use `cached_store` as a regular ObjectStore
//! let path = Path::from("my-key");
//! let data = cached_store.get_range(&path, 1024..2048).await.unwrap();
//! # })
//! ```
// pub mod error;
pub mod memory;
pub mod paging;
mod read_through;

// We reuse `object_store` Error and Result to make this crate work well
// with the rest of object_store implementations.
pub use object_store::{Error, Result};

pub use read_through::ReadThroughCache;
(The remaining 4 changed files are not shown.)
