Skip to content

Commit

Permalink
rsc: Tier blobs into nested dirs to avoid file limits (#1617)
Browse files Browse the repository at this point in the history
* rsc: Tier blobs into nested dirs to avoid file limits

* tune parameters

* Update rust/rsc/src/bin/rsc/blob_store_impls.rs

Co-authored-by: Colin Schmidt <[email protected]>

---------

Co-authored-by: Colin Schmidt <[email protected]>
  • Loading branch information
V-FEXrt and colinschmidt authored Aug 1, 2024
1 parent e861109 commit af316b3
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 13 deletions.
2 changes: 1 addition & 1 deletion rust/rsc/.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"server_address": "0.0.0.0:3002",
"connection_pool_timeout": 60,
"standalone": false,
"active_store": "6a6ea9c9-a261-44b1-8ef7-305a12b04eab",
"active_store": "e9c2dac1-3882-442f-b8a4-1fc04582a003",
"log_directory": null,
"blob_eviction": {
"tick_rate": 60,
Expand Down
50 changes: 38 additions & 12 deletions rust/rsc/src/bin/rsc/blob_store_impls.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,37 @@
use crate::blob::*;
use async_trait::async_trait;
use data_encoding::BASE64URL;
use futures::stream::BoxStream;
use rand_core::{OsRng, RngCore};
use sea_orm::prelude::Uuid;
use std::fmt::Write;
use tokio::fs::File;
use tokio::io::AsyncReadExt;
use tokio::io::BufWriter;
use tokio_util::bytes::Bytes;
use tokio_util::io::StreamReader;

fn create_temp_filename() -> String {
let mut key = [0u8; 16];
OsRng.fill_bytes(&mut key);
// URL must be used as files can't contain /
BASE64URL.encode(&key)
/// Builds a random relative path under which a new blob is stored.
///
/// The path has three components, all rendered as zero-padded uppercase hex:
/// two directory levels of one random byte each (2 hex characters per level),
/// followed by a file name made of eight random bytes (16 hex characters),
/// e.g. `3F/A0/0123456789ABCDEF`.
///
/// Tiering blobs into nested directories keeps any single directory from
/// holding every blob, which avoids per-directory file limits.
fn create_random_blob_path() -> std::path::PathBuf {
    // 2 directory bytes followed by 8 file-name bytes.
    let mut parts = [0u8; 10];
    OsRng.fill_bytes(&mut parts);

    let (dir_bytes, name_bytes) = parts.split_at(2);

    // Each of the first 2 bytes names one containing directory, so every
    // level fans out into at most 256 entries.
    let mut buf: std::path::PathBuf = dir_bytes
        .iter()
        .map(|byte| format!("{:02X}", byte))
        .collect();

    // The remaining 8 bytes are concatenated into the file name.
    let mut file_name = String::with_capacity(name_bytes.len() * 2);
    for byte in name_bytes {
        write!(&mut file_name, "{:02X}", byte).expect("writing to a String cannot fail");
    }
    buf.push(file_name);

    buf
}

#[derive(Debug, Clone)]
Expand All @@ -36,20 +53,29 @@ impl BlobStore for LocalBlobStore {
let reader = StreamReader::new(stream);
futures::pin_mut!(reader);

let key = create_temp_filename();
let path = std::path::Path::new(&self.root).join(key.clone());
let mut file = BufWriter::new(File::create(path).await?);
let rel_path = create_random_blob_path();
let path = std::path::Path::new(&self.root).join(rel_path.clone());
tokio::fs::create_dir_all(path.parent().unwrap()).await?;

let mut file = BufWriter::new(File::create(path).await?);
let written = tokio::io::copy(&mut reader, &mut file).await?;

let size = match i64::try_from(written) {
Err(_) => {
tracing::error!(%written, "Size overflows i64, setting to i64::MAX instead");
Err(err) => {
tracing::error!(%err, %written, "Size overflows i64, setting to i64::MAX instead");
i64::MAX
}
Ok(size) => size,
};

let key = match rel_path.into_os_string().into_string() {
Err(path) => {
tracing::error!("Cannot convert path to string, returning lossy path instead");
path.to_string_lossy().to_string()
}
Ok(s) => s,
};

Ok((key, size))
}

Expand Down Expand Up @@ -80,7 +106,7 @@ impl BlobStore for TestBlobStore {
&self,
_stream: BoxStream<'a, Result<Bytes, std::io::Error>>,
) -> Result<(String, i64), std::io::Error> {
Ok((create_temp_filename(), 0xDEADBEEF))
Ok(("TestTestTest".to_string(), 0xDEADBEEF))
}

async fn download_url(&self, key: String) -> String {
Expand Down

0 comments on commit af316b3

Please sign in to comment.