Skip to content

Commit

Permalink
save a lot more snapshot information
Browse files Browse the repository at this point in the history
  • Loading branch information
drdo committed Jun 30, 2024
1 parent 1e60019 commit 2ea0776
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 103 deletions.
27 changes: 25 additions & 2 deletions benches/cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::cell::Cell;

use criterion::{black_box, criterion_group, criterion_main, Criterion};
use redu::cache::tests::*;
use redu::{cache::tests::*, restic::Snapshot};

pub fn criterion_benchmark(c: &mut Criterion) {
c.bench_function("merge sizetree", |b| {
Expand All @@ -14,10 +14,33 @@ pub fn criterion_benchmark(c: &mut Criterion) {

c.bench_function("create and save snapshot", |b| {
with_cache_open(|mut cache| {
let foo = Snapshot {
id: "foo".to_string(),
time: mk_datetime(2024, 4, 12, 12, 00, 00),
parent: Some("bar".to_string()),
tree: "sometree".to_string(),
paths: vec![
"/home/user".to_string(),
"/etc".to_string(),
"/var".to_string(),
],
hostname: Some("foo.com".to_string()),
username: Some("user".to_string()),
uid: Some(123),
gid: Some(456),
excludes: vec![
".cache".to_string(),
"Cache".to_string(),
"/home/user/Downloads".to_string(),
],
tags: vec!["foo_machine".to_string(), "rewrite".to_string()],
original_id: Some("fefwfwew".to_string()),
program_version: Some("restic 0.16.0".to_string()),
};
b.iter(move || {
cache
.save_snapshot(
"foo",
&foo,
generate_sizetree(black_box(6), black_box(12)),
)
.unwrap();
Expand Down
140 changes: 118 additions & 22 deletions src/cache/mod.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
use std::{collections::HashSet, path::Path};

use camino::{Utf8Path, Utf8PathBuf};
use chrono::DateTime;
use log::trace;
use rusqlite::{
functions::FunctionFlags, params, Connection, OptionalExtension, Row,
functions::FunctionFlags, params, types::FromSqlError, Connection,
OptionalExtension, Row,
};
use thiserror::Error;

use crate::cache::filetree::SizeTree;
use crate::{
cache::filetree::SizeTree,
restic::Snapshot,
};

pub mod filetree;
#[cfg(any(test, feature = "bench"))]
Expand All @@ -26,14 +31,57 @@ pub enum OpenError {
Migration(#[from] MigrationError),
}

/// Error type returned by `Cache::get_snapshots` when reading snapshot
/// metadata back from the cache database.
#[derive(Error, Debug)]
pub enum Error {
/// The underlying `rusqlite` call failed (wraps `rusqlite::Error`).
#[error("SQL error")]
Sql(#[from] rusqlite::Error),
/// A column value could not be read as the requested SQL type
/// (wraps `rusqlite::types::FromSqlError`).
#[error("Unexpected SQL datatype")]
FromSqlError(#[from] FromSqlError),
/// A JSON value could not be deserialized (wraps `serde_json::Error`).
#[error("Error parsing JSON")]
Json(#[from] serde_json::Error),
/// The stored `time` value could not be converted back with
/// `DateTime::from_timestamp_micros` (it returned `None`).
#[error("Exhausted timestamp precision (a couple hundred thousand years after the epoch).")]
ExhaustedTimestampPrecision,
}

impl Cache {
pub fn get_snapshots(&self) -> Result<Vec<String>, rusqlite::Error> {
Ok(get_tables(&self.conn)?
.iter()
.filter_map(|name| {
name.strip_prefix("entries_").map(ToOwned::to_owned)
})
.collect())
pub fn get_snapshots(&self) -> Result<Vec<Snapshot>, Error> {
self.conn
.prepare(
"SELECT \
hash, \
time, \
parent, \
tree, \
hostname, \
username, \
uid, \
gid, \
original_id, \
program_version, \
coalesce((SELECT json_group_array(path) FROM snapshot_paths WHERE hash = snapshots.hash), json_array()) as paths, \
coalesce((SELECT json_group_array(path) FROM snapshot_excludes WHERE hash = snapshots.hash), json_array()) as excludes, \
coalesce((SELECT json_group_array(tag) FROM snapshot_tags WHERE hash = snapshots.hash), json_array()) as tags \
FROM snapshots")?
.query_and_then([], |row|
Ok(Snapshot {
id: row.get("hash")?,
time: DateTime::from_timestamp_micros(row.get("time")?)
.map(Ok)
.unwrap_or(Err(Error::ExhaustedTimestampPrecision))?,
parent: row.get("parent")?,
tree: row.get("tree")?,
paths: serde_json::from_str(row.get_ref("paths")?.as_str()?)?,
hostname: row.get("hostname")?,
username: row.get("username")?,
uid: row.get("uid")?,
gid: row.get("gid")?,
excludes: serde_json::from_str(row.get_ref("excludes")?.as_str()?)?,
tags: serde_json::from_str(row.get_ref("tags")?.as_str()?)?,
original_id: row.get("original_id")?,
program_version: row.get("program_version")?,
})
)?
.collect()
}

pub fn get_parent_id(
Expand Down Expand Up @@ -123,14 +171,60 @@ impl Cache {

pub fn save_snapshot(
&mut self,
hash: impl AsRef<str>,
snapshot: &Snapshot,
tree: SizeTree,
) -> Result<usize, rusqlite::Error> {
let mut file_count = 0;
let tx = self.conn.transaction()?;
{
let entries_table = format!("entries_{}", hash.as_ref());

tx.execute(
"INSERT INTO snapshots ( \
hash, \
time, \
parent, \
tree, \
hostname, \
username, \
uid, \
gid, \
original_id, \
program_version \
) \
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
params![
snapshot.id,
snapshot.time.timestamp_micros(),
snapshot.parent,
snapshot.tree,
snapshot.hostname,
snapshot.username,
snapshot.uid,
snapshot.gid,
snapshot.original_id,
snapshot.program_version
],
)?;
let mut snapshot_paths_stmt = tx.prepare(
"INSERT INTO snapshot_paths (hash, path) VALUES (?, ?)",
)?;
for path in snapshot.paths.iter() {
snapshot_paths_stmt.execute([&snapshot.id, path])?;
}
let mut snapshot_excludes_stmt = tx.prepare(
"INSERT INTO snapshot_excludes (hash, path) VALUES (?, ?)",
)?;
for path in snapshot.excludes.iter() {
snapshot_excludes_stmt.execute([&snapshot.id, path])?;
}
let mut snapshot_tags_stmt = tx.prepare(
"INSERT INTO snapshot_tags (hash, tag) VALUES (?, ?)",
)?;
for path in snapshot.tags.iter() {
snapshot_tags_stmt.execute([&snapshot.id, path])?;
}
}
{
let entries_table = format!("entries_{}", &snapshot.id);
tx.execute(
&format!(
"CREATE TABLE \"{entries_table}\" (
Expand All @@ -142,6 +236,10 @@ impl Cache {
),
[],
)?;
let mut entries_stmt = tx.prepare(&format!(
"INSERT INTO \"{entries_table}\" (path_id, size, is_dir) \
VALUES (?, ?, ?)",
))?;

let mut paths_stmt = tx.prepare(
"INSERT INTO paths (parent_id, component)
Expand All @@ -152,11 +250,6 @@ impl Cache {
"SELECT id FROM paths WHERE parent_id = ? AND component = ?",
)?;

let mut entries_stmt = tx.prepare(&format!(
"INSERT INTO \"{entries_table}\" (path_id, size, is_dir) \
VALUES (?, ?, ?)",
))?;

tree.0.traverse_with_context(
|id_stack, component, size, is_dir| {
let parent_id = id_stack.last().copied();
Expand All @@ -182,11 +275,14 @@ impl Cache {
&mut self,
hash: impl AsRef<str>,
) -> Result<(), rusqlite::Error> {
self.conn.execute(
&format!("DROP TABLE IF EXISTS \"entries_{}\"", hash.as_ref()),
[],
)?;
Ok(())
let hash = hash.as_ref();
let tx = self.conn.transaction()?;
tx.execute("DELETE FROM snapshots WHERE hash = ?", [hash])?;
tx.execute("DELETE FROM snapshot_paths WHERE hash = ?", [hash])?;
tx.execute("DELETE FROM snapshot_excludes WHERE hash = ?", [hash])?;
tx.execute("DELETE FROM snapshot_tags WHERE hash = ?", [hash])?;
tx.execute(&format!("DROP TABLE IF EXISTS \"entries_{}\"", hash), [])?;
tx.commit()
}

// Marks ////////////////////////////////////////////////
Expand Down
28 changes: 28 additions & 0 deletions src/cache/sql/none_to_v1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,34 @@ CREATE TABLE paths (
);
CREATE UNIQUE INDEX paths_parent_component ON paths (parent_id, component);

-- Snapshot metadata: one row per snapshot, keyed by the snapshot hash.
-- `time` is written by the cache as an integer microsecond timestamp
-- (the Rust side stores `timestamp_micros()` and reads it back with
-- `DateTime::from_timestamp_micros`).
CREATE TABLE snapshots (
hash TEXT PRIMARY KEY,
time INTEGER,
parent TEXT,
tree TEXT NOT NULL,
hostname TEXT,
username TEXT,
uid INTEGER,
gid INTEGER,
original_id TEXT,
program_version TEXT
) WITHOUT ROWID;
-- List-valued snapshot attributes, one row per (hash, value) pair.
-- `hash` is matched against snapshots.hash by the cache's queries; no
-- foreign key is declared here, so rows must be removed explicitly when a
-- snapshot is deleted.
-- Paths recorded for each snapshot.
CREATE TABLE snapshot_paths (
hash TEXT,
path TEXT,
PRIMARY KEY (hash, path)
) WITHOUT ROWID;
-- Exclude patterns recorded for each snapshot.
CREATE TABLE snapshot_excludes (
hash TEXT,
path TEXT,
PRIMARY KEY (hash, path)
) WITHOUT ROWID;
-- Tags recorded for each snapshot.
CREATE TABLE snapshot_tags (
hash TEXT,
tag TEXT,
PRIMARY KEY (hash, tag)
) WITHOUT ROWID;

-- The entries tables are sharded per snapshot and created dynamically

CREATE TABLE marks (path TEXT PRIMARY KEY) WITHOUT ROWID;
28 changes: 28 additions & 0 deletions src/cache/sql/v0_to_v1.sql
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,34 @@ CREATE TABLE paths (
);
CREATE UNIQUE INDEX paths_parent_component ON paths (parent_id, component);

-- Snapshot metadata: one row per snapshot, keyed by the snapshot hash.
-- `time` is written by the cache as an integer microsecond timestamp
-- (the Rust side stores `timestamp_micros()` and reads it back with
-- `DateTime::from_timestamp_micros`).
CREATE TABLE snapshots (
hash TEXT PRIMARY KEY,
time INTEGER,
parent TEXT,
tree TEXT NOT NULL,
hostname TEXT,
username TEXT,
uid INTEGER,
gid INTEGER,
original_id TEXT,
program_version TEXT
) WITHOUT ROWID;
-- List-valued snapshot attributes, one row per (hash, value) pair.
-- `hash` is matched against snapshots.hash by the cache's queries; no
-- foreign key is declared here, so rows must be removed explicitly when a
-- snapshot is deleted.
-- Paths recorded for each snapshot.
CREATE TABLE snapshot_paths (
hash TEXT,
path TEXT,
PRIMARY KEY (hash, path)
) WITHOUT ROWID;
-- Exclude patterns recorded for each snapshot.
CREATE TABLE snapshot_excludes (
hash TEXT,
path TEXT,
PRIMARY KEY (hash, path)
) WITHOUT ROWID;
-- Tags recorded for each snapshot.
CREATE TABLE snapshot_tags (
hash TEXT,
tag TEXT,
PRIMARY KEY (hash, tag)
) WITHOUT ROWID;

-- The entries tables are sharded per snapshot and created dynamically

CREATE TABLE new_marks (path TEXT PRIMARY KEY) WITHOUT ROWID;
Expand Down
Loading

0 comments on commit 2ea0776

Please sign in to comment.