Review note: this patch was recovered from a copy in which line breaks had been
collapsed and every `<...>` generic argument list had been dropped. Added (`+`)
lines carry generic arguments restored by inference. Context lines and hunk
headings of mod.rs are reproduced as found and must be checked against the real
file before the patch is applied; the `index` lines still name the original blobs.

diff --git a/src/storage/layout/complement.rs b/src/storage/layout/complement.rs
new file mode 100644
index 0000000..ae1c542
--- /dev/null
+++ b/src/storage/layout/complement.rs
@@ -0,0 +1,127 @@
+//! Complementary storage layout: every triple is stored as one row of three terms.
+//!
+//! NOTE(review): the generic arguments in this file (`Vec<u64>`, `LayoutOps<Chunk>`,
+//! `Mutex<TriMat<usize>>`, ...) were missing from the patch text and are restored by
+//! inference; confirm them against the trait definitions in `mod.rs`.
+
+use std::num::NonZeroU64;
+
+use parking_lot::Mutex;
+use sprs::TriMat;
+use zarrs::array::codec::array_to_bytes::sharding::ShardingCodecBuilder;
+use zarrs::array::codec::ArrayToBytesCodecTraits;
+use zarrs::array::codec::GzipCodec;
+use zarrs::array::ChunkGrid;
+use zarrs::array::DataType;
+use zarrs::array::DimensionName;
+use zarrs::array::FillValue;
+
+use super::ChunkingStrategy;
+use super::Dimensionality;
+use super::StorageResult;
+
+use crate::io::Graph;
+use crate::storage::layout::ComplementaryLayout;
+use crate::storage::layout::LayoutOps;
+
+/// One stored triple: `(first_term, second_term, third_term)`.
+type Chunk = (u32, u32, u32);
+
+/// Number of values stored per triple; the length of the second array dimension.
+const FIELDS_PER_TRIPLE: u64 = 3;
+
+/// Layout that stores each triple as one row of `FIELDS_PER_TRIPLE` values.
+pub struct ComplementLayout;
+
+impl ComplementaryLayout<Chunk> for ComplementLayout {
+    /// Array shape: `get_graph_size()` rows by `FIELDS_PER_TRIPLE` columns.
+    fn shape(&self, dimensionality: &Dimensionality) -> Vec<u64> {
+        vec![dimensionality.get_graph_size(), FIELDS_PER_TRIPLE]
+    }
+
+    /// Elements are stored as `DataType::UInt64`.
+    fn data_type(&self) -> DataType {
+        DataType::UInt64
+    }
+
+    /// Chunk grid: `chunking_strategy` rows by `FIELDS_PER_TRIPLE` columns.
+    fn chunk_shape(&self, chunking_strategy: ChunkingStrategy, _: &Dimensionality) -> ChunkGrid {
+        let fields = NonZeroU64::new(FIELDS_PER_TRIPLE).expect("FIELDS_PER_TRIPLE is non-zero");
+        vec![chunking_strategy.into(), fields].into()
+    }
+
+    /// Fill value of the array: `0u64`.
+    fn fill_value(&self) -> FillValue {
+        FillValue::from(0u64)
+    }
+
+    /// Names of the two array dimensions.
+    fn dimension_names(&self) -> Option<Vec<DimensionName>> {
+        Some(vec![
+            DimensionName::new("Triples"),
+            DimensionName::new("Complementary fields"),
+        ])
+    }
+
+    /// Sharding codec with `[1, FIELDS_PER_TRIPLE]` inner chunks and a `GzipCodec::new(5)` stage.
+    ///
+    /// # Errors
+    ///
+    /// Propagates a failure to convert the inner chunk shape or to create the gzip codec.
+    fn array_to_bytes_codec(
+        &self,
+        _: &Dimensionality,
+    ) -> StorageResult<Box<dyn ArrayToBytesCodecTraits>> {
+        let mut builder = ShardingCodecBuilder::new(vec![1, FIELDS_PER_TRIPLE].try_into()?);
+        builder.bytes_to_bytes_codecs(vec![Box::new(GzipCodec::new(5)?)]);
+        Ok(Box::new(builder.build()))
+    }
+}
+
+impl LayoutOps<Chunk> for ComplementLayout {
+    /// Flattens `graph` into `(first_term, second_term, third_term)` triples.
+    ///
+    /// The first term is the position of the entry within `graph`.
+    fn graph_iter(&self, graph: Graph) -> Vec<Chunk> {
+        graph
+            .iter()
+            .enumerate()
+            .flat_map(|(first_term, triples)| {
+                // NOTE(review): `as u32` truncates silently for positions above `u32::MAX`.
+                let first_term = first_term as u32;
+                triples
+                    .iter()
+                    .map(move |&(second_term, third_term)| (first_term, second_term, third_term))
+            })
+            .collect()
+    }
+
+    /// Serialises every triple of `chunk` as three consecutive `u64` values.
+    fn store_chunk_elements(&self, chunk: &[Chunk], _: usize) -> Vec<u64> {
+        chunk
+            .iter()
+            .flat_map(|&(first_term, second_term, third_term)| {
+                [first_term, second_term, third_term].map(u64::from)
+            })
+            .collect()
+    }
+
+    /// Adds one entry to `matrix`: row `chunk[0]`, column `chunk[2]`, value `chunk[1]`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `chunk` holds fewer than three elements.
+    fn retrieve_chunk_elements(
+        &mut self,
+        matrix: &Mutex<TriMat<usize>>,
+        _first_term_index: usize, // TODO: will first_term_index instead of chunk[0] do the trick?
+        chunk: &[usize],
+    ) {
+        matrix.lock().add_triplet(chunk[0], chunk[2], chunk[1]);
+    }
+
+    /// Sharding factor: `first_term_size * third_term_size`.
+    fn sharding_factor(&self, dimensionality: &Dimensionality) -> usize {
+        dimensionality.first_term_size * dimensionality.third_term_size
+    }
+}
diff --git a/src/storage/layout/mod.rs b/src/storage/layout/mod.rs
index f21281b..c9b8b9b 100644
--- a/src/storage/layout/mod.rs
+++ b/src/storage/layout/mod.rs
@@ -28,6 +28,7 @@ type ArrayToBytesCodec = Box;
 
 pub mod matrix;
 pub mod tabular;
+pub mod complement;
 
 pub trait LayoutOps {
     fn retrieve_attributes(&mut self, arr: &Array) -> StorageResult {
@@ -172,3 +173,29 @@ pub trait Layout: LayoutOps {
         dimensionality: &Dimensionality,
     ) -> StorageResult;
 }
+
+/// Array-level description of a complementary layout.
+///
+/// NOTE(review): the type parameter `C` and the generic arguments below were missing from the
+/// patch text and are restored by inference; confirm them against `Layout` in this file.
+pub trait ComplementaryLayout<C>: LayoutOps<C> {
+    /// Shape of the array for the given `dimensionality`.
+    fn shape(&self, dimensionality: &Dimensionality) -> Vec<u64>;
+    /// Data type of the stored elements.
+    fn data_type(&self) -> DataType;
+    /// Chunk grid derived from `chunking_strategy` and `dimensionality`.
+    fn chunk_shape(
+        &self,
+        chunking_strategy: ChunkingStrategy,
+        dimensionality: &Dimensionality,
+    ) -> ChunkGrid;
+    /// Fill value of the array.
+    fn fill_value(&self) -> FillValue;
+    /// Names of the array dimensions, if any.
+    fn dimension_names(&self) -> Option<Vec<DimensionName>>;
+    /// Array-to-bytes codec for the given `dimensionality`.
+    fn array_to_bytes_codec(
+        &self,
+        dimensionality: &Dimensionality,
+    ) -> StorageResult<ArrayToBytesCodec>;
+}