From a5ab4b1d51fc60b3245216a4aaf2030a24eb569f Mon Sep 17 00:00:00 2001 From: angelip2303 Date: Mon, 11 Mar 2024 12:28:24 +0000 Subject: [PATCH] improving the tests --- Cargo.toml | 2 +- src/engine/chunk.rs | 8 +--- src/storage/layout/mod.rs | 12 ++++- tests/common/mod.rs | 78 ++++++++++++++++++++++++++++++++ tests/get_object_test.rs | 30 ++++++++++--- tests/get_predicate_test.rs | 43 +++++++++++++++--- tests/get_subject_test.rs | 89 ++++++++++++++++++++++++++++++++----- tests/orientation.rs | 41 ++++++++++++++++- 8 files changed, 268 insertions(+), 35 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 95475c4..b2f7a51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ version = "0.0.1" edition = "2021" [dependencies] -zarrs = { version = "0.12.3", default-features = false, features = [ "http", "gzip", "sharding", "opendal", "async", "ndarray" ] } +zarrs = { version = "0.12.4", default-features = false, features = [ "http", "gzip", "sharding", "opendal", "async", "ndarray", "crc32c" ] } clap = { version = "4.1.8", features = ["derive"] } serde_json = "1.0.108" thiserror = "1.0.50" diff --git a/src/engine/chunk.rs b/src/engine/chunk.rs index 0f37f6c..9870474 100644 --- a/src/engine/chunk.rs +++ b/src/engine/chunk.rs @@ -42,11 +42,7 @@ impl EngineStrategy> for Array { let objects = self.shape()[0]; let col = index as u64; let shape = ArraySubset::new_with_ranges(&[0..objects, col..col + 1]); - let array_subset = self.retrieve_array_subset(&shape).unwrap(); - let third_term_subset = array_subset - .windows(4) - .map(|w| u32::from_ne_bytes(w.try_into().unwrap())) - .collect::>(); - Ok(third_term_subset) + let array_subset = self.retrieve_array_subset_elements::(&shape)?; + Ok(array_subset) } } diff --git a/src/storage/layout/mod.rs b/src/storage/layout/mod.rs index 11657ad..6bb9338 100644 --- a/src/storage/layout/mod.rs +++ b/src/storage/layout/mod.rs @@ -77,10 +77,18 @@ pub trait LayoutOps { } if !remainder.is_empty() { + // first we count the number of shards that have been processed, and + // multiply it by the number of chunks in every shard. Hence, we will + // obtain the number of rows that have been processed + let rows_processed = count.load(Ordering::Relaxed) * rows_per_shard(&arr); + // then we obtain the size of the last shard that is going to be + // processed; it is equals to the size of the remainder + let last_shard_size = remainder.len() as u64; + // lastly, we store the elements in the provided subset arr.store_array_subset_elements::( &ArraySubset::new_with_start_shape( - vec![count.load(Ordering::Relaxed) * rows_per_shard(&arr), 0], - vec![remainder.len() as u64, columns_per_shard(&arr)], + vec![rows_processed, 0], + vec![last_shard_size, columns_per_shard(&arr)], )?, self.store_chunk_elements(remainder, columns), )?; diff --git a/tests/common/mod.rs b/tests/common/mod.rs index c0d5b80..e97dae3 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -192,3 +192,81 @@ impl Graph { ans.to_csc() } } + +pub fn set_expected_first_term_matrix( + expected: &mut Vec, + subject: Subject, + predicate: Predicate, + object: Object, + dictionary: &Dictionary, + reference_system: ReferenceSystem, +) { + let subject_idx = subject.get_idx(dictionary); + let predicate_idx = predicate.get_idx(dictionary); + let object_idx = object.get_idx(dictionary); + + match reference_system { + ReferenceSystem::SPO => expected[object_idx] = predicate_idx as u32, + ReferenceSystem::SOP => expected[predicate_idx] = object_idx as u32, + ReferenceSystem::PSO => expected[object_idx] = subject_idx as u32, + ReferenceSystem::POS => expected[subject_idx] = object_idx as u32, + ReferenceSystem::OSP => expected[predicate_idx] = subject_idx as u32, + ReferenceSystem::OPS => expected[subject_idx] = predicate_idx as u32, + } +} + +pub fn set_expected_second_term_matrix( + expected: &mut Vec, + subject: Subject, + predicate: Predicate, + object: Object, + dictionary: &Dictionary, + reference_system: ReferenceSystem, +) { + let subject_idx = subject.get_idx(dictionary); + let predicate_idx = predicate.get_idx(dictionary); + let object_idx = object.get_idx(dictionary); + + match reference_system { + ReferenceSystem::SPO => { + expected[subject_idx * dictionary.objects_size() + object_idx] = predicate_idx as u32 + } + ReferenceSystem::SOP => { + expected[subject_idx * dictionary.predicates_size() + predicate_idx] = object_idx as u32 + } + ReferenceSystem::PSO => { + expected[predicate_idx * dictionary.objects_size() + object_idx] = subject_idx as u32 + } + ReferenceSystem::POS => { + expected[predicate_idx * dictionary.subjects_size() + subject_idx] = object_idx as u32 + } + ReferenceSystem::OSP => { + expected[object_idx * dictionary.predicates_size() + predicate_idx] = subject_idx as u32 + } + ReferenceSystem::OPS => { + expected[object_idx * dictionary.subjects_size() + subject_idx] = predicate_idx as u32 + } + } +} + +pub fn set_expected_third_term_matrix( + expected: &mut Vec, + subject: Subject, + predicate: Predicate, + object: Object, + dictionary: &Dictionary, + reference_system: ReferenceSystem, +) { + let subject_idx = subject.get_idx(dictionary); + let predicate_idx = predicate.get_idx(dictionary); + let object_idx = object.get_idx(dictionary); + + match reference_system { + ReferenceSystem::SPO => expected[subject_idx] = predicate_idx as u32, + ReferenceSystem::SOP => expected[subject_idx] = object_idx as u32, + ReferenceSystem::PSO => expected[predicate_idx] = subject_idx as u32, + ReferenceSystem::POS => expected[predicate_idx] = object_idx as u32, + ReferenceSystem::OSP => expected[object_idx] = subject_idx as u32, + ReferenceSystem::OPS => expected[object_idx] = predicate_idx as u32, + } +} diff --git a/tests/get_object_test.rs b/tests/get_object_test.rs index 03f8e38..a6d0882 100644 --- a/tests/get_object_test.rs +++ b/tests/get_object_test.rs @@ -1,3 +1,4 @@ +use common::set_expected_third_term_matrix; use remote_hdt::storage::layout::matrix::MatrixLayout; use remote_hdt::storage::layout::tabular::TabularLayout; use remote_hdt::storage::ops::Ops; @@ -19,7 +20,7 @@ fn get_object_matrix_sharding_test() -> Result<(), Box> { common::setup( common::SHARDING_ZARR, &mut storage, - ChunkingStrategy::Sharding(3), + ChunkingStrategy::Sharding(4), ReferenceSystem::SPO, ); @@ -31,10 +32,19 @@ fn get_object_matrix_sharding_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual == vec![2, 0, 0, 0, 0] { + let mut expected = vec![0u32; storage.get_dictionary().subjects_size()]; + set_expected_third_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::DateOfBirth, + common::Object::Date, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + + if actual == expected { Ok(()) } else { - println!("{:?}", actual); Err(String::from("Expected and actual results are not equals").into()) } } @@ -58,11 +68,17 @@ fn get_object_tabular_test() -> Result<(), Box> { _ => unreachable!(), }; - let mut expected = TriMat::new((4, 9)); - expected.add_triplet(1, 3, 3); - let expected = expected.to_csc(); + let mut expected = TriMat::new(( + storage.get_dictionary().subjects_size(), + storage.get_dictionary().objects_size(), + )); + expected.add_triplet( + common::Subject::Bombe.get_idx(&storage.get_dictionary()), + common::Object::Alan.get_idx(&storage.get_dictionary()), + common::Predicate::Discoverer.get_idx(&storage.get_dictionary()), + ); - if actual == expected { + if actual == expected.to_csc() { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) diff --git a/tests/get_predicate_test.rs b/tests/get_predicate_test.rs index 6352252..be46da1 100644 --- a/tests/get_predicate_test.rs +++ b/tests/get_predicate_test.rs @@ -1,3 +1,4 @@ +use common::set_expected_second_term_matrix; use remote_hdt::storage::layout::matrix::MatrixLayout; use remote_hdt::storage::layout::tabular::TabularLayout; use remote_hdt::storage::ops::Ops; @@ -31,12 +32,37 @@ fn get_predicate_matrix_chunk_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual - == vec![ - 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 5, 0, 0, 0, - ] - { + let mut expected = vec![ + 0u32; + storage.get_dictionary().subjects_size() + * storage.get_dictionary().objects_size() + ]; + set_expected_second_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::InstanceOf, + common::Object::Human, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_second_term_matrix( + &mut expected, + common::Subject::Wilmslow, + common::Predicate::InstanceOf, + common::Object::Town, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_second_term_matrix( + &mut expected, + common::Subject::Bombe, + common::Predicate::InstanceOf, + common::Object::Computer, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + + if actual == expected { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) @@ -62,7 +88,10 @@ fn get_predicate_tabular_test() -> Result<(), Box> { _ => unreachable!(), }; - let mut expected = TriMat::new((4, 9)); + let mut expected = TriMat::new(( + storage.get_dictionary().subjects_size(), + storage.get_dictionary().objects_size(), + )); expected.add_triplet( common::Subject::Alan.get_idx(&storage.get_dictionary()), common::Object::Human.get_idx(&storage.get_dictionary()), diff --git a/tests/get_subject_test.rs b/tests/get_subject_test.rs index 171e744..fdc8adc 100644 --- a/tests/get_subject_test.rs +++ b/tests/get_subject_test.rs @@ -1,3 +1,4 @@ +use common::set_expected_first_term_matrix; use remote_hdt::storage::layout::matrix::MatrixLayout; use remote_hdt::storage::layout::tabular::TabularLayout; use remote_hdt::storage::ops::Ops; @@ -31,7 +32,49 @@ fn get_subject_matrix_chunk_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual == vec![2, 4, 5, 0, 0, 0, 0, 7, 8] { + let mut expected = vec![0u32; storage.get_dictionary().objects_size()]; + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::InstanceOf, + common::Object::Human, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::PlaceOfBirth, + common::Object::Warrington, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::PlaceOfDeath, + common::Object::Wilmslow, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::DateOfBirth, + common::Object::Date, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::Employer, + common::Object::GCHQ, + &storage.get_dictionary(), + ReferenceSystem::SPO, + ); + + if actual == expected { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) @@ -57,7 +100,11 @@ fn get_subject_matrix_sharding_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual == vec![0, 0, 0, 0, 0, 5, 1, 0, 0] { + let mut expected = vec![0u32; storage.get_dictionary().objects_size()]; + expected[5] = common::Predicate::InstanceOf.get_idx(&storage.get_dictionary()) as u32; + expected[6] = common::Predicate::Country.get_idx(&storage.get_dictionary()) as u32; + + if actual == expected { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) @@ -83,15 +130,37 @@ fn get_subject_tabular_test() -> Result<(), Box> { _ => unreachable!(), }; - let mut expected = TriMat::new((4, 9)); - expected.add_triplet(0, 0, 2); - expected.add_triplet(0, 1, 4); - expected.add_triplet(0, 2, 5); - expected.add_triplet(0, 7, 7); - expected.add_triplet(0, 8, 8); - let expected = expected.to_csc(); + let mut expected = TriMat::new(( + storage.get_dictionary().subjects_size(), + storage.get_dictionary().objects_size(), + )); + expected.add_triplet( + common::Subject::Alan.get_idx(&storage.get_dictionary()), + common::Object::Human.get_idx(&storage.get_dictionary()), + common::Predicate::InstanceOf.get_idx(&storage.get_dictionary()), + ); + expected.add_triplet( + common::Subject::Alan.get_idx(&storage.get_dictionary()), + common::Object::Warrington.get_idx(&storage.get_dictionary()), + common::Predicate::PlaceOfBirth.get_idx(&storage.get_dictionary()), + ); + expected.add_triplet( + common::Subject::Alan.get_idx(&storage.get_dictionary()), + common::Object::Wilmslow.get_idx(&storage.get_dictionary()), + common::Predicate::PlaceOfDeath.get_idx(&storage.get_dictionary()), + ); + expected.add_triplet( + common::Subject::Alan.get_idx(&storage.get_dictionary()), + common::Object::Date.get_idx(&storage.get_dictionary()), + common::Predicate::DateOfBirth.get_idx(&storage.get_dictionary()), + ); + expected.add_triplet( + common::Subject::Alan.get_idx(&storage.get_dictionary()), + common::Object::GCHQ.get_idx(&storage.get_dictionary()), + common::Predicate::Employer.get_idx(&storage.get_dictionary()), + ); - if actual == expected { + if actual == expected.to_csc() { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) diff --git a/tests/orientation.rs b/tests/orientation.rs index cf6a737..5228fb4 100644 --- a/tests/orientation.rs +++ b/tests/orientation.rs @@ -1,3 +1,4 @@ +use common::set_expected_first_term_matrix; use remote_hdt::storage::layout::matrix::MatrixLayout; use remote_hdt::storage::layout::tabular::TabularLayout; use remote_hdt::storage::ops::Ops; @@ -30,7 +31,33 @@ fn orientation_pso_matrix_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual == vec![3, 0, 1] { + let mut expected = vec![0u32; storage.get_dictionary().objects_size()]; + set_expected_first_term_matrix( + &mut expected, + common::Subject::Alan, + common::Predicate::InstanceOf, + common::Object::Human, + &storage.get_dictionary(), + ReferenceSystem::PSO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Wilmslow, + common::Predicate::InstanceOf, + common::Object::Town, + &storage.get_dictionary(), + ReferenceSystem::PSO, + ); + set_expected_first_term_matrix( + &mut expected, + common::Subject::Bombe, + common::Predicate::InstanceOf, + common::Object::Computer, + &storage.get_dictionary(), + ReferenceSystem::PSO, + ); + + if actual == expected { Ok(()) } else { Err(String::from("Expected and actual results are not equals").into()) @@ -56,7 +83,17 @@ fn orientation_ops_matrix_test() -> Result<(), Box> { _ => unreachable!(), }; - if actual == vec![0, 3, 0, 0] { + let mut expected = vec![0u32; storage.get_dictionary().subjects_size()]; + set_expected_first_term_matrix( + &mut expected, + common::Subject::Bombe, + common::Predicate::Discoverer, + common::Object::Alan, + &storage.get_dictionary(), + ReferenceSystem::OPS, + ); + + if actual == expected { Ok(()) } else { println!("{:?}", actual);