From 1d308e0fc51723d64f66fd420c689fd173d41ec8 Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 25 Dec 2024 20:23:24 +0800 Subject: [PATCH 1/7] feat(rust): introduce catalog interface for rust module --- rust/lance-arrow/src/lib.rs | 5 +- rust/lance/src/catalog.rs | 10 ++ rust/lance/src/catalog/catalog_trait.rs | 51 ++++++ rust/lance/src/catalog/dataset_identifier.rs | 171 +++++++++++++++++++ rust/lance/src/catalog/namespace.rs | 141 +++++++++++++++ rust/lance/src/lib.rs | 1 + 6 files changed, 375 insertions(+), 4 deletions(-) create mode 100644 rust/lance/src/catalog.rs create mode 100644 rust/lance/src/catalog/catalog_trait.rs create mode 100644 rust/lance/src/catalog/dataset_identifier.rs create mode 100644 rust/lance/src/catalog/namespace.rs diff --git a/rust/lance-arrow/src/lib.rs b/rust/lance-arrow/src/lib.rs index 78c2b224e9..d08992bf3b 100644 --- a/rust/lance-arrow/src/lib.rs +++ b/rust/lance-arrow/src/lib.rs @@ -930,10 +930,7 @@ mod tests { DataType::Struct(fields.clone()), false, )]); - let children = types - .iter() - .map(|ty| new_empty_array(ty)) - .collect::>(); + let children = types.iter().map(new_empty_array).collect::>(); let batch = RecordBatch::try_new( Arc::new(schema.clone()), vec![Arc::new(StructArray::new(fields, children, None)) as ArrayRef], diff --git a/rust/lance/src/catalog.rs b/rust/lance/src/catalog.rs new file mode 100644 index 0000000000..a2110cfdc9 --- /dev/null +++ b/rust/lance/src/catalog.rs @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +pub(crate) mod catalog_trait; +pub(crate) mod dataset_identifier; +pub(crate) mod namespace; + +pub use catalog_trait::Catalog; +pub use dataset_identifier::DatasetIdentifier; +pub use namespace::Namespace; diff --git a/rust/lance/src/catalog/catalog_trait.rs b/rust/lance/src/catalog/catalog_trait.rs new file mode 100644 index 0000000000..7fb65eee9c --- /dev/null +++ b/rust/lance/src/catalog/catalog_trait.rs @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use crate::catalog::dataset_identifier::DatasetIdentifier; +use crate::catalog::namespace::Namespace; +use crate::dataset::Dataset; +use std::collections::HashMap; + +pub trait Catalog { + /// List all datasets under a specified namespace. + fn list_datasets(&self, namespace: &Namespace) -> Vec; + + /// Create a new dataset in the catalog. + fn create_dataset( + &self, + identifier: &DatasetIdentifier, + location: &str, + ) -> Result; + + /// Check if a dataset exists in the catalog. + fn dataset_exists(&self, identifier: &DatasetIdentifier) -> bool; + + /// Drop a dataset from the catalog. + fn drop_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>; + + /// Drop a dataset from the catalog and purge the metadata. + fn drop_dataset_with_purge( + &self, + identifier: &DatasetIdentifier, + purge: &bool, + ) -> Result<(), String>; + + /// Rename a dataset in the catalog. + fn rename_dataset( + &self, + from: &DatasetIdentifier, + to: &DatasetIdentifier, + ) -> Result<(), String>; + + /// Load a dataset from the catalog. + fn load_dataset(&self, name: &DatasetIdentifier) -> Result; + + /// Invalidate cached table metadata from current catalog. + fn invalidate_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>; + + /// Register a dataset in the catalog. + fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result; + + /// Initialize the catalog. + fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; +} diff --git a/rust/lance/src/catalog/dataset_identifier.rs b/rust/lance/src/catalog/dataset_identifier.rs new file mode 100644 index 0000000000..b447298116 --- /dev/null +++ b/rust/lance/src/catalog/dataset_identifier.rs @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use crate::catalog::namespace::Namespace; +use std::fmt; +use std::hash::{Hash, Hasher}; + +#[derive(Clone, Debug)] +pub struct DatasetIdentifier { + namespace: Namespace, + name: String, +} + +impl DatasetIdentifier { + pub fn of(names: &[&str]) -> Self { + assert!( + !names.is_empty(), + "Cannot create dataset identifier without a dataset name" + ); + let namespace = Namespace::of(&names[..names.len() - 1]); + let name = names[names.len() - 1].to_string(); + Self { namespace, name } + } + + pub fn of_namespace(namespace: Namespace, name: &str) -> Self { + assert!(!name.is_empty(), "Invalid dataset name: null or empty"); + Self { + namespace, + name: name.to_string(), + } + } + + pub fn parse(identifier: &str) -> Self { + let parts: Vec<&str> = identifier.split('.').collect(); + Self::of(&parts) + } + + pub fn has_namespace(&self) -> bool { + !self.namespace.is_empty() + } + + pub fn namespace(&self) -> &Namespace { + &self.namespace + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn to_lowercase(&self) -> Self { + let new_levels: Vec = self + .namespace + .levels() + .iter() + .map(|s| s.to_lowercase()) + .collect(); + let new_name = self.name.to_lowercase(); + Self::of_namespace( + Namespace::of(&new_levels.iter().map(String::as_str).collect::>()), + &new_name, + ) + } +} + +impl PartialEq for DatasetIdentifier { + fn eq(&self, other: &Self) -> bool { + self.namespace == other.namespace && self.name == other.name + } +} + +impl Eq for DatasetIdentifier {} + +impl Hash for DatasetIdentifier { + fn hash(&self, state: &mut H) { + self.namespace.hash(state); + self.name.hash(state); + } +} + +impl fmt::Display for DatasetIdentifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.has_namespace() { + write!(f, "{}.{}", self.namespace, self.name) + } else { + write!(f, "{}", self.name) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::hash::DefaultHasher; + + #[test] + fn test_dataset_identifier_of() { + let ds_id = DatasetIdentifier::of(&["namespace1", "namespace2", "dataset"]); + assert_eq!( + ds_id.namespace().levels(), + &vec!["namespace1".to_string(), "namespace2".to_string()] + ); + assert_eq!(ds_id.name(), "dataset"); + } + + #[test] + fn test_dataset_identifier_of_namespace() { + let namespace = Namespace::of(&["namespace1", "namespace2"]); + let ds_id = DatasetIdentifier::of_namespace(namespace.clone(), "dataset"); + assert_eq!(ds_id.namespace(), &namespace); + assert_eq!(ds_id.name(), "dataset"); + } + + #[test] + fn test_dataset_identifier_parse() { + let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + assert_eq!( + ds_id.namespace().levels(), + &vec!["namespace1".to_string(), "namespace2".to_string()] + ); + assert_eq!(ds_id.name(), "dataset"); + } + + #[test] + fn test_dataset_identifier_has_namespace() { + let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + assert!(ds_id.has_namespace()); + + let ds_id_no_ns = DatasetIdentifier::of(&["dataset"]); + assert!(!ds_id_no_ns.has_namespace()); + } + + #[test] + fn test_dataset_identifier_to_lowercase() { + let ds_id = DatasetIdentifier::parse("Namespace1.Namespace2.Dataset"); + let lower_ds_id = ds_id.to_lowercase(); + assert_eq!( + lower_ds_id.namespace().levels(), + &vec!["namespace1".to_string(), "namespace2".to_string()] + ); + assert_eq!(lower_ds_id.name(), "dataset"); + } + + #[test] + fn test_dataset_identifier_equality() { + let ds_id1 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let ds_id2 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let ds_id3 = DatasetIdentifier::parse("namespace1.namespace2.other_dataset"); + assert_eq!(ds_id1, ds_id2); + assert_ne!(ds_id1, ds_id3); + } + + #[test] + fn test_dataset_identifier_hash() { + let ds_id1 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let ds_id2 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let mut hasher1 = DefaultHasher::new(); + ds_id1.hash(&mut hasher1); + let mut hasher2 = DefaultHasher::new(); + ds_id2.hash(&mut hasher2); + assert_eq!(hasher1.finish(), hasher2.finish()); + } + + #[test] + fn test_dataset_identifier_display() { + let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + assert_eq!(format!("{}", ds_id), "namespace1.namespace2.dataset"); + + let ds_id_no_ns = DatasetIdentifier::of(&["dataset"]); + assert_eq!(format!("{}", ds_id_no_ns), "dataset"); + } +} diff --git a/rust/lance/src/catalog/namespace.rs b/rust/lance/src/catalog/namespace.rs new file mode 100644 index 0000000000..960b95f81f --- /dev/null +++ b/rust/lance/src/catalog/namespace.rs @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +use std::fmt; +use std::hash::{Hash, Hasher}; + +#[derive(Clone)] +pub struct Namespace { + levels: Vec, +} + +impl Namespace { + pub fn empty() -> Self { + Self { levels: Vec::new() } + } + + pub fn of(levels: &[&str]) -> Self { + assert!( + levels.iter().all(|&level| level != "\0"), + "Cannot create a namespace with the null-byte character" + ); + Self { + levels: levels.iter().map(|&s| s.to_string()).collect(), + } + } + + pub fn levels(&self) -> &[String] { + &self.levels + } + + pub fn level(&self, pos: usize) -> &str { + &self.levels[pos] + } + + pub fn is_empty(&self) -> bool { + self.levels.is_empty() + } + + pub fn length(&self) -> usize { + self.levels.len() + } +} + +impl PartialEq for Namespace { + fn eq(&self, other: &Self) -> bool { + self.levels == other.levels + } +} + +impl Eq for Namespace {} + +impl Hash for Namespace { + fn hash(&self, state: &mut H) { + self.levels.hash(state); + } +} + +impl fmt::Display for Namespace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.levels.join(".")) + } +} + +impl fmt::Debug for Namespace { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Namespace") + .field("levels", &self.levels) + .finish() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::hash::DefaultHasher; + + #[test] + fn test_empty_namespace() { + let ns = Namespace::empty(); + assert!(ns.is_empty()); + assert_eq!(ns.length(), 0); + assert_eq!(ns.levels().len(), 0); + } + + #[test] + fn test_namespace_of() { + let ns = Namespace::of(&["level1", "level2"]); + assert!(!ns.is_empty()); + assert_eq!(ns.length(), 2); + assert_eq!(ns.level(0), "level1"); + assert_eq!(ns.level(1), "level2"); + } + + #[test] + #[should_panic(expected = "Cannot create a namespace with the null-byte character")] + fn test_namespace_of_with_null_byte() { + Namespace::of(&["level1", "\0"]); + } + + #[test] + fn test_namespace_levels() { + let ns = Namespace::of(&["level1", "level2"]); + let levels = ns.levels(); + assert_eq!(levels, &vec!["level1".to_string(), "level2".to_string()]); + } + + #[test] + fn test_namespace_equality() { + let ns1 = Namespace::of(&["level1", "level2"]); + let ns2 = Namespace::of(&["level1", "level2"]); + let ns3 = Namespace::of(&["level1", "level3"]); + assert_eq!(ns1, ns2); + assert_ne!(ns1, ns3); + } + + #[test] + fn test_namespace_hash() { + let ns1 = Namespace::of(&["level1", "level2"]); + let ns2 = Namespace::of(&["level1", "level2"]); + let mut hasher1 = DefaultHasher::new(); + ns1.hash(&mut hasher1); + let mut hasher2 = DefaultHasher::new(); + ns2.hash(&mut hasher2); + assert_eq!(hasher1.finish(), hasher2.finish()); + } + + #[test] + fn test_namespace_display() { + let ns = Namespace::of(&["level1", "level2"]); + assert_eq!(format!("{}", ns), "level1.level2"); + } + + #[test] + fn test_namespace_debug() { + let ns = Namespace::of(&["level1", "level2"]); + assert_eq!( + format!("{:?}", ns), + "Namespace { levels: [\"level1\", \"level2\"] }" + ); + } +} diff --git a/rust/lance/src/lib.rs b/rust/lance/src/lib.rs index 706a553841..0c88f7b3c4 100644 --- a/rust/lance/src/lib.rs +++ b/rust/lance/src/lib.rs @@ -75,6 +75,7 @@ pub use lance_core::{datatypes, error}; pub use lance_core::{Error, Result}; pub mod arrow; +pub mod catalog; pub mod datafusion; pub mod dataset; pub mod index; From 98c0f0a82847ddb54e3b98842ea7f95c3eb4da06 Mon Sep 17 00:00:00 2001 From: yanghua Date: Thu, 26 Dec 2024 19:49:02 +0800 Subject: [PATCH 2/7] trigger ci From 68fee4a7c1d73f777ebc947c291366a9ec7091ff Mon Sep 17 00:00:00 2001 From: yanghua Date: Thu, 26 Dec 2024 20:01:46 +0800 Subject: [PATCH 3/7] feat(rust): introduce catalog interface for rust module --- rust/lance/src/catalog/catalog_trait.rs | 58 +++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/rust/lance/src/catalog/catalog_trait.rs b/rust/lance/src/catalog/catalog_trait.rs index 7fb65eee9c..734c086020 100644 --- a/rust/lance/src/catalog/catalog_trait.rs +++ b/rust/lance/src/catalog/catalog_trait.rs @@ -4,9 +4,16 @@ use crate::catalog::dataset_identifier::DatasetIdentifier; use crate::catalog::namespace::Namespace; use crate::dataset::Dataset; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; pub trait Catalog { + /// Initialize the catalog. + fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; + + /// + /// Dataset traits + /// + /// List all datasets under a specified namespace. fn list_datasets(&self, namespace: &Namespace) -> Vec; @@ -46,6 +53,51 @@ pub trait Catalog { /// Register a dataset in the catalog. fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result; - /// Initialize the catalog. - fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; + /// + /// Namespace traits + /// + + /// Create a namespace in the catalog. + fn create_namespace( + &self, + namespace: &Namespace, + metadata: HashMap, + ) -> Result<(), String>; + + /// List top-level namespaces from the catalog. + fn list_namespaces(&self) -> Vec { + self.list_child_namespaces(&Namespace::empty()) + .unwrap_or_default() + } + + /// List child namespaces from the namespace. + fn list_child_namespaces(&self, namespace: &Namespace) -> Result, String>; + + /// Load metadata properties for a namespace. + fn load_namespace_metadata( + &self, + namespace: &Namespace, + ) -> Result, String>; + + /// Drop a namespace. + fn drop_namespace(&self, namespace: &Namespace) -> Result; + + /// Set a collection of properties on a namespace in the catalog. + fn set_properties( + &self, + namespace: &Namespace, + properties: HashMap, + ) -> Result; + + /// Remove a set of property keys from a namespace in the catalog. + fn remove_properties( + &self, + namespace: &Namespace, + properties: HashSet, + ) -> Result; + + /// Checks whether the Namespace exists. + fn namespace_exists(&self, namespace: &Namespace) -> bool { + self.load_namespace_metadata(namespace).is_ok() + } } From 23f34e30cadb94f3af77e9c9cd5b402d14fa7c0a Mon Sep 17 00:00:00 2001 From: yanghua Date: Fri, 27 Dec 2024 10:04:38 +0800 Subject: [PATCH 4/7] feat(rust): introduce catalog interface for rust module --- rust/lance/src/catalog/catalog_trait.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/rust/lance/src/catalog/catalog_trait.rs b/rust/lance/src/catalog/catalog_trait.rs index 734c086020..9ae20a6abb 100644 --- a/rust/lance/src/catalog/catalog_trait.rs +++ b/rust/lance/src/catalog/catalog_trait.rs @@ -10,10 +10,6 @@ pub trait Catalog { /// Initialize the catalog. fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; - /// - /// Dataset traits - /// - /// List all datasets under a specified namespace. fn list_datasets(&self, namespace: &Namespace) -> Vec; @@ -53,10 +49,6 @@ pub trait Catalog { /// Register a dataset in the catalog. fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result; - /// - /// Namespace traits - /// - /// Create a namespace in the catalog. fn create_namespace( &self, From 859a1e8baef92ea81fa4815904089c74027c899d Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 31 Dec 2024 11:43:13 +0800 Subject: [PATCH 5/7] Rename namespace to database --- rust/lance/src/catalog.rs | 4 +- rust/lance/src/catalog/catalog_trait.rs | 46 +++++------ .../src/catalog/{namespace.rs => database.rs} | 60 +++++++------- rust/lance/src/catalog/dataset_identifier.rs | 82 +++++++++---------- 4 files changed, 96 insertions(+), 96 deletions(-) rename rust/lance/src/catalog/{namespace.rs => database.rs} (63%) diff --git a/rust/lance/src/catalog.rs b/rust/lance/src/catalog.rs index a2110cfdc9..cba0f37638 100644 --- a/rust/lance/src/catalog.rs +++ b/rust/lance/src/catalog.rs @@ -3,8 +3,8 @@ pub(crate) mod catalog_trait; pub(crate) mod dataset_identifier; -pub(crate) mod namespace; +pub(crate) mod database; pub use catalog_trait::Catalog; pub use dataset_identifier::DatasetIdentifier; -pub use namespace::Namespace; +pub use database::Database; diff --git a/rust/lance/src/catalog/catalog_trait.rs b/rust/lance/src/catalog/catalog_trait.rs index 9ae20a6abb..0208cbf016 100644 --- a/rust/lance/src/catalog/catalog_trait.rs +++ b/rust/lance/src/catalog/catalog_trait.rs @@ -2,7 +2,7 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors use crate::catalog::dataset_identifier::DatasetIdentifier; -use crate::catalog::namespace::Namespace; +use crate::catalog::database::Database; use crate::dataset::Dataset; use std::collections::{HashMap, HashSet}; @@ -10,8 +10,8 @@ pub trait Catalog { /// Initialize the catalog. fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>; - /// List all datasets under a specified namespace. - fn list_datasets(&self, namespace: &Namespace) -> Vec; + /// List all datasets under a specified database. + fn list_datasets(&self, database: &Database) -> Vec; /// Create a new dataset in the catalog. fn create_dataset( @@ -49,47 +49,47 @@ pub trait Catalog { /// Register a dataset in the catalog. fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result; - /// Create a namespace in the catalog. - fn create_namespace( + /// Create a database in the catalog. + fn create_database( &self, - namespace: &Namespace, + database: &Database, metadata: HashMap, ) -> Result<(), String>; - /// List top-level namespaces from the catalog. - fn list_namespaces(&self) -> Vec { - self.list_child_namespaces(&Namespace::empty()) + /// List top-level databases from the catalog. + fn list_databases(&self) -> Vec { + self.list_child_databases(&Database::empty()) .unwrap_or_default() } - /// List child namespaces from the namespace. - fn list_child_namespaces(&self, namespace: &Namespace) -> Result, String>; + /// List child databases from the database. + fn list_child_databases(&self, database: &Database) -> Result, String>; - /// Load metadata properties for a namespace. - fn load_namespace_metadata( + /// Load metadata properties for a database. + fn load_database_metadata( &self, - namespace: &Namespace, + database: &Database, ) -> Result, String>; - /// Drop a namespace. - fn drop_namespace(&self, namespace: &Namespace) -> Result; + /// Drop a database. + fn drop_database(&self, database: &Database) -> Result; - /// Set a collection of properties on a namespace in the catalog. + /// Set a collection of properties on a database in the catalog. fn set_properties( &self, - namespace: &Namespace, + database: &Database, properties: HashMap, ) -> Result; - /// Remove a set of property keys from a namespace in the catalog. + /// Remove a set of property keys from a database in the catalog. fn remove_properties( &self, - namespace: &Namespace, + database: &Database, properties: HashSet, ) -> Result; - /// Checks whether the Namespace exists. - fn namespace_exists(&self, namespace: &Namespace) -> bool { - self.load_namespace_metadata(namespace).is_ok() + /// Checks whether the database exists. + fn database_exists(&self, database: &Database) -> bool { + self.load_database_metadata(database).is_ok() } } diff --git a/rust/lance/src/catalog/namespace.rs b/rust/lance/src/catalog/database.rs similarity index 63% rename from rust/lance/src/catalog/namespace.rs rename to rust/lance/src/catalog/database.rs index 960b95f81f..158dc3485a 100644 --- a/rust/lance/src/catalog/namespace.rs +++ b/rust/lance/src/catalog/database.rs @@ -5,11 +5,11 @@ use std::fmt; use std::hash::{Hash, Hasher}; #[derive(Clone)] -pub struct Namespace { +pub struct Database { levels: Vec, } -impl Namespace { +impl Database { pub fn empty() -> Self { Self { levels: Vec::new() } } @@ -17,7 +17,7 @@ impl Namespace { pub fn of(levels: &[&str]) -> Self { assert!( levels.iter().all(|&level| level != "\0"), - "Cannot create a namespace with the null-byte character" + "Cannot create a database with the null-byte character" ); Self { levels: levels.iter().map(|&s| s.to_string()).collect(), @@ -41,29 +41,29 @@ impl Namespace { } } -impl PartialEq for Namespace { +impl PartialEq for Database { fn eq(&self, other: &Self) -> bool { self.levels == other.levels } } -impl Eq for Namespace {} +impl Eq for Database {} -impl Hash for Namespace { +impl Hash for Database { fn hash(&self, state: &mut H) { self.levels.hash(state); } } -impl fmt::Display for Namespace { +impl fmt::Display for Database { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.levels.join(".")) } } -impl fmt::Debug for Namespace { +impl fmt::Debug for Database { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("Namespace") + f.debug_struct("Database") .field("levels", &self.levels) .finish() } @@ -75,16 +75,16 @@ mod tests { use std::hash::DefaultHasher; #[test] - fn test_empty_namespace() { - let ns = Namespace::empty(); + fn test_empty_database() { + let ns = Database::empty(); assert!(ns.is_empty()); assert_eq!(ns.length(), 0); assert_eq!(ns.levels().len(), 0); } #[test] - fn test_namespace_of() { - let ns = Namespace::of(&["level1", "level2"]); + fn test_database_of() { + let ns = Database::of(&["level1", "level2"]); assert!(!ns.is_empty()); assert_eq!(ns.length(), 2); assert_eq!(ns.level(0), "level1"); @@ -92,31 +92,31 @@ mod tests { } #[test] - #[should_panic(expected = "Cannot create a namespace with the null-byte character")] - fn test_namespace_of_with_null_byte() { - Namespace::of(&["level1", "\0"]); + #[should_panic(expected = "Cannot create a database with the null-byte character")] + fn test_database_of_with_null_byte() { + Database::of(&["level1", "\0"]); } #[test] - fn test_namespace_levels() { - let ns = Namespace::of(&["level1", "level2"]); + fn test_database_levels() { + let ns = Database::of(&["level1", "level2"]); let levels = ns.levels(); assert_eq!(levels, &vec!["level1".to_string(), "level2".to_string()]); } #[test] - fn test_namespace_equality() { - let ns1 = Namespace::of(&["level1", "level2"]); - let ns2 = Namespace::of(&["level1", "level2"]); - let ns3 = Namespace::of(&["level1", "level3"]); + fn test_database_equality() { + let ns1 = Database::of(&["level1", "level2"]); + let ns2 = Database::of(&["level1", "level2"]); + let ns3 = Database::of(&["level1", "level3"]); assert_eq!(ns1, ns2); assert_ne!(ns1, ns3); } #[test] - fn test_namespace_hash() { - let ns1 = Namespace::of(&["level1", "level2"]); - let ns2 = Namespace::of(&["level1", "level2"]); + fn test_database_hash() { + let ns1 = Database::of(&["level1", "level2"]); + let ns2 = Database::of(&["level1", "level2"]); let mut hasher1 = DefaultHasher::new(); ns1.hash(&mut hasher1); let mut hasher2 = DefaultHasher::new(); @@ -125,17 +125,17 @@ mod tests { } #[test] - fn test_namespace_display() { - let ns = Namespace::of(&["level1", "level2"]); + fn test_database_display() { + let ns = Database::of(&["level1", "level2"]); assert_eq!(format!("{}", ns), "level1.level2"); } #[test] - fn test_namespace_debug() { - let ns = Namespace::of(&["level1", "level2"]); + fn test_database_debug() { + let ns = Database::of(&["level1", "level2"]); assert_eq!( format!("{:?}", ns), - "Namespace { levels: [\"level1\", \"level2\"] }" + "Database { levels: [\"level1\", \"level2\"] }" ); } } diff --git a/rust/lance/src/catalog/dataset_identifier.rs b/rust/lance/src/catalog/dataset_identifier.rs index b447298116..8be8c9c934 100644 --- a/rust/lance/src/catalog/dataset_identifier.rs +++ b/rust/lance/src/catalog/dataset_identifier.rs @@ -1,13 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use crate::catalog::namespace::Namespace; +use crate::catalog::database::Database; use std::fmt; use std::hash::{Hash, Hasher}; #[derive(Clone, Debug)] pub struct DatasetIdentifier { - namespace: Namespace, + database: Database, name: String, } @@ -17,15 +17,15 @@ impl DatasetIdentifier { !names.is_empty(), "Cannot create dataset identifier without a dataset name" ); - let namespace = Namespace::of(&names[..names.len() - 1]); + let database = Database::of(&names[..names.len() - 1]); let name = names[names.len() - 1].to_string(); - Self { namespace, name } + Self { database: database, name } } - pub fn of_namespace(namespace: Namespace, name: &str) -> Self { + pub fn of_database(database: Database, name: &str) -> Self { assert!(!name.is_empty(), "Invalid dataset name: null or empty"); Self { - namespace, + database: database, name: name.to_string(), } } @@ -35,12 +35,12 @@ impl DatasetIdentifier { Self::of(&parts) } - pub fn has_namespace(&self) -> bool { - !self.namespace.is_empty() + pub fn has_database(&self) -> bool { + !self.database.is_empty() } - pub fn namespace(&self) -> &Namespace { - &self.namespace + pub fn database(&self) -> &Database { + &self.database } pub fn name(&self) -> &str { @@ -49,14 +49,14 @@ impl DatasetIdentifier { pub fn to_lowercase(&self) -> Self { let new_levels: Vec = self - .namespace + .database .levels() .iter() .map(|s| s.to_lowercase()) .collect(); let new_name = self.name.to_lowercase(); - Self::of_namespace( - Namespace::of(&new_levels.iter().map(String::as_str).collect::>()), + Self::of_database( + Database::of(&new_levels.iter().map(String::as_str).collect::>()), &new_name, ) } @@ -64,7 +64,7 @@ impl DatasetIdentifier { impl PartialEq for DatasetIdentifier { fn eq(&self, other: &Self) -> bool { - self.namespace == other.namespace && self.name == other.name + self.database == other.database && self.name == other.name } } @@ -72,15 +72,15 @@ impl Eq for DatasetIdentifier {} impl Hash for DatasetIdentifier { fn hash(&self, state: &mut H) { - self.namespace.hash(state); + self.database.hash(state); self.name.hash(state); } } impl fmt::Display for DatasetIdentifier { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self.has_namespace() { - write!(f, "{}.{}", self.namespace, self.name) + if self.has_database() { + write!(f, "{}.{}", self.database, self.name) } else { write!(f, "{}", self.name) } @@ -94,65 +94,65 @@ mod tests { #[test] fn test_dataset_identifier_of() { - let ds_id = DatasetIdentifier::of(&["namespace1", "namespace2", "dataset"]); + let ds_id = DatasetIdentifier::of(&["database1", "database2", "dataset"]); assert_eq!( - ds_id.namespace().levels(), - &vec!["namespace1".to_string(), "namespace2".to_string()] + ds_id.database().levels(), + &vec!["database1".to_string(), "database2".to_string()] ); assert_eq!(ds_id.name(), "dataset"); } #[test] - fn test_dataset_identifier_of_namespace() { - let namespace = Namespace::of(&["namespace1", "namespace2"]); - let ds_id = DatasetIdentifier::of_namespace(namespace.clone(), "dataset"); - assert_eq!(ds_id.namespace(), &namespace); + fn test_dataset_identifier_of_database() { + let database = Database::of(&["database1", "database2"]); + let ds_id = DatasetIdentifier::of_database(database.clone(), "dataset"); + assert_eq!(ds_id.database(), &database); assert_eq!(ds_id.name(), "dataset"); } #[test] fn test_dataset_identifier_parse() { - let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let ds_id = DatasetIdentifier::parse("database1.database2.dataset"); assert_eq!( - ds_id.namespace().levels(), - &vec!["namespace1".to_string(), "namespace2".to_string()] + ds_id.database().levels(), + &vec!["database1".to_string(), "database2".to_string()] ); assert_eq!(ds_id.name(), "dataset"); } #[test] - fn test_dataset_identifier_has_namespace() { - let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); - assert!(ds_id.has_namespace()); + fn test_dataset_identifier_has_database() { + let ds_id = DatasetIdentifier::parse("database1.database2.dataset"); + assert!(ds_id.has_database()); let ds_id_no_ns = DatasetIdentifier::of(&["dataset"]); - assert!(!ds_id_no_ns.has_namespace()); + assert!(!ds_id_no_ns.has_database()); } #[test] fn test_dataset_identifier_to_lowercase() { - let ds_id = DatasetIdentifier::parse("Namespace1.Namespace2.Dataset"); + let ds_id = DatasetIdentifier::parse("Database1.Database2.Dataset"); let lower_ds_id = ds_id.to_lowercase(); assert_eq!( - lower_ds_id.namespace().levels(), - &vec!["namespace1".to_string(), "namespace2".to_string()] + lower_ds_id.database().levels(), + &vec!["database1".to_string(), "database2".to_string()] ); assert_eq!(lower_ds_id.name(), "dataset"); } #[test] fn test_dataset_identifier_equality() { - let ds_id1 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); - let ds_id2 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); - let ds_id3 = DatasetIdentifier::parse("namespace1.namespace2.other_dataset"); + let ds_id1 = DatasetIdentifier::parse("database1.database2.dataset"); + let ds_id2 = DatasetIdentifier::parse("database1.database2.dataset"); + let ds_id3 = DatasetIdentifier::parse("database1.database2.other_dataset"); assert_eq!(ds_id1, ds_id2); assert_ne!(ds_id1, ds_id3); } #[test] fn test_dataset_identifier_hash() { - let ds_id1 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); - let ds_id2 = DatasetIdentifier::parse("namespace1.namespace2.dataset"); + let ds_id1 = DatasetIdentifier::parse("database1.database2.dataset"); + let ds_id2 = DatasetIdentifier::parse("database1.database2.dataset"); let mut hasher1 = DefaultHasher::new(); ds_id1.hash(&mut hasher1); let mut hasher2 = DefaultHasher::new(); @@ -162,8 +162,8 @@ mod tests { #[test] fn test_dataset_identifier_display() { - let ds_id = DatasetIdentifier::parse("namespace1.namespace2.dataset"); - assert_eq!(format!("{}", ds_id), "namespace1.namespace2.dataset"); + let ds_id = DatasetIdentifier::parse("database1.database2.dataset"); + assert_eq!(format!("{}", ds_id), "database1.database2.dataset"); let ds_id_no_ns = DatasetIdentifier::of(&["dataset"]); assert_eq!(format!("{}", ds_id_no_ns), "dataset"); From c7ac8de953e619db3d8d3c94b8d32b8dda6558f4 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 31 Dec 2024 11:44:36 +0800 Subject: [PATCH 6/7] Rename namespace to database --- rust/lance/src/catalog.rs | 4 ++-- rust/lance/src/catalog/catalog_trait.rs | 2 +- rust/lance/src/catalog/dataset_identifier.rs | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/rust/lance/src/catalog.rs b/rust/lance/src/catalog.rs index cba0f37638..77bca0d1ba 100644 --- a/rust/lance/src/catalog.rs +++ b/rust/lance/src/catalog.rs @@ -2,9 +2,9 @@ // SPDX-FileCopyrightText: Copyright The Lance Authors pub(crate) mod catalog_trait; -pub(crate) mod dataset_identifier; pub(crate) mod database; +pub(crate) mod dataset_identifier; pub use catalog_trait::Catalog; -pub use dataset_identifier::DatasetIdentifier; pub use database::Database; +pub use dataset_identifier::DatasetIdentifier; diff --git a/rust/lance/src/catalog/catalog_trait.rs b/rust/lance/src/catalog/catalog_trait.rs index 0208cbf016..b4291f4bea 100644 --- a/rust/lance/src/catalog/catalog_trait.rs +++ b/rust/lance/src/catalog/catalog_trait.rs @@ -1,8 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors -use crate::catalog::dataset_identifier::DatasetIdentifier; use crate::catalog::database::Database; +use crate::catalog::dataset_identifier::DatasetIdentifier; use crate::dataset::Dataset; use std::collections::{HashMap, HashSet}; diff --git a/rust/lance/src/catalog/dataset_identifier.rs b/rust/lance/src/catalog/dataset_identifier.rs index 8be8c9c934..cb63a9fb38 100644 --- a/rust/lance/src/catalog/dataset_identifier.rs +++ b/rust/lance/src/catalog/dataset_identifier.rs @@ -19,7 +19,10 @@ impl DatasetIdentifier { ); let database = Database::of(&names[..names.len() - 1]); let name = names[names.len() - 1].to_string(); - Self { database: database, name } + Self { + database: database, + name, + } } pub fn of_database(database: Database, name: &str) -> Self { From 8f2bec4d17fb1b4982bafaba7e57edc4a1538474 Mon Sep 17 00:00:00 2001 From: yanghua Date: Tue, 31 Dec 2024 16:05:31 +0800 Subject: [PATCH 7/7] feat(rust): introduce catalog interface for rust module --- rust/lance/src/catalog/dataset_identifier.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/catalog/dataset_identifier.rs b/rust/lance/src/catalog/dataset_identifier.rs index cb63a9fb38..5cc2c106f8 100644 --- a/rust/lance/src/catalog/dataset_identifier.rs +++ b/rust/lance/src/catalog/dataset_identifier.rs @@ -20,7 +20,7 @@ impl DatasetIdentifier { let database = Database::of(&names[..names.len() - 1]); let name = names[names.len() - 1].to_string(); Self { - database: database, + database, name, } } @@ -28,7 +28,7 @@ impl DatasetIdentifier { pub fn of_database(database: Database, name: &str) -> Self { assert!(!name.is_empty(), "Invalid dataset name: null or empty"); Self { - database: database, + database, name: name.to_string(), } }