Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust): implement a ListingCatalog #3300

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions rust/lance-arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -930,10 +930,7 @@ mod tests {
DataType::Struct(fields.clone()),
false,
)]);
let children = types
.iter()
.map(|ty| new_empty_array(ty))
.collect::<Vec<_>>();
let children = types.iter().map(new_empty_array).collect::<Vec<_>>();
let batch = RecordBatch::try_new(
Arc::new(schema.clone()),
vec![Arc::new(StructArray::new(fields, children, None)) as ArrayRef],
Expand Down
10 changes: 10 additions & 0 deletions rust/lance/src/catalog.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

pub(crate) mod catalog_trait;
pub(crate) mod database;
pub(crate) mod dataset_identifier;

pub use catalog_trait::Catalog;
pub use database::Database;
pub use dataset_identifier::DatasetIdentifier;
95 changes: 95 additions & 0 deletions rust/lance/src/catalog/catalog_trait.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use crate::catalog::database::Database;
use crate::catalog::dataset_identifier::DatasetIdentifier;
use crate::dataset::Dataset;
use std::collections::{HashMap, HashSet};

pub trait Catalog {
/// Initialize the catalog.
fn initialize(&self, name: &str, properties: &HashMap<&str, &str>) -> Result<(), String>;

/// List all datasets under a specified database.
fn list_datasets(&self, database: &Database) -> Vec<DatasetIdentifier>;

/// Create a new dataset in the catalog.
fn create_dataset(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using register_dataset interface to register an existed lance dataset is also needed.

&self,
identifier: &DatasetIdentifier,
location: &str,
) -> Result<Dataset, String>;

/// Check if a dataset exists in the catalog.
fn dataset_exists(&self, identifier: &DatasetIdentifier) -> bool;

/// Drop a dataset from the catalog.
fn drop_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>;

/// Drop a dataset from the catalog and purge the metadata.
fn drop_dataset_with_purge(
&self,
identifier: &DatasetIdentifier,
purge: &bool,
) -> Result<(), String>;

/// Rename a dataset in the catalog.
fn rename_dataset(
&self,
from: &DatasetIdentifier,
to: &DatasetIdentifier,
) -> Result<(), String>;

/// Load a dataset from the catalog.
fn load_dataset(&self, name: &DatasetIdentifier) -> Result<Dataset, String>;

/// Invalidate cached table metadata from current catalog.
fn invalidate_dataset(&self, identifier: &DatasetIdentifier) -> Result<(), String>;

/// Register a dataset in the catalog.
fn register_dataset(&self, identifier: &DatasetIdentifier) -> Result<Dataset, String>;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @SaintBacchus it's here


/// Create a database in the catalog.
fn create_database(
&self,
database: &Database,
metadata: HashMap<String, String>,
) -> Result<(), String>;

/// List top-level databases from the catalog.
fn list_databases(&self) -> Vec<Database> {
self.list_child_databases(&Database::empty())
.unwrap_or_default()
}

/// List child databases from the database.
fn list_child_databases(&self, database: &Database) -> Result<Vec<Database>, String>;

/// Load metadata properties for a database.
fn load_database_metadata(
&self,
database: &Database,
) -> Result<HashMap<String, String>, String>;

/// Drop a database.
fn drop_database(&self, database: &Database) -> Result<bool, String>;

/// Set a collection of properties on a database in the catalog.
fn set_properties(
&self,
database: &Database,
properties: HashMap<String, String>,
) -> Result<bool, String>;

/// Remove a set of property keys from a database in the catalog.
fn remove_properties(
&self,
database: &Database,
properties: HashSet<String>,
) -> Result<bool, String>;

/// Checks whether the database exists.
fn database_exists(&self, database: &Database) -> bool {
self.load_database_metadata(database).is_ok()
}
}
141 changes: 141 additions & 0 deletions rust/lance/src/catalog/database.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

use std::fmt;
use std::hash::{Hash, Hasher};

#[derive(Clone)]
pub struct Database {
levels: Vec<String>,
}

impl Database {
pub fn empty() -> Self {
Self { levels: Vec::new() }
}

pub fn of(levels: &[&str]) -> Self {
assert!(
levels.iter().all(|&level| level != "\0"),
"Cannot create a database with the null-byte character"
);
Self {
levels: levels.iter().map(|&s| s.to_string()).collect(),
}
}

pub fn levels(&self) -> &[String] {
&self.levels
}

pub fn level(&self, pos: usize) -> &str {
&self.levels[pos]
}

pub fn is_empty(&self) -> bool {
self.levels.is_empty()
}

pub fn length(&self) -> usize {
self.levels.len()
}
}

impl PartialEq for Database {
fn eq(&self, other: &Self) -> bool {
self.levels == other.levels
}
}

impl Eq for Database {}

impl Hash for Database {
fn hash<H: Hasher>(&self, state: &mut H) {
self.levels.hash(state);
}
}

impl fmt::Display for Database {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.levels.join("."))
}
}

impl fmt::Debug for Database {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Database")
.field("levels", &self.levels)
.finish()
}
}

#[cfg(test)]
mod tests {
use super::*;
use std::hash::DefaultHasher;

#[test]
fn test_empty_database() {
let ns = Database::empty();
assert!(ns.is_empty());
assert_eq!(ns.length(), 0);
assert_eq!(ns.levels().len(), 0);
}

#[test]
fn test_database_of() {
let ns = Database::of(&["level1", "level2"]);
assert!(!ns.is_empty());
assert_eq!(ns.length(), 2);
assert_eq!(ns.level(0), "level1");
assert_eq!(ns.level(1), "level2");
}

#[test]
#[should_panic(expected = "Cannot create a database with the null-byte character")]
fn test_database_of_with_null_byte() {
Database::of(&["level1", "\0"]);
}

#[test]
fn test_database_levels() {
let ns = Database::of(&["level1", "level2"]);
let levels = ns.levels();
assert_eq!(levels, &vec!["level1".to_string(), "level2".to_string()]);
}

#[test]
fn test_database_equality() {
let ns1 = Database::of(&["level1", "level2"]);
let ns2 = Database::of(&["level1", "level2"]);
let ns3 = Database::of(&["level1", "level3"]);
assert_eq!(ns1, ns2);
assert_ne!(ns1, ns3);
}

#[test]
fn test_database_hash() {
let ns1 = Database::of(&["level1", "level2"]);
let ns2 = Database::of(&["level1", "level2"]);
let mut hasher1 = DefaultHasher::new();
ns1.hash(&mut hasher1);
let mut hasher2 = DefaultHasher::new();
ns2.hash(&mut hasher2);
assert_eq!(hasher1.finish(), hasher2.finish());
}

#[test]
fn test_database_display() {
let ns = Database::of(&["level1", "level2"]);
assert_eq!(format!("{}", ns), "level1.level2");
}

#[test]
fn test_database_debug() {
let ns = Database::of(&["level1", "level2"]);
assert_eq!(
format!("{:?}", ns),
"Database { levels: [\"level1\", \"level2\"] }"
);
}
}
Loading
Loading