From ff1621a9fd76ac5b147e6bd2a3314c3bbd96567a Mon Sep 17 00:00:00 2001 From: Devin Smith Date: Fri, 27 Sep 2024 12:43:50 -0700 Subject: [PATCH] feat: Add support for using JDBC Catalog for Iceberg testing (#6144) Fixes #6029 --- extensions/iceberg/TESTING.md | 125 ++++++++++++++++ extensions/iceberg/build.gradle | 18 ++- .../iceberg/layout/IcebergBaseLayout.java | 12 +- .../iceberg/relative/RelativeFileIO.java | 137 ++++++++++++++++++ .../iceberg/relative/RelativeInputFile.java | 39 +++++ .../iceberg/relative/RelativeOutputFile.java | 40 +++++ .../iceberg/util/IcebergCatalogAdapter.java | 2 +- .../deephaven/iceberg/CatalogAdapterTest.java | 49 +++++++ .../io/deephaven/iceberg/PyIceberg1Test.java | 109 ++++++++++++++ .../iceberg/TestCatalog/IcebergTestTable.java | 41 +++--- .../iceberg/junit5/CatalogAdapterBase.java | 36 +++++ .../deephaven/iceberg/sqlite/DbResource.java | 20 +++ .../iceberg/sqlite/SqliteHelper.java | 73 ++++++++++ ...76b5d5-cc59-4f48-bcc5-3a4bf37e5139.parquet | 3 + ...668bfb-107c-4122-8d64-9a078f2f45a9.parquet | 3 + ...-5bde-4642-bac7-cc7573558c7e.metadata.json | 1 + ...-934f-483e-b2b0-4c08baebc5b2.metadata.json | 1 + ...-5f0d-478c-aba2-a05842613e29.metadata.json | 1 + ...-2052-4569-b9f1-1b45ab0c2be0.metadata.json | 1 + ...76b5d5-cc59-4f48-bcc5-3a4bf37e5139-m0.avro | Bin 0 -> 4411 bytes ...668bfb-107c-4122-8d64-9a078f2f45a9-m0.avro | Bin 0 -> 4407 bytes ...-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro | Bin 0 -> 1778 bytes ...-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.avro | Bin 0 -> 1915 bytes .../sqlite/db_resource/dh-iceberg-test.db | Bin 0 -> 20480 bytes .../db_resource/generate-pyiceberg-1.py | 53 +++++++ gradle/libs.versions.toml | 3 + 26 files changed, 739 insertions(+), 28 deletions(-) create mode 100644 extensions/iceberg/TESTING.md create mode 100644 extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeFileIO.java create mode 100644 extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeInputFile.java create mode 
100644 extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeOutputFile.java create mode 100644 extensions/iceberg/src/test/java/io/deephaven/iceberg/CatalogAdapterTest.java create mode 100644 extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java create mode 100644 extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/CatalogAdapterBase.java create mode 100644 extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/DbResource.java create mode 100644 extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/SqliteHelper.java create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.parquet create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.parquet create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00000-4aec1d46-5bde-4642-bac7-cc7573558c7e.metadata.json create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00001-3128d2b2-934f-483e-b2b0-4c08baebc5b2.metadata.json create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00002-262f9917-5f0d-478c-aba2-a05842613e29.metadata.json create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00003-979e64ae-2052-4569-b9f1-1b45ab0c2be0.metadata.json create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/6976b5d5-cc59-4f48-bcc5-3a4bf37e5139-m0.avro create 
mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/cc668bfb-107c-4122-8d64-9a078f2f45a9-m0.avro create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-4630159959461529013-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.avro create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/dh-iceberg-test.db create mode 100644 extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/generate-pyiceberg-1.py diff --git a/extensions/iceberg/TESTING.md b/extensions/iceberg/TESTING.md new file mode 100644 index 00000000000..ed638f84419 --- /dev/null +++ b/extensions/iceberg/TESTING.md @@ -0,0 +1,125 @@ +# sqlite catalogs + +The sqlite JDBC catalog is able to support multiple catalogs through a single database file. +As such, the following convention has been established for testing purposes: + +* database file: `/dh-iceberg-test.db` +* warehouse directory: `/catalogs//` + +Both writers and readers of this catalog need to be setup to support relative metadata locations to ensure portability. + +A root directory for extension-iceberg testing has been established at `extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource`. 
+ +## Usage + +### Java + +```java +import org.apache.iceberg.catalog.Catalog; +import io.deephaven.iceberg.sqlite.DbResource; + +Catalog catalog = DbResource.openCatalog(""); +``` + +### pyiceberg + +To setup in [pyiceberg](https://py.iceberg.apache.org/): + +```python +from pyiceberg.catalog.sql import SqlCatalog + +catalog = SqlCatalog( + "", + **{ + "uri": f"sqlite:///dh-iceberg-test.db", + "warehouse": f"catalogs/", + }, +) +``` + +## Generating data + +Note that any scripts that write data should be run relative to +[db_resource](src/test/resources/io/deephaven/iceberg/sqlite/db_resource) working directory to ensure unit testability. + +### pyiceberg-1 + +Here's an example of what was needed to generate this data: + +```shell +$ cd extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource + +# Note: 3.10 explicitly chosen b/c it doesn't seem like pyiceberg produces 3.12 wheels yet +$ python3.10 -m venv /tmp/iceberg + +$ source /tmp/iceberg/bin/activate + +$ pip install --only-binary=":all:" "pyiceberg[sql-sqlite, pyarrow]" + +$ pip freeze +annotated-types==0.7.0 +certifi==2024.8.30 +charset-normalizer==3.3.2 +click==8.1.7 +fsspec==2024.9.0 +greenlet==3.1.1 +idna==3.10 +markdown-it-py==3.0.0 +mdurl==0.1.2 +mmh3==4.1.0 +numpy==1.26.4 +pyarrow==17.0.0 +pydantic==2.9.2 +pydantic_core==2.23.4 +Pygments==2.18.0 +pyiceberg==0.7.1 +pyparsing==3.1.4 +python-dateutil==2.9.0.post0 +requests==2.32.3 +rich==13.8.1 +six==1.16.0 +sortedcontainers==2.4.0 +SQLAlchemy==2.0.35 +strictyaml==1.7.3 +tenacity==8.5.0 +typing_extensions==4.12.2 +urllib3==2.2.3 + +$ python generate-pyiceberg-1.py + +$ sqlite3 dh-iceberg-test.db +SQLite version 3.45.1 2024-01-30 16:01:20 +Enter ".help" for usage hints. 
+sqlite> .dump +PRAGMA foreign_keys=OFF; +BEGIN TRANSACTION; +CREATE TABLE iceberg_tables ( + catalog_name VARCHAR(255) NOT NULL, + table_namespace VARCHAR(255) NOT NULL, + table_name VARCHAR(255) NOT NULL, + metadata_location VARCHAR(1000), + previous_metadata_location VARCHAR(1000), + PRIMARY KEY (catalog_name, table_namespace, table_name) +); +INSERT INTO iceberg_tables VALUES('pyiceberg-1','dh-default','cities','catalogs/pyiceberg-1/dh-default.db/cities/metadata/00003-68091f71-d3c5-42bb-8161-e2e187dece14.metadata.json','catalogs/pyiceberg-1/dh-default.db/cities/metadata/00002-106b37f8-8818-439d-87c5-3cae608d1972.metadata.json'); +CREATE TABLE iceberg_namespace_properties ( + catalog_name VARCHAR(255) NOT NULL, + namespace VARCHAR(255) NOT NULL, + property_key VARCHAR(255) NOT NULL, + property_value VARCHAR(1000) NOT NULL, + PRIMARY KEY (catalog_name, namespace, property_key) +); +INSERT INTO iceberg_namespace_properties VALUES('pyiceberg-1','dh-default','exists','true'); +COMMIT; +sqlite> +``` + +### sqlite + +If we add a lot of catalogs to the database, we may want to look into vacuuming the database to keep the file size small. + +`sqlite3 dh-iceberg-test.db 'VACUUM;'` + +Ideally, the sqlite database can be a small collection of catalogs that were created via external tooling to verify that +we can integrate with them successfully. 
+ diff --git a/extensions/iceberg/build.gradle b/extensions/iceberg/build.gradle index 2c30f242bfe..6f0b61b03a8 100644 --- a/extensions/iceberg/build.gradle +++ b/extensions/iceberg/build.gradle @@ -44,12 +44,20 @@ dependencies { testRuntimeOnly libs.junit.jupiter.engine testRuntimeOnly libs.junit.platform.launcher - testImplementation libs.testcontainers - testImplementation libs.testcontainers.junit.jupiter - testImplementation libs.testcontainers.localstack - testImplementation libs.testcontainers.minio - testRuntimeOnly project(':test-configs') testRuntimeOnly project(':log-to-slf4j') testRuntimeOnly libs.slf4j.simple + + testRuntimeOnly libs.xerial.sqlite.jdbc + + // NOTE: we need to pull down more hadoop dependencies, + // buildSrc/src/main/groovy/io.deephaven.hadoop-common-dependencies.gradle excludes some stuff we need to do + // Hadoop FileIO. + testRuntimeOnly libs.hadoop.common + + testImplementation project(':engine-test-utils') +} + +test { + useJUnitPlatform() } diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java index 7bf0f5222a2..c2a3bfc7a67 100644 --- a/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/layout/IcebergBaseLayout.java @@ -9,6 +9,7 @@ import io.deephaven.engine.table.impl.locations.impl.TableLocationKeyFinder; import io.deephaven.iceberg.location.IcebergTableLocationKey; import io.deephaven.iceberg.location.IcebergTableParquetLocationKey; +import io.deephaven.iceberg.relative.RelativeFileIO; import io.deephaven.iceberg.util.IcebergInstructions; import io.deephaven.parquet.table.ParquetInstructions; import io.deephaven.iceberg.internal.DataInstructionsProviderLoader; @@ -130,6 +131,15 @@ public IcebergBaseLayout( abstract IcebergTableLocationKey keyFromDataFile(DataFile df, URI fileUri); + @NotNull + private URI 
dataFileUri(@NotNull DataFile df) { + String path = df.path().toString(); + if (fileIO instanceof RelativeFileIO) { + path = ((RelativeFileIO) fileIO).absoluteLocation(path); + } + return FileUtils.convertToURI(path, false); + } + @Override public synchronized void findKeys(@NotNull final Consumer locationKeyObserver) { try { @@ -144,7 +154,7 @@ public synchronized void findKeys(@NotNull final Consumer reader = ManifestFiles.read(manifestFile, fileIO)) { for (DataFile df : reader) { - final URI fileUri = FileUtils.convertToURI(df.path().toString(), false); + final URI fileUri = dataFileUri(df); final IcebergTableLocationKey locationKey = cache.computeIfAbsent(fileUri, uri -> keyFromDataFile(df, fileUri)); if (locationKey != null) { diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeFileIO.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeFileIO.java new file mode 100644 index 00000000000..ef1ffcda24b --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeFileIO.java @@ -0,0 +1,137 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.relative; + +import io.deephaven.util.annotations.VisibleForTesting; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.hadoop.HadoopConfigurable; +import org.apache.iceberg.hadoop.SerializableConfiguration; +import org.apache.iceberg.io.BulkDeletionFailureException; +import org.apache.iceberg.io.DelegateFileIO; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.FileInfo; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.ResolvingFileIO; +import org.apache.iceberg.relocated.com.google.common.collect.Streams; +import org.apache.iceberg.util.SerializableSupplier; + +import java.util.Map; +import 
java.util.function.Function; + +/** + * While this class is in the public source set, it is meant to support testing use cases only and should not be used in + * production. + * + * @see Support relative paths in Table Metadata + */ +@VisibleForTesting +public final class RelativeFileIO implements HadoopConfigurable, DelegateFileIO { + public static final String BASE_PATH = "relative.base-path"; + private static final String IO_DEFAULT_IMPL = ResolvingFileIO.class.getName(); + + private String basePath; + + private DelegateFileIO io; + + private SerializableSupplier hadoopConf; + + public RelativeFileIO() {} + + public RelativeFileIO(Configuration hadoopConf) { + this(new SerializableConfiguration(hadoopConf)::get); + } + + public RelativeFileIO(SerializableSupplier hadoopConf) { + this.hadoopConf = hadoopConf; + } + + @Override + public Configuration getConf() { + return hadoopConf.get(); + } + + @Override + public void setConf(Configuration conf) { + this.hadoopConf = new SerializableConfiguration(conf)::get; + } + + @Override + public void serializeConfWith(Function> confSerializer) { + this.hadoopConf = confSerializer.apply(getConf()); + } + + public String absoluteLocation(String location) { + return basePath + location; + } + + private String relativeLocation(String location) { + if (!location.startsWith(basePath)) { + throw new IllegalStateException(); + } + return location.substring(basePath.length()); + } + + @Override + public void initialize(Map properties) { + this.basePath = StringUtils.appendIfMissing(properties.get(BASE_PATH), "/"); + // We can add a property here later if we need to override the default + // final String impl = properties.getOrDefault(IO_IMPL, IO_DEFAULT_IMPL); + final FileIO fileIO = CatalogUtil.loadFileIO(IO_DEFAULT_IMPL, properties, hadoopConf.get()); + if (!(fileIO instanceof DelegateFileIO)) { + throw new IllegalArgumentException("filoIO must be DelegateFileIO, " + fileIO.getClass()); + } + this.io = (DelegateFileIO) fileIO; + 
} + + @Override + public Map properties() { + return io.properties(); + } + + @Override + public InputFile newInputFile(String path) { + return new RelativeInputFile(path, io.newInputFile(absoluteLocation(path))); + } + + @Override + public InputFile newInputFile(String path, long length) { + return new RelativeInputFile(path, io.newInputFile(absoluteLocation(path), length)); + } + + @Override + public OutputFile newOutputFile(String path) { + return new RelativeOutputFile(path, io.newOutputFile(absoluteLocation(path))); + } + + @Override + public void deleteFiles(Iterable iterable) throws BulkDeletionFailureException { + io.deleteFiles(Streams.stream(iterable).map(this::absoluteLocation)::iterator); + } + + @Override + public Iterable listPrefix(String s) { + return Streams.stream(io.listPrefix(absoluteLocation(s))) + .map(x -> new FileInfo(relativeLocation(x.location()), x.size(), x.createdAtMillis()))::iterator; + } + + @Override + public void deletePrefix(String s) { + io.deletePrefix(absoluteLocation(s)); + } + + @Override + public void deleteFile(String path) { + io.deleteFile(absoluteLocation(path)); + } + + @Override + public void close() { + if (io != null) { + io.close(); + } + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeInputFile.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeInputFile.java new file mode 100644 index 00000000000..81fc85ac540 --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeInputFile.java @@ -0,0 +1,39 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.relative; + +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.SeekableInputStream; + +import java.util.Objects; + +final class RelativeInputFile implements InputFile { + private final String location; + private final InputFile file; + + public RelativeInputFile(String location, InputFile file) { + 
this.location = Objects.requireNonNull(location); + this.file = Objects.requireNonNull(file); + } + + @Override + public long getLength() { + return file.getLength(); + } + + @Override + public SeekableInputStream newStream() { + return file.newStream(); + } + + @Override + public String location() { + return location; + } + + @Override + public boolean exists() { + return file.exists(); + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeOutputFile.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeOutputFile.java new file mode 100644 index 00000000000..638814b782d --- /dev/null +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/relative/RelativeOutputFile.java @@ -0,0 +1,40 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.relative; + +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.io.OutputFile; +import org.apache.iceberg.io.PositionOutputStream; + +import java.util.Objects; + +final class RelativeOutputFile implements OutputFile { + private final String location; + private final OutputFile file; + + public RelativeOutputFile(String location, OutputFile file) { + this.location = Objects.requireNonNull(location); + this.file = Objects.requireNonNull(file); + } + + @Override + public PositionOutputStream create() { + return file.create(); + } + + @Override + public PositionOutputStream createOrOverwrite() { + return file.createOrOverwrite(); + } + + @Override + public String location() { + return location; + } + + @Override + public InputFile toInputFile() { + return new RelativeInputFile(location, file.toInputFile()); + } +} diff --git a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java index b76a750602d..8e52593b1b7 100644 --- 
a/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java +++ b/extensions/iceberg/src/main/java/io/deephaven/iceberg/util/IcebergCatalogAdapter.java @@ -770,7 +770,7 @@ private Table readTableInternal( // Do we want the latest or a specific snapshot? final Snapshot snapshot = tableSnapshot != null ? tableSnapshot : table.currentSnapshot(); - final Schema schema = table.schemas().get(snapshot.schemaId()); + final Schema schema = snapshot == null ? table.schema() : table.schemas().get(snapshot.schemaId()); // Load the partitioning schema. final org.apache.iceberg.PartitionSpec partitionSpec = table.spec(); diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/CatalogAdapterTest.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/CatalogAdapterTest.java new file mode 100644 index 00000000000..d08e1735db0 --- /dev/null +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/CatalogAdapterTest.java @@ -0,0 +1,49 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg; + +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.iceberg.junit5.CatalogAdapterBase; +import org.apache.iceberg.Schema; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +public class CatalogAdapterTest extends CatalogAdapterBase { + @Test + void empty() { + assertThat(catalogAdapter.listNamespaces()).isEmpty(); + } + + @Test + void createEmptyTable() { + final Schema schema = new Schema( + Types.NestedField.required(1, "Foo", Types.StringType.get()), + Types.NestedField.required(2, "Bar", Types.IntegerType.get()), + Types.NestedField.optional(3, "Baz", Types.DoubleType.get())); + final Namespace 
myNamespace = Namespace.of("MyNamespace"); + final TableIdentifier myTableId = TableIdentifier.of(myNamespace, "MyTable"); + catalogAdapter.catalog().createTable(myTableId, schema); + + assertThat(catalogAdapter.listNamespaces()).containsExactly(myNamespace); + assertThat(catalogAdapter.listTables(myNamespace)).containsExactly(myTableId); + final Table table; + { + final TableDefinition expectedDefinition = TableDefinition.of( + ColumnDefinition.ofString("Foo"), + ColumnDefinition.ofInt("Bar"), + ColumnDefinition.ofDouble("Baz")); + assertThat(catalogAdapter.getTableDefinition(myTableId, null)).isEqualTo(expectedDefinition); + table = catalogAdapter.readTable(myTableId, null); + assertThat(table.getDefinition()).isEqualTo(expectedDefinition); + } + // Note: this is failing w/ NPE, assumes that Snapshot is non-null. + // assertThat(table.isEmpty()).isTrue(); + } +} diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java new file mode 100644 index 00000000000..4f59adae18c --- /dev/null +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/PyIceberg1Test.java @@ -0,0 +1,109 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg; + +import io.deephaven.engine.table.ColumnDefinition; +import io.deephaven.engine.table.Table; +import io.deephaven.engine.table.TableDefinition; +import io.deephaven.engine.testutil.TstUtils; +import io.deephaven.engine.util.TableTools; +import io.deephaven.iceberg.sqlite.DbResource; +import io.deephaven.iceberg.util.IcebergCatalogAdapter; +import io.deephaven.iceberg.util.IcebergTools; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; +import java.util.List; + +import static 
org.assertj.core.api.Assertions.assertThat; + +/** + * This test shows that we can integrate with data written by pyiceberg. + * See TESTING.md and generate-pyiceberg-1.py for more details. + */ +public class PyIceberg1Test { + private static final Namespace NAMESPACE = Namespace.of("dh-default"); + private static final TableIdentifier CITIES_ID = TableIdentifier.of(NAMESPACE, "cities"); + + // This will need to be updated if the data is regenerated + private static final long SNAPSHOT_1_ID = 1743193108934390753L; + private static final long SNAPSHOT_2_ID = 4630159959461529013L; + + private static final TableDefinition CITIES_1_TD = TableDefinition.of( + ColumnDefinition.ofString("city"), + ColumnDefinition.ofDouble("latitude"), + ColumnDefinition.ofDouble("lon")); + + private static final TableDefinition CITIES_2_TD = TableDefinition.of( + ColumnDefinition.ofString("city"), + ColumnDefinition.ofDouble("latitude"), + ColumnDefinition.ofDouble("longitude")); + + private IcebergCatalogAdapter catalogAdapter; + + @BeforeEach + void setUp() throws URISyntaxException { + catalogAdapter = IcebergTools.createAdapter(DbResource.openCatalog("pyiceberg-1")); + } + + @Test + void catalogInfo() { + assertThat(catalogAdapter.listNamespaces()).containsExactly(NAMESPACE); + assertThat(catalogAdapter.listTables(NAMESPACE)).containsExactly(CITIES_ID); + final List snapshots = catalogAdapter.listSnapshots(CITIES_ID); + assertThat(snapshots).hasSize(2); + { + final Snapshot snapshot = snapshots.get(0); + assertThat(snapshot.parentId()).isNull(); + assertThat(snapshot.schemaId()).isEqualTo(0); + assertThat(snapshot.sequenceNumber()).isEqualTo(1L); + assertThat(snapshot.snapshotId()).isEqualTo(SNAPSHOT_1_ID); + } + { + final Snapshot snapshot = snapshots.get(1); + assertThat(snapshot.parentId()).isEqualTo(SNAPSHOT_1_ID); + assertThat(snapshot.schemaId()).isEqualTo(1); + assertThat(snapshot.sequenceNumber()).isEqualTo(2L); + assertThat(snapshot.snapshotId()).isEqualTo(SNAPSHOT_2_ID); + 
} + } + + @Test + void cities1() { + final Table cities1; + { + final TableDefinition td = catalogAdapter.getTableDefinition(CITIES_ID.toString(), SNAPSHOT_1_ID, null); + assertThat(td).isEqualTo(CITIES_1_TD); + cities1 = catalogAdapter.readTable(CITIES_ID, SNAPSHOT_1_ID); + assertThat(cities1.getDefinition()).isEqualTo(CITIES_1_TD); + } + final Table expectedCities1 = TableTools.newTable(CITIES_1_TD, + TableTools.stringCol("city", "Amsterdam", "San Francisco", "Drachten", "Paris"), + TableTools.doubleCol("latitude", 52.371807, 37.773972, 53.11254, 48.864716), + TableTools.doubleCol("lon", 4.896029, -122.431297, 6.0989, 2.349014)); + TstUtils.assertTableEquals(expectedCities1, cities1); + } + + @Test + void cities2() { + final Table cities2; + { + final TableDefinition td = catalogAdapter.getTableDefinition(CITIES_ID.toString(), SNAPSHOT_2_ID, null); + assertThat(td).isEqualTo(CITIES_2_TD); + cities2 = catalogAdapter.readTable(CITIES_ID, SNAPSHOT_2_ID); + assertThat(cities2.getDefinition()).isEqualTo(CITIES_2_TD); + } + // TODO(deephaven-core#6118): Iceberg column rename handling + // final Table expectedCities2 = TableTools.newTable(CITIES_2_TD, + // TableTools.stringCol("city", "Amsterdam", "San Francisco", "Drachten", "Paris", "Minneapolis", "New York"), + // TableTools.doubleCol("latitude", 52.371807, 37.773972, 53.11254, 48.864716, 44.977479, 40.730610), + // TableTools.doubleCol("longitude", 4.896029, -122.431297, 6.0989, 2.349014, -93.264358, -73.935242) + // ); + // TstUtils.assertTableEquals(expectedCities2, cities2); + } +} diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/TestCatalog/IcebergTestTable.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/TestCatalog/IcebergTestTable.java index bcac783def0..1ae6894bd13 100644 --- a/extensions/iceberg/src/test/java/io/deephaven/iceberg/TestCatalog/IcebergTestTable.java +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/TestCatalog/IcebergTestTable.java @@ -10,7 +10,6 @@ 
import org.apache.iceberg.io.LocationProvider; import org.apache.iceberg.io.ResolvingFileIO; import org.jetbrains.annotations.NotNull; -import org.testcontainers.shaded.org.apache.commons.lang3.NotImplementedException; import java.io.File; import java.nio.file.Path; @@ -60,7 +59,7 @@ public void refresh() {} @Override public TableScan newScan() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override @@ -141,82 +140,82 @@ public Iterable snapshots() { @Override public List history() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public UpdateSchema updateSchema() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public UpdatePartitionSpec updateSpec() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public UpdateProperties updateProperties() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public ReplaceSortOrder replaceSortOrder() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public UpdateLocation updateLocation() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public AppendFiles newAppend() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public RewriteFiles newRewrite() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public RewriteManifests rewriteManifests() { - throw new NotImplementedException("Not implemented"); + throw new 
UnsupportedOperationException("Not implemented"); } @Override public OverwriteFiles newOverwrite() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public RowDelta newRowDelta() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public ReplacePartitions newReplacePartitions() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public DeleteFiles newDelete() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public ExpireSnapshots expireSnapshots() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public ManageSnapshots manageSnapshots() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public Transaction newTransaction() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override @@ -229,12 +228,12 @@ public FileIO io() { @Override public EncryptionManager encryption() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override public LocationProvider locationProvider() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } @Override @@ -244,6 +243,6 @@ public List statisticsFiles() { @Override public Map refs() { - throw new NotImplementedException("Not implemented"); + throw new UnsupportedOperationException("Not implemented"); } } diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/CatalogAdapterBase.java 
b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/CatalogAdapterBase.java new file mode 100644 index 00000000000..eb60c17055c --- /dev/null +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/junit5/CatalogAdapterBase.java @@ -0,0 +1,36 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.junit5; + +import io.deephaven.engine.testutil.junit4.EngineCleanup; +import io.deephaven.iceberg.sqlite.SqliteHelper; +import io.deephaven.iceberg.util.IcebergCatalogAdapter; +import io.deephaven.iceberg.util.IcebergTools; +import org.apache.iceberg.catalog.Catalog; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; + + +public abstract class CatalogAdapterBase { + + protected IcebergCatalogAdapter catalogAdapter; + private EngineCleanup engineCleanup = new EngineCleanup(); + + @BeforeEach + void setUp(TestInfo testInfo, @TempDir Path rootDir) throws Exception { + engineCleanup.setUp(); + SqliteHelper.createJdbcDatabase(rootDir); + final Catalog catalog = SqliteHelper.createJdbcCatalog(rootDir, testInfo.getDisplayName(), false); + catalogAdapter = IcebergTools.createAdapter(catalog); + } + + @AfterEach + void tearDown() throws Exception { + engineCleanup.tearDown(); + } +} diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/DbResource.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/DbResource.java new file mode 100644 index 00000000000..10c2de49b63 --- /dev/null +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/DbResource.java @@ -0,0 +1,20 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.sqlite; + +import org.apache.iceberg.catalog.Catalog; + +import java.net.URISyntaxException; +import java.nio.file.Path; + +public class DbResource { + public 
static Catalog openCatalog(String catalogName) throws URISyntaxException { + return SqliteHelper.openJdbcCatalog( + // Note: we are using a resource path that is not shared with our build classes to ensure we are + // resolving against the actual resource directory + Path.of(DbResource.class.getResource("db_resource").toURI()), + catalogName, + true); + } +} diff --git a/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/SqliteHelper.java b/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/SqliteHelper.java new file mode 100644 index 00000000000..51759f57522 --- /dev/null +++ b/extensions/iceberg/src/test/java/io/deephaven/iceberg/sqlite/SqliteHelper.java @@ -0,0 +1,73 @@ +// +// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending +// +package io.deephaven.iceberg.sqlite; + +import io.deephaven.iceberg.relative.RelativeFileIO; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.jdbc.JdbcCatalog; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Stream; + +public class SqliteHelper { + + private static final String DB_FILE = "dh-iceberg-test.db"; + private static final String CATALOGS_DIR = "catalogs"; + + public static void createJdbcDatabase(Path rootDir) throws IOException { + if (!Files.isDirectory(rootDir)) { + throw new IllegalArgumentException("Must provide rootDir that exists: " + rootDir); + } + try (final Stream list = Files.list(rootDir).limit(1)) { + if (list.iterator().hasNext()) { + throw new IllegalArgumentException("Expected rootDir to be empty: " + rootDir); + } + } + Files.createFile(rootDir.resolve(DB_FILE)); + Files.createDirectory(rootDir.resolve("catalogs")); + } + + public static Catalog createJdbcCatalog(Path rootDir, String catalogName, boolean 
relativeSupport) + throws IOException { + if (!Files.isDirectory(rootDir)) { + throw new IllegalArgumentException("Must provide rootDir that exists: " + rootDir); + } + if (!Files.isRegularFile(rootDir.resolve(DB_FILE))) { + throw new IllegalArgumentException("Must create jdbc database first: " + rootDir); + } + if (!Files.isDirectory(rootDir.resolve(CATALOGS_DIR))) { + throw new IllegalArgumentException("Must create jdbc database first: " + rootDir); + } + Files.createDirectory(rootDir.resolve(CATALOGS_DIR).resolve(catalogName)); + return openJdbcCatalog(rootDir, catalogName, relativeSupport); + } + + public static Catalog openJdbcCatalog(Path rootDir, String catalogName, boolean relativeSupport) { + final Path warehouseDir = rootDir.resolve(CATALOGS_DIR).resolve(catalogName); + if (!Files.isDirectory(warehouseDir)) { + throw new IllegalArgumentException("Expected warehouse to already exist: " + warehouseDir); + } + final Map properties = new HashMap<>(); + properties.put(CatalogProperties.CATALOG_IMPL, JdbcCatalog.class.getName()); + properties.put(CatalogProperties.URI, String.format("jdbc:sqlite:%s", rootDir.resolve(DB_FILE))); + properties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouseDir.toString()); + if (relativeSupport) { + // When we are referring to a catalog that already exists in our unit testing filesystem, we need to + // hook in relative file support. See https://github.com/apache/iceberg/issues/1617 + properties.put(CatalogProperties.FILE_IO_IMPL, RelativeFileIO.class.getName()); + properties.put(RelativeFileIO.BASE_PATH, rootDir.toString()); + } + final Configuration hadoopConf = new Configuration(); + // Note: the catalogName is very important here, the JDBC catalog uses it for lookups. In this way, a single + // dbFile can be used for multiple catalogs. 
+ return CatalogUtil.buildIcebergCatalog(catalogName, properties, hadoopConf); + } +} diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.parquet b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.parquet new file mode 100644 index 00000000000..d142358a461 --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cee42864f62bfb767514cac315aba2c6c7bc3cec7a37edd696ca01e690375ba +size 1616 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.parquet b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.parquet new file mode 100644 index 00000000000..ada32f6e0b2 --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/data/00000-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2932c3351e5164283e7d76ed8439b08c420a1d6b039081121ed58eeefe2a4e +size 1660 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00000-4aec1d46-5bde-4642-bac7-cc7573558c7e.metadata.json 
b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00000-4aec1d46-5bde-4642-bac7-cc7573558c7e.metadata.json new file mode 100644 index 00000000000..90a76f4767a --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00000-4aec1d46-5bde-4642-bac7-cc7573558c7e.metadata.json @@ -0,0 +1 @@ +{"location":"catalogs/pyiceberg-1/dh-default.db/cities","table-uuid":"5f168e92-7a3c-4277-aa9d-0bf8425e0e05","last-updated-ms":1727458044013,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"lon","type":"double","required":false}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"snapshots":[],"snapshot-log":[],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{},"format-version":2,"last-sequence-number":0} \ No newline at end of file diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00001-3128d2b2-934f-483e-b2b0-4c08baebc5b2.metadata.json b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00001-3128d2b2-934f-483e-b2b0-4c08baebc5b2.metadata.json new file mode 100644 index 00000000000..dd42fa006b9 --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00001-3128d2b2-934f-483e-b2b0-4c08baebc5b2.metadata.json @@ -0,0 +1 @@ 
+{"location":"catalogs/pyiceberg-1/dh-default.db/cities","table-uuid":"5f168e92-7a3c-4277-aa9d-0bf8425e0e05","last-updated-ms":1727458044082,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"lon","type":"double","required":false}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"current-snapshot-id":1743193108934390753,"snapshots":[{"snapshot-id":1743193108934390753,"sequence-number":1,"timestamp-ms":1727458044082,"manifest-list":"catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro","summary":{"operation":"append","added-files-size":"1660","added-data-files":"1","added-records":"4","total-data-files":"1","total-delete-files":"0","total-records":"4","total-files-size":"1660","total-position-deletes":"0","total-equality-deletes":"0"},"schema-id":0}],"snapshot-log":[{"snapshot-id":1743193108934390753,"timestamp-ms":1727458044082}],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{"main":{"snapshot-id":1743193108934390753,"type":"branch"}},"format-version":2,"last-sequence-number":1} \ No newline at end of file diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00002-262f9917-5f0d-478c-aba2-a05842613e29.metadata.json b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00002-262f9917-5f0d-478c-aba2-a05842613e29.metadata.json new file mode 100644 index 00000000000..01108ba0fe1 --- /dev/null +++ 
b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00002-262f9917-5f0d-478c-aba2-a05842613e29.metadata.json @@ -0,0 +1 @@ +{"location":"catalogs/pyiceberg-1/dh-default.db/cities","table-uuid":"5f168e92-7a3c-4277-aa9d-0bf8425e0e05","last-updated-ms":1727458044091,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"lon","type":"double","required":false}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"longitude","type":"double","required":false}],"schema-id":1,"identifier-field-ids":[]}],"current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"current-snapshot-id":1743193108934390753,"snapshots":[{"snapshot-id":1743193108934390753,"sequence-number":1,"timestamp-ms":1727458044082,"manifest-list":"catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro","summary":{"operation":"append","added-files-size":"1660","added-data-files":"1","added-records":"4","total-data-files":"1","total-delete-files":"0","total-records":"4","total-files-size":"1660","total-position-deletes":"0","total-equality-deletes":"0"},"schema-id":0}],"snapshot-log":[{"snapshot-id":1743193108934390753,"timestamp-ms":1727458044082}],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{"main":{"snapshot-id":1743193108934390753,"type":"branch"}},"format-version":2,"last-sequence-number":1} \ No newline at end of file diff --git 
a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00003-979e64ae-2052-4569-b9f1-1b45ab0c2be0.metadata.json b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00003-979e64ae-2052-4569-b9f1-1b45ab0c2be0.metadata.json new file mode 100644 index 00000000000..48001734dc6 --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/00003-979e64ae-2052-4569-b9f1-1b45ab0c2be0.metadata.json @@ -0,0 +1 @@ +{"location":"catalogs/pyiceberg-1/dh-default.db/cities","table-uuid":"5f168e92-7a3c-4277-aa9d-0bf8425e0e05","last-updated-ms":1727458044105,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"lon","type":"double","required":false}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"city","type":"string","required":false},{"id":2,"name":"latitude","type":"double","required":false},{"id":3,"name":"longitude","type":"double","required":false}],"schema-id":1,"identifier-field-ids":[]}],"current-schema-id":1,"partition-specs":[{"spec-id":0,"fields":[]}],"default-spec-id":0,"last-partition-id":999,"properties":{},"current-snapshot-id":4630159959461529013,"snapshots":[{"snapshot-id":1743193108934390753,"sequence-number":1,"timestamp-ms":1727458044082,"manifest-list":"catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro","summary":{"operation":"append","added-files-size":"1660","added-data-files":"1","added-records":"4","total-data-files":"1","total-delete-files":"0","total-records":"4","total-files-size":"1660","total-position-deletes":"0","total-equality-deletes":"0"},"schema-id":0},{"snapshot-id":46301
59959461529013,"parent-snapshot-id":1743193108934390753,"sequence-number":2,"timestamp-ms":1727458044105,"manifest-list":"catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-4630159959461529013-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.avro","summary":{"operation":"append","added-files-size":"1616","added-data-files":"1","added-records":"2","total-data-files":"2","total-delete-files":"0","total-records":"6","total-files-size":"3276","total-position-deletes":"0","total-equality-deletes":"0"},"schema-id":1}],"snapshot-log":[{"snapshot-id":1743193108934390753,"timestamp-ms":1727458044082},{"snapshot-id":4630159959461529013,"timestamp-ms":1727458044105}],"metadata-log":[],"sort-orders":[{"order-id":0,"fields":[]}],"default-sort-order-id":0,"refs":{"main":{"snapshot-id":4630159959461529013,"type":"branch"}},"format-version":2,"last-sequence-number":2} \ No newline at end of file diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/6976b5d5-cc59-4f48-bcc5-3a4bf37e5139-m0.avro b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/6976b5d5-cc59-4f48-bcc5-3a4bf37e5139-m0.avro new file mode 100644 index 0000000000000000000000000000000000000000..37bf6fa460960528b6d4d6b25f8c14420e08586a GIT binary patch literal 4411 zcmb`L&2QsG6u^^gRP}(W9?(iFRxt8m#UW{9yUj-diI3e%E3u_?cZE~3GFaN9Uc^SVWS^1k^Qh!9cG+aFD`MNj&ll`9Cwql|blJ__nwFfZKDL{? 
z%$FR_6V}zC;60wJJGvRr5NzVYuSWrMs)t>z_y$&18aI0)98g(5VPOPIE5@25d9_MB&$Oq!7MVYSmg#+m#eDd(%E;;40Pj>GU6=kd#7fOC&)QpB) zWMEzwH7spB4oG5xAB=rc&ASeUXBr*!==?E_#_!h31mEHzv<64m={Uq{f7*eRwP(~yh zK&^v1k`|UpT}qvBtm`f7MAzFHE8LMQR>w@qN+n=v0?~P@daGdh3H4%?q3M?N{DnR3 zQ5RkBi6ib%?}560h&G^sHtNFrV&TFUNfzFt0lFxmL%2(%fL{cvSVDZbJUTAAOBs{| z#4O4V$Q~QI1e6^>X*4oGS4Y{oI!dFlY?LZ@Y7m%?7rRh5)a5~_?~*;bS7eLViZu_g zo-1@O4%pBUu(3E`Ltg-}u{^-Yizr?Y!&spT=J?c~3)5^ZGg{3yz%)CH!!+9k>o1k~ zW@ni&v2($C0CAa6&8l`%A&PiCamqr=2JX-T0?B+qGPakA)@Wj!wlXDGE(At%KH76{ z9iz28wu%)|tcnohAh;OfQZFV0tEu?{)zo-SH9Z%ixx{j+Se*wEE>&y5B*jK23zBg| zZ{`}etYT`3tz8Jxi>}5yz7q}u)i#27l8Ps`C+wmNqrl_R61`r;Z_w&Sn%4v@0Nc%J9rFoDywj|>C^uYx8Yed literal 0 HcmV?d00001 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/cc668bfb-107c-4122-8d64-9a078f2f45a9-m0.avro b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/cc668bfb-107c-4122-8d64-9a078f2f45a9-m0.avro new file mode 100644 index 0000000000000000000000000000000000000000..651f0a05ccb9711276af8f0f704e83465ba5016e GIT binary patch literal 4407 zcmb`LO=u)V6o6-YV;Dq26cLp@6n&max_h#jA1}M&23FR%ZnCE^+thT|WXg1RwO!Q- zQXQVe^A^r90LsJc7W_C=elGIaX&Fpg)_|oj`Jv6y*cI=uN(GXX7Tn*v-@p>NZ z!{>y(lk$YY(mo9&JFaE>oMK-<6<4Xx#(RB{_5%ET#@@zwuZS+Nty?p*^VHE|s|JR% zNO3qxQrE|U_jsc2@wJ#HU=0^hUB=8?+UuI-H?Y#MuJuI{QdK`>iG-zwv*L+Jfyk;) z6@7soCSom9c=AdPkicO)k@#kf6bK10%-|8=OI0e1Z?&bwg2fq6V)96t$2BssBcib! 
z2<5_|a-rEad%SJWjM5;$?*jqu=)!}~`ZNuc*&+BD-z|Y(Qy(9|;f{yGE^nHzp#KS)X zVh4au`AyS^sTxcxvL@XP`KI%y&~-<6kSHP|_wU^%M_dia)(+88#>#P_U)xlU80$BeEEdjNwnPlLp^96U}=_->w$1RYIO8V9VF9h;IKmCPrAL=70S&N7!^ z=9ZCUsponkg(L#udD4RqZEGT|vm_a7-_~hBM4x05B;lDdWL#FEsZ0JYb9v})w+dFcW1?6cFfA*WfTamUPg1p;CCd+Kkm?N0x8&z9?P(o% z(e;7Y=N=7q^~FQXkjCb57v7f(7rw}{@C_QHi!wTd`$P%&MWC7`#D~x2LD^l}pe!I} zQMN($$j}9#Yy(QiDga#`W&83d9c$4jb?($4Fufr4p>F8QgV4w)+jP6k7OxfeJizuu zp?h+`j*Wnw`2joj9Dtq00Y+X#^MV-03RN;EqS3i98_h*VYoi4)8}0dFHd-a?FO>L= z_99_o=YsVB;xeOJRPC%nl<@}Qh$U_hxI+sFBqs}!bA6F$olT6>W})QLg}~W7AML5P zj?-KmTg{4SR%M839G?wwp%;^b)wJ;h)ztZ)YI-Wf#sbT!W_21wxK!PcDT<9w5hSCA zzA@3b6&2IWZ0$mjo^>_eio9eP>$VZZlUzLM1)^7Wp^O8rT+#1Kre+eMG11Ev>c<^K z(Oj^Ag)j!?Mq_7{_ZwMjb?!aBgC-~nqaUPiW=ARDVSu2pVh97ztc4d$)hP=%FK6Kv z&c4uLNRndb34Mmo6iY%LF_MTQl3R%VTk-6JP6cBEGNALgk^9ld*-;eC49{=-4$1#4nY8XHM^zkQ;zWwkkt5T^vz5??K zgGS^w=qg-1 literal 0 HcmV?d00001 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-1743193108934390753-0-cc668bfb-107c-4122-8d64-9a078f2f45a9.avro new file mode 100644 index 0000000000000000000000000000000000000000..0afb2b512da4f19dc12d37d26339c885ae5a7e9c GIT binary patch literal 1778 zcmb7FJ#W-N5WQTLf)WJ^K$e9<6X)#vK12yf2m}&{DnizJcYL>C@0#7cgi~}q5(R$* zMS_lof`XQY-$6k~$L!ttvWXKXwBC8|&Aj>Wv*h*0l`Snup(oM=oNY&gZYLUbqHs9s zbUUMP(CgeThyq~(Cv5Yn6mUz!+bIYNftcnAsAhB{l`1DD*axM#6wUS)*;jH`(+T9{ z=-Q!gW(D|nJzoKpiupU9pK{2U#&3Ivz99MHH7A0npiNv5Gg${=b{N(RLqwH4+9)o@ zHiSuPiJwSH3`%=1pFi>rxS4nlUw8|B@aH?;85$Xg@fj+SqmoI~IDtk-47WP5E`Y|E zSy@V%W!u4XVNSxS6b2nV4T=VJpwjJl>2?L&bHLS%bB?*L`gI;dzm|b81}rXz;H;WE zm%yhy107R2t(G5l9el+zc*nKDX2*35t_Kwc1{oMEp^m_{phB>8K*@u13W(gMS6i^I 
zpoXE|!m@%I0@uT83hTMmr^hFBW^d0I(=5Y?P#MhiW3~o$~s5R(Q$ZZ2DE$E7&Wp$3sC0MOcc_LLW9QyP51=??57Uy6H zBZgG0a*X~@@%4SSo8-^0E{;5v4CtmcwQ*36w3|YS$j7T^brtP%b jBcmV>cdh>K`;A7k`TggIk6*sN{~R_OjZOT}sO0hw-@GlD literal 0 HcmV?d00001 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-4630159959461529013-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.avro b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/catalogs/pyiceberg-1/dh-default.db/cities/metadata/snap-4630159959461529013-0-6976b5d5-cc59-4f48-bcc5-3a4bf37e5139.avro new file mode 100644 index 0000000000000000000000000000000000000000..e16fb1846a6f5f5092b7887493a4fb030d9ab21e GIT binary patch literal 1915 zcmbVNzi-n(6i!2j3c-fTf>2~3re5QVf5c{BLi~`Bkf>m(kafO$PE2i|@m*S?DiUKQ z1~&c#MkLq}5)%9aW#lhlU|?WiV&I&_>9ua%!eYg~_kG`c-@W(pAbh%XVO3-#7eg*l z%&yz6Wn#zo9p84b)ALPiUCT)UnM8UR?%NjjEo^$eWm~@4cdV-do{b=*fU;2&FeYoA zV>A-T2FIviO(o$<;}BXIB;!&i5IN95Wmc6^W!cm4aV8JLDbJ)b_$UbXn?RM@!OZO% z*wMiCjMG@WSlOw$6Z$;siuM`KVsLX>0hJXYf20QeAe?#Grdn zV_=YgR3$VK=oZunW)2j5=Y#^Ve(CiqSX9u!&}?B6K(hMq z%;4%o1y47~9F5C-=F4GGg;CKwQHKm7GD@UMw^Tng)Cu2#A_!G$*2WoYOGo1!OAy8> ztfM?XkyTqVXJ9Rbsufu{re{nhr)Y1%I7mSfMkJ|Or5XK?;*0%UPLeUXG+pwPGoUM_ zsV(v5+vAV>cW!PL8|N=>-Sw8gpZm0=_A*KMKy>qQOhNr!3=r0=D zZMD?%zmzF;T`!D6gv~xhHtzM1$6Oov#O!-fFR~r7Q2wcu+wCv=FJHa?u=mbXk}Ff; Hqb~df+^<+< literal 0 HcmV?d00001 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/dh-iceberg-test.db b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/dh-iceberg-test.db new file mode 100644 index 0000000000000000000000000000000000000000..a59334f571f1aef6644091b28392a625ba2ddb40 GIT binary patch literal 20480 zcmeI%!Ee$~90%~%jg5_L!;2v%9-h$yBox|0*%}Xuq=`n{6sKmV=CwSuj!~p#8OJTV z_&52VaPi;p=z~HFap1Dswr|s>eeJJ#ub*Fgc)x!9P9QgY64Qp*OEbrd4wIyC`g1$|ha|R!WT&70wKd#%yeJswApijgKmY;|fB*y_ 
z009WxOyJ_hYBFP*(xntIyUksP+r0PH>aY{;cbUVjZqMuTUf^v@{mjK;7yWHPvcb<79Xh&pBM zK9Ay5MN#H^eBXHUuG%=FZ_Oi`ogHMJMuW-I*-mq*+?T|9=Gc^o+iCJM|3_O)6fM>d zym8xFMq#PTuC86wq+}+SlP;@ssTnt-WQ47iEBV7~*9RZR$ElB?{>e-nxH#s72dpXb zY_&Zh>3W?gbnHG#aJR=#U9az3*N9z`ns_whH#Se>6Cyp&SJpEJx&NhSe2X6^By6vW zZ)#)oU2Tg^N+*a|u|NO<5P$##AOHafKmY;|fB*#Uw7~g5+Iafp(YbT(bO)2dg{(He z$xYs3{dVBEqax9yYT_4XVv2E4p=lSxz~Xe$Ule5+r6e1>2CtMEmo-J#}$bF7NI+GP9W`GezGkYZbM`HDkW3 zl*p`<{!PS+1p*L&00bZa0SG_<0uX=z1Rwx`+bKZTL}6odreYBnEW%<%`2T-M(!<;N zfEYCdAOHafKmY;|fB*y_009U<;C>4{lSXs@hgZ)1`I+kne$eZ4{Qtk-Zwa0r0uX=z Z1Rwwb2tWV=5P$##?u5XS|NmhP;4j0)j9~x( literal 0 HcmV?d00001 diff --git a/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/generate-pyiceberg-1.py b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/generate-pyiceberg-1.py new file mode 100644 index 00000000000..86c6c2728c8 --- /dev/null +++ b/extensions/iceberg/src/test/resources/io/deephaven/iceberg/sqlite/db_resource/generate-pyiceberg-1.py @@ -0,0 +1,53 @@ +from pyiceberg.schema import Schema +from pyiceberg.types import NestedField, StringType, DoubleType +from pyiceberg.catalog.sql import SqlCatalog + +import pyarrow as pa + +catalog = SqlCatalog( + "pyiceberg-1", + **{ + "uri": f"sqlite:///dh-iceberg-test.db", + "warehouse": f"catalogs/pyiceberg-1", + }, +) + +original_schema = Schema( + NestedField(1, "city", StringType(), required=False), + NestedField(2, "latitude", DoubleType(), required=False), + NestedField(3, "lon", DoubleType(), required=False), +) + +# Using specific names to make clear these aren't a standard / convention +catalog.create_namespace("dh-default") + +table = catalog.create_table( + "dh-default.cities", + schema=original_schema, +) + +# Add some data +table.append( + pa.Table.from_pylist( + [ + {"city": "Amsterdam", "latitude": 52.371807, "lon": 4.896029}, + {"city": "San Francisco", "latitude": 37.773972, "lon": -122.431297}, + {"city": "Drachten", "latitude": 53.11254, "lon": 
6.0989}, + {"city": "Paris", "latitude": 48.864716, "lon": 2.349014}, + ], + ) +) + +# Oops, we should be consistent with naming +with table.update_schema() as update: + update.rename_column("lon", "longitude") + +# Add some data. Note, to simplify ingestion, we are matching the latest column names +table.append( + pa.Table.from_pylist( + [ + {"city": "Minneapolis", "latitude": 44.977479, "longitude": -93.264358}, + {"city": "New York", "latitude": 40.730610, "longitude": -73.935242}, + ], + ) +) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index b4158ed3525..f342d603109 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -79,6 +79,7 @@ univocity = "2.6.0" vertispan-nio = "1.0-alpha-2" vertispan-flatbuffers-gwt = "1.12.0-1" vertispan-ts-defs = "1.0.0-RC4" +xerial = "3.46.1.3" # test versions assertj = "3.19.0" @@ -293,6 +294,8 @@ vertispan-flatbuffers-gwt = { module = "com.vertispan.flatbuffers:flatbuffers-gw vertispan-ts-defs-annotations = { module = "com.vertispan.tsdefs:jsinterop-ts-defs-annotations", version.ref = "vertispan-ts-defs" } vertispan-ts-defs-doclet = { module = "com.vertispan.tsdefs:jsinterop-ts-defs-doclet", version.ref = "vertispan-ts-defs" } +xerial-sqlite-jdbc = { module = "org.xerial:sqlite-jdbc", version.ref = "xerial" } + # test libraries assertj = { module = "org.assertj:assertj-core", version.ref = "assertj" }