From 09429a32b4f0a71a504825f2bec6dbf983eef949 Mon Sep 17 00:00:00 2001 From: Jeremihas Caruso Date: Mon, 25 Mar 2024 14:58:12 -0300 Subject: [PATCH 1/2] Unable to read date format columns (int96 type) from avro-parquet schema (#22) INT96 is deprecated so we must set "parquet.avro.readInt96AsFixed" configuration to "true" when building the reader. --- .../labs/delta/sharing/java/format/parquet/TableReader.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java b/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java index a3c8549..bfb5d5b 100644 --- a/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java +++ b/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java @@ -98,10 +98,14 @@ public List readN(Integer num) throws IOException { */ private List> getReaders() throws IOException { List> readers = new LinkedList<>(); + Configuration conf = new Configuration(); + + conf.set("parquet.avro.readInt96AsFixed", "true"); + for (Path path : paths) { LocalInputFile localInputFile = new LocalInputFile(path); ParquetReader reader = - AvroParquetReader.builder(localInputFile).build(); + AvroParquetReader.builder(localInputFile).withConf(conf).build(); readers.add(reader); } return readers; From 51a36d8f3441f24d9bbd532c33e97c890ee0ecb3 Mon Sep 17 00:00:00 2001 From: Jeremihas Caruso Date: Mon, 25 Mar 2024 15:59:44 -0300 Subject: [PATCH 2/2] Missing import Importing Configuration from hadoop --- .../labs/delta/sharing/java/format/parquet/TableReader.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java b/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java index bfb5d5b..7df0a5d 100644 --- 
a/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java +++ b/src/main/java/com/databricks/labs/delta/sharing/java/format/parquet/TableReader.java @@ -1,5 +1,6 @@ package com.databricks.labs.delta.sharing.java.format.parquet; +import org.apache.hadoop.conf.Configuration; import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.hadoop.ParquetReader;