Skip to content

Commit

Permalink
Merge branch 'master' into kyle/list-returns-static-stream
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron committed Nov 14, 2024
2 parents dece9a0 + 1d580ec commit 6721477
Show file tree
Hide file tree
Showing 35 changed files with 1,847 additions and 246 deletions.
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,5 @@ If there are user-facing changes then we may require documentation to be updated
-->

<!---
If there are any breaking changes to public APIs, please add the `breaking change` label.
If there are any breaking changes to public APIs, please call them out.
-->
8 changes: 7 additions & 1 deletion .github/workflows/object_store.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ jobs:

- name: Setup LocalStack (AWS emulation)
run: |
echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:3.3.0)" >> $GITHUB_ENV
echo "LOCALSTACK_CONTAINER=$(docker run -d -p 4566:4566 localstack/localstack:3.8.1)" >> $GITHUB_ENV
echo "EC2_METADATA_CONTAINER=$(docker run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2)" >> $GITHUB_ENV
aws --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket
aws --endpoint-url=http://localhost:4566 dynamodb create-table --table-name test-table --key-schema AttributeName=path,KeyType=HASH AttributeName=etag,KeyType=RANGE --attribute-definitions AttributeName=path,AttributeType=S AttributeName=etag,AttributeType=S --provisioned-throughput ReadCapacityUnits=5,WriteCapacityUnits=5
Expand All @@ -161,6 +161,12 @@ jobs:
- name: Run object_store tests
run: cargo test --features=aws,azure,gcp,http

- name: Run object_store tests (AWS native conditional put)
run: cargo test --features=aws
env:
AWS_CONDITIONAL_PUT: etag-put-if-not-exists
AWS_COPY_IF_NOT_EXISTS: multipart

- name: GCS Output
if: ${{ !cancelled() }}
run: docker logs $GCS_CONTAINER
Expand Down
2 changes: 1 addition & 1 deletion arrow-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ chrono = { workspace = true }
chrono-tz = { version = "0.10", optional = true }
num = { version = "0.4.1", default-features = false, features = ["std"] }
half = { version = "2.1", default-features = false, features = ["num-traits"] }
hashbrown = { version = "0.14.2", default-features = false }
hashbrown = { version = "0.15.1", default-features = false }

[features]
ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,11 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_offsets.inner().inner().capacity();
sum += self.value_data.capacity();
Expand Down
30 changes: 6 additions & 24 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,6 @@ use super::ByteArrayType;
///
/// [`ByteView`]: arrow_data::ByteView
///
/// # Use the [`eq`] kernel to compare the logical content.
///
/// Comparing two `GenericByteViewArray` using PartialEq compares by structure
/// (the `u128`s) and contents of the buffers, not by logical content. As there
/// are many different buffer layouts to represent the same data (e.g. different
/// offsets, different buffer sizes, etc) two arrays with the same data may not
/// compare equal.
///
/// To compare the logical content of two `GenericByteViewArray`s, use the [`eq`]
/// kernel.
///
/// [`eq`]: https://docs.rs/arrow/latest/arrow/compute/kernels/cmp/fn.eq.html
///
/// # Layout: "views" and buffers
///
/// A `GenericByteViewArray` stores variable length byte strings. An array of
Expand Down Expand Up @@ -192,16 +179,6 @@ impl<T: ByteViewType + ?Sized> Clone for GenericByteViewArray<T> {
}
}

// PartialEq: field-by-field comparison of the array's components
impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
    /// Returns `true` only when the data type, views, buffers and nulls of
    /// `other` each compare equal to the corresponding field of `self`.
    fn eq(&self, other: &Self) -> bool {
        // Fields are checked in sequence; the first mismatch ends the comparison.
        if !(other.data_type == self.data_type) {
            return false;
        }
        if !(other.views == self.views) {
            return false;
        }
        if !(other.buffers == self.buffers) {
            return false;
        }
        other.nulls == self.nulls
    }
}

impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
/// Create a new [`GenericByteViewArray`] from the provided parts, panicking on failure
///
Expand Down Expand Up @@ -606,6 +583,11 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.buffers.iter().map(|b| b.capacity()).sum::<usize>();
sum += self.views.inner().capacity();
Expand Down Expand Up @@ -1065,6 +1047,6 @@ mod tests {
};
assert_eq!(array1, array1.clone());
assert_eq!(array2, array2.clone());
assert_ne!(array1, array2);
assert_eq!(array1, array2);
}
}
4 changes: 4 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,10 @@ impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V
self.dictionary.logical_nulls()
}

/// Returns the logical null count reported by `self.dictionary`.
fn logical_null_count(&self) -> usize {
self.dictionary.logical_null_count()
}

fn is_nullable(&self) -> bool {
self.dictionary.is_nullable()
}
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,11 @@ impl Array for FixedSizeBinaryArray {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut sum = self.value_data.capacity();
if let Some(n) = &self.nulls {
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,11 @@ impl Array for FixedSizeListArray {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
if let Some(n) = self.nulls.as_ref() {
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,11 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,11 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.values.get_buffer_memory_size();
size += self.value_offsets.inner().capacity();
Expand Down
5 changes: 5 additions & 0 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,11 @@ impl Array for MapArray {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.entries.get_buffer_memory_size();
size += self.value_offsets.inner().inner().capacity();
Expand Down
6 changes: 6 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,12 @@ impl PartialEq for StructArray {
}
}

impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
    /// Compares two arrays by converting each one with `to_data()` and
    /// testing the resulting values for equality.
    fn eq(&self, other: &Self) -> bool {
        let lhs = self.to_data();
        let rhs = other.to_data();
        lhs == rhs
    }
}

/// Constructs an array using the input `data`.
/// Returns a reference-counted `Array` instance.
pub fn make_array(data: ArrayData) -> ArrayRef {
Expand Down
4 changes: 4 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,10 @@ impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
self.run_array.logical_nulls()
}

/// Returns the logical null count reported by `self.run_array`.
fn logical_null_count(&self) -> usize {
self.run_array.logical_null_count()
}

fn is_nullable(&self) -> bool {
self.run_array.is_nullable()
}
Expand Down
24 changes: 23 additions & 1 deletion arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::print_long_array;
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
Expand Down Expand Up @@ -377,6 +378,11 @@ impl Array for StructArray {
self.nulls.as_ref()
}

/// Returns the logical null count by delegating to `null_count()`.
fn logical_null_count(&self) -> usize {
// More efficient than the default implementation
self.null_count()
}

fn get_buffer_memory_size(&self) -> usize {
let mut size = self.fields.iter().map(|a| a.get_buffer_memory_size()).sum();
if let Some(n) = self.nulls.as_ref() {
Expand Down Expand Up @@ -404,7 +410,11 @@ impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {

impl std::fmt::Debug for StructArray {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "StructArray\n[\n")?;
writeln!(f, "StructArray")?;
writeln!(f, "-- validity: ")?;
writeln!(f, "[")?;
print_long_array(self, f, |_array, _index, f| write!(f, "valid"))?;
writeln!(f, "]\n[")?;
for (child_index, name) in self.column_names().iter().enumerate() {
let column = self.column(child_index);
writeln!(
Expand Down Expand Up @@ -731,4 +741,16 @@ mod tests {
Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as ArrayRef,
)]));
}

#[test]
fn test_struct_array_fmt_debug() {
    // One nullable Int32 child "c" holding 0..30; every odd row of the struct is null.
    let child_values = Int32Array::from((0..30).collect::<Vec<_>>());
    let validity = NullBuffer::new(BooleanBuffer::from(
        (0..30).map(|i| i % 2 == 0).collect::<Vec<_>>(),
    ));
    let arr: StructArray = StructArray::new(
        vec![Arc::new(Field::new("c", DataType::Int32, true))].into(),
        vec![Arc::new(child_values) as ArrayRef],
        Some(validity),
    );

    // The expected text shows the validity section first, then the child column;
    // both sections elide their middle 10 rows.
    let expected = "StructArray\n-- validity: \n[\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n ...10 elements...,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n valid,\n null,\n]\n[\n-- child 0: \"c\" (Int32)\nPrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n ...10 elements...,\n 20,\n 21,\n 22,\n 23,\n 24,\n 25,\n 26,\n 27,\n 28,\n 29,\n]\n]";
    let rendered = format!("{arr:?}");
    assert_eq!(rendered, expected);
}
}
Loading

0 comments on commit 6721477

Please sign in to comment.