From df868b4a3df04fd507af88f584ff853f94e3c251 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 20 Sep 2024 09:52:43 -0500 Subject: [PATCH 01/12] regenerate arrow-ipc/src/gen with patched flatbuffers --- arrow-ipc/Cargo.toml | 2 +- arrow-ipc/regen.sh | 91 +++---- arrow-ipc/src/gen/File.rs | 28 ++- arrow-ipc/src/gen/Message.rs | 70 +++--- arrow-ipc/src/gen/Schema.rs | 399 +++++++++++++++--------------- arrow-ipc/src/gen/SparseTensor.rs | 184 +++++++++++--- arrow-ipc/src/gen/Tensor.rs | 152 ++++++++++-- arrow-ipc/src/lib.rs | 11 + 8 files changed, 607 insertions(+), 330 deletions(-) diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index cf91b3a3415f..e5332a54ce23 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -38,7 +38,7 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -flatbuffers = { version = "24.3.25", default-features = false } +flatbuffers = { default-features = false, path = "../../flatbuffers/rust/flatbuffers" } lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13.0", default-features = false, optional = true } diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh index 8d8862ccc7f4..11e6273d1206 100755 --- a/arrow-ipc/regen.sh +++ b/arrow-ipc/regen.sh @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Change to the toplevel `arrow-rs` directory pushd $DIR/../ -echo "Build flatc from source ..." - -FB_URL="https://github.com/google/flatbuffers" -FB_DIR="arrow/.flatbuffers" -FLATC="$FB_DIR/bazel-bin/flatc" - -if [ -z $(which bazel) ]; then - echo "bazel is required to build flatc" - exit 1 -fi - -echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" - -if [ ! -e $FB_DIR ]; then - echo "git clone $FB_URL ..." - git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR +if [ -z "$FLATC" ]; then + echo "Build flatc from source ..." + + FB_URL="https://github.com/google/flatbuffers" + FB_DIR="arrow/.flatbuffers" + FLATC="$FB_DIR/bazel-bin/flatc" + + if [ -z $(which bazel) ]; then + echo "bazel is required to build flatc" + exit 1 + fi + + echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" + + if [ ! -e $FB_DIR ]; then + echo "git clone $FB_URL ..." + git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR + else + echo "git pull $FB_URL ..." + git -C $FB_DIR pull + fi + + pushd $FB_DIR + echo "run: bazel build :flatc ..." + bazel build :flatc + popd else - echo "git pull $FB_URL ..." - git -C $FB_DIR pull + echo "Using flatc $FLATC ..." fi -pushd $FB_DIR -echo "run: bazel build :flatc ..." -bazel build :flatc -popd - - # Execute the code generation: $FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs @@ -80,7 +83,7 @@ PREFIX=$(cat <<'HEREDOC' #![allow(unused_imports)] use std::{cmp::Ordering, mem}; -use flatbuffers::EndianScalar; +use flatbuffers::{EndianScalar, PushAlignment}; HEREDOC ) @@ -99,31 +102,31 @@ for f in `ls *.rs`; do fi echo "Modifying: $f" - sed -i '' '/extern crate flatbuffers;/d' $f - sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f - sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f - sed -i '' '/pub mod org {/d' $f - sed -i '' '/pub mod apache {/d' $f - sed -i '' '/pub mod arrow {/d' $f - sed -i '' '/pub mod flatbuf {/d' $f - sed -i '' '/} \/\/ pub mod flatbuf/d' $f - sed -i '' '/} \/\/ pub mod arrow/d' $f - sed -i '' '/} \/\/ pub mod apache/d' $f - sed -i '' '/} \/\/ pub mod org/d' $f - sed -i '' '/use core::mem;/d' $f - sed -i '' '/use core::cmp::Ordering;/d' $f - sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f + sed --in-place='' '/extern crate flatbuffers;/d' $f + sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f + sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f + sed --in-place='' '/pub mod org {/d' $f + sed --in-place='' '/pub mod apache {/d' $f + sed --in-place='' '/pub mod arrow {/d' $f + sed --in-place='' '/pub mod flatbuf {/d' $f + sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f + sed --in-place='' '/} \/\/ pub mod arrow/d' $f + sed --in-place='' '/} \/\/ pub mod apache/d' $f + sed --in-place='' '/} \/\/ pub mod org/d' $f + sed --in-place='' '/use core::mem;/d' $f + sed --in-place='' '/use core::cmp::Ordering;/d' $f + sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f # required by flatc 1.12.0+ - sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f + sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f for name in ${names[@]}; do - sed -i '' "/use crate::${name}::\*;/d" $f - sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f + sed --in-place='' "/use crate::${name}::\*;/d" $f + sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f done # Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_". - sed -i '' 's/type__/type_/g' $f - sed -i '' 's/TYPE__/TYPE_/g' $f + sed --in-place='' 's/type__/type_/g' $f + sed --in-place='' 's/TYPE__/TYPE_/g' $f # Some files need prefixes if [[ $f == "File.rs" ]]; then diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index c0c2fb183237..0895a878bcae 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -19,10 +19,12 @@ #![allow(unused_imports)] use crate::gen::Schema::*; -use flatbuffers::EndianScalar; +use flatbuffers::{EndianScalar, PushAlignment}; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + // struct Block, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block { let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size()); dst.copy_from_slice(src); } + #[inline] + fn alignment() -> PushAlignment { + PushAlignment::new(8) + } } impl<'a> flatbuffers::Verifiable for Block { @@ -211,8 +217,8 @@ impl<'a> Footer<'a> { Footer { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, args: &'args FooterArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = FooterBuilder::new(_fbb); @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> { } } -pub struct FooterBuilder<'a: 'b, 'b> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, +pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> { #[inline] pub fn add_version(&mut self, version: MetadataVersion) { self.fbb_ @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { ); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> { let start = _fbb.start_table(); FooterBuilder { fbb_: _fbb, @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer { flatbuffers::size_prefixed_root_unchecked::