Skip to content

Commit

Permalink
remove all the deprecated stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Dec 20, 2024
1 parent edfc60d commit d5238f3
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 525 deletions.
344 changes: 0 additions & 344 deletions crates/store/re_chunk/src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,350 +201,6 @@ impl Chunk {
}
}

/// Iterates over the raw primitive values stored in a [`Chunk`] for the given component.
///
/// This is a very fast path: the component's column is downcasted to a primitive array of `T`
/// exactly once, and every yielded component batch is a plain sub-slice of that one shared slice.
/// Reach for it when the arrow datatype is simple and performance matters (e.g. scalars,
/// points, etc).
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
/// If the downcast to a primitive array of `T` fails, this panics in debug builds; in release
/// builds the failure is logged once and the iterator is empty.
///
/// See also:
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`]
/// * [`Self::iter_component`]
#[inline]
pub fn iter_primitive<T: arrow2::types::NativeType>(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = &[T]> + '_ {
    let Some(column) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(primitive_array) = column
        .values()
        .as_any()
        .downcast_ref::<Arrow2PrimitiveArray<T>>()
    else {
        // Loud in debug builds, logged (once) and discarded in release builds.
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        }
        re_log::error_once!("downcast failed for {component_name}, data discarded");
        return Either::Left(std::iter::empty());
    };

    // One flat slice covering the entire column; each batch below borrows a window of it.
    let flat_values = primitive_array.values().as_slice();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(start, count)| &flat_values[start..start + count]),
    )
}

/// Iterates over the raw boolean values stored in a [`Chunk`] for the given component.
///
/// This is a very fast path: the component's column is downcasted to a boolean array exactly
/// once, and every yielded component batch is a bitmap sliced out of that one shared bitmap.
/// Reach for it when the arrow datatype is simple and performance matters.
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
/// If the downcast to a boolean array fails, this panics in debug builds; in release builds
/// the failure is logged once and the iterator is empty.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`]
/// * [`Self::iter_component`]
#[inline]
pub fn iter_bool(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = Arrow2Bitmap> + '_ {
    let Some(column) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(boolean_array) = column
        .values()
        .as_any()
        .downcast_ref::<Arrow2BooleanArray>()
    else {
        // Loud in debug builds, logged (once) and discarded in release builds.
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        }
        re_log::error_once!("downcast failed for {component_name}, data discarded");
        return Either::Left(std::iter::empty());
    };

    // Owned handle on the column-wide bitmap, so the closure below can own it.
    let bitmap = boolean_array.values().clone();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(start, count)| bitmap.clone().sliced(start, count)),
    )
}

/// Returns an iterator over the raw primitive arrays of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once (list, then
/// fixed-size list, then primitive array of `T`), and then every component batch will be a
/// `&[[T; N]]` view into that global slice.
/// Use this when working with simple arrow datatypes and performance matters (e.g. scalars,
/// points, etc).
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
///
/// The data is discarded (panic in debug builds; logged once and empty iterator in release
/// builds) when:
/// * either downcast fails, or
/// * the width of the fixed-size list differs from `N`. Reinterpreting the values as `[T; N]`
///   would then either panic inside `bytemuck::cast_slice` or silently regroup the values.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`]
/// * [`Self::iter_component`]
pub fn iter_primitive_array<const N: usize, T: arrow2::types::NativeType>(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = &[[T; N]]> + '_
where
    [T; N]: bytemuck::Pod,
{
    let Some(list_array) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(fixed_size_list_array) = list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2FixedSizeListArray>()
    else {
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        } else {
            re_log::error_once!("downcast failed for {component_name}, data discarded");
        }
        return Either::Left(std::iter::empty());
    };

    // The requested `[T; N]` must match the width of the stored fixed-size list exactly,
    // otherwise the cast below is either invalid (panic) or yields wrongly grouped items.
    let size = fixed_size_list_array.size();
    if size != N {
        if cfg!(debug_assertions) {
            panic!(
                "fixed-size list of width {size} cannot be read as arrays of length {N} for {component_name}, data discarded"
            );
        } else {
            re_log::error_once!(
                "fixed-size list of width {size} cannot be read as arrays of length {N} for {component_name}, data discarded"
            );
        }
        return Either::Left(std::iter::empty());
    }

    let Some(values) = fixed_size_list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2PrimitiveArray<T>>()
    else {
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        } else {
            re_log::error_once!("downcast failed for {component_name}, data discarded");
        }
        return Either::Left(std::iter::empty());
    };

    let values = values.values().as_slice();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(idx, len)| {
                // `idx` and `len` count fixed-size lists; scale them by `size` to address
                // the flattened primitive values.
                bytemuck::cast_slice(&values[idx * size..idx * size + len * size])
            }),
    )
}

/// Returns an iterator over the raw list of primitive arrays of a [`Chunk`], for a given component.
///
/// This is a very fast path: the entire column will be downcasted at once (list, then inner
/// list, then fixed-size list, then primitive array of `T`), and then every component batch
/// will be a `Vec` of `&[[T; N]]` views into that global slice, one view per inner list.
/// Use this when working with simple arrow datatypes and performance matters (e.g. strips, etc).
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
///
/// The data is discarded (panic in debug builds; logged once and empty iterator in release
/// builds) when:
/// * any of the downcasts fails, or
/// * the width of the fixed-size list differs from `N`. Reinterpreting the values as `[T; N]`
///   would then either panic inside `bytemuck::cast_slice` or silently regroup the values.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_string`]
/// * [`Self::iter_buffer`]
/// * [`Self::iter_component`]
pub fn iter_primitive_array_list<const N: usize, T: arrow2::types::NativeType>(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = Vec<&[[T; N]]>> + '_
where
    [T; N]: bytemuck::Pod,
{
    let Some(list_array) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(inner_list_array) = list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2ListArray<i32>>()
    else {
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        } else {
            re_log::error_once!("downcast failed for {component_name}, data discarded");
        }
        return Either::Left(std::iter::empty());
    };

    // Start offset and length of every inner list, indexed by inner-list position.
    let inner_offsets = inner_list_array.offsets();
    let inner_lengths = inner_list_array.offsets().lengths().collect_vec();

    let Some(fixed_size_list_array) = inner_list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2FixedSizeListArray>()
    else {
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        } else {
            re_log::error_once!("downcast failed for {component_name}, data discarded");
        }
        return Either::Left(std::iter::empty());
    };

    // The requested `[T; N]` must match the width of the stored fixed-size list exactly,
    // otherwise the cast below is either invalid (panic) or yields wrongly grouped items.
    let size = fixed_size_list_array.size();
    if size != N {
        if cfg!(debug_assertions) {
            panic!(
                "fixed-size list of width {size} cannot be read as arrays of length {N} for {component_name}, data discarded"
            );
        } else {
            re_log::error_once!(
                "fixed-size list of width {size} cannot be read as arrays of length {N} for {component_name}, data discarded"
            );
        }
        return Either::Left(std::iter::empty());
    }

    let Some(values) = fixed_size_list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2PrimitiveArray<T>>()
    else {
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        } else {
            re_log::error_once!("downcast failed for {component_name}, data discarded");
        }
        return Either::Left(std::iter::empty());
    };

    let values = values.values();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(idx, len)| {
                // Outer `(idx, len)` selects which inner lists belong to this component batch.
                let inner_offsets = &inner_offsets.as_slice()[idx..idx + len];
                let inner_lengths = &inner_lengths.as_slice()[idx..idx + len];
                izip!(inner_offsets, inner_lengths)
                    .map(|(&inner_idx, &inner_len)| {
                        // Inner offsets/lengths count fixed-size lists; scale them by `size`
                        // to address the flattened primitive values.
                        let inner_idx = inner_idx as usize;
                        bytemuck::cast_slice(
                            &values[inner_idx * size..inner_idx * size + inner_len * size],
                        )
                    })
                    .collect_vec()
            }),
    )
}

/// Iterates over the raw strings stored in a [`Chunk`] for the given component.
///
/// This is a very fast path: the component's column is downcasted to a UTF8 array exactly
/// once, and every string of every component batch is sliced out of that array's shared
/// values buffer.
/// Reach for it when the arrow datatype is simple and performance matters (e.g. labels, etc).
///
/// Each yielded item is one component batch: a `Vec` holding one [`ArrowString`] per string.
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
/// If the downcast to a UTF8 array fails, this panics in debug builds; in release builds the
/// failure is logged once and the iterator is empty.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_buffer`]
/// * [`Self::iter_component`]
pub fn iter_string(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowString>> + '_ {
    let Some(column) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(utf8_array) = column
        .values()
        .as_any()
        .downcast_ref::<Arrow2Utf8Array<i32>>()
    else {
        // Loud in debug builds, logged (once) and discarded in release builds.
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        }
        re_log::error_once!("downcast failed for {component_name}, data discarded");
        return Either::Left(std::iter::empty());
    };

    // Shared values buffer, plus the start offset and length of every string in the column.
    let bytes = utf8_array.values();
    let string_starts = utf8_array.offsets();
    let string_lengths = utf8_array.offsets().lengths().collect_vec();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(first, count)| {
                // `(first, count)` selects which strings belong to this component batch.
                let starts = &string_starts.as_slice()[first..first + count];
                let lengths = &string_lengths.as_slice()[first..first + count];
                starts
                    .iter()
                    .zip(lengths)
                    .map(|(&start, &length)| {
                        ArrowString::from(bytes.clone().sliced(start as usize, length))
                    })
                    .collect::<Vec<_>>()
            }),
    )
}

/// Iterates over the raw buffers stored in a [`Chunk`] for the given component.
///
/// This is a very fast path: the component's column is downcasted exactly once (list, then
/// inner list, then primitive array of `T`), and every buffer of every component batch is
/// sliced out of that one shared values buffer.
/// Reach for it when the arrow datatype is simple and performance matters (e.g. blobs, etc).
///
/// Each yielded item is one component batch: a `Vec` holding one [`ArrowBuffer`] per inner list.
///
/// The iterator is empty when `get_first_component` finds nothing for `component_name`.
/// If either downcast fails, this panics in debug builds; in release builds the failure is
/// logged once and the iterator is empty.
///
/// See also:
/// * [`Self::iter_primitive`]
/// * [`Self::iter_primitive_array`]
/// * [`Self::iter_primitive_array_list`]
/// * [`Self::iter_string`]
/// * [`Self::iter_component`]
pub fn iter_buffer<T: arrow::datatypes::ArrowNativeType + arrow2::types::NativeType>(
    &self,
    component_name: &ComponentName,
) -> impl Iterator<Item = Vec<ArrowBuffer<T>>> + '_ {
    let Some(column) = self.get_first_component(component_name) else {
        return Either::Left(std::iter::empty());
    };

    let Some(inner_list_array) = column
        .values()
        .as_any()
        .downcast_ref::<Arrow2ListArray<i32>>()
    else {
        // Loud in debug builds, logged (once) and discarded in release builds.
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        }
        re_log::error_once!("downcast failed for {component_name}, data discarded");
        return Either::Left(std::iter::empty());
    };

    let Some(primitive_array) = inner_list_array
        .values()
        .as_any()
        .downcast_ref::<Arrow2PrimitiveArray<T>>()
    else {
        // Same policy as above for the innermost downcast.
        if cfg!(debug_assertions) {
            panic!("downcast failed for {component_name}, data discarded");
        }
        re_log::error_once!("downcast failed for {component_name}, data discarded");
        return Either::Left(std::iter::empty());
    };

    // Shared values buffer, plus the start offset and length of every inner list.
    let flat_values = primitive_array.values();
    let buffer_starts = inner_list_array.offsets();
    let buffer_lengths = inner_list_array.offsets().lengths().collect_vec();

    // NOTE: No validity checks here: that is left to `iter_component_offsets`.
    Either::Right(
        self.iter_component_offsets(component_name)
            .map(move |(first, count)| {
                // `(first, count)` selects which inner lists belong to this component batch.
                let starts = &buffer_starts.as_slice()[first..first + count];
                let lengths = &buffer_lengths.as_slice()[first..first + count];
                starts
                    .iter()
                    .zip(lengths)
                    .map(|(&start, &length)| {
                        // NOTE: Not an actual clone, just a refbump of the underlying buffer.
                        let buffer: ArrowBuffer<T> =
                            flat_values.clone().sliced(start as usize, length).into();
                        buffer
                    })
                    .collect::<Vec<_>>()
            }),
    )
}

/// Returns an iterator over the all the sliced component batches in a [`Chunk`]'s column, for
/// a given component.
///
Expand Down
Loading

0 comments on commit d5238f3

Please sign in to comment.