From 24a6bff6769a5c6062aafe52b6702459086d3b94 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 4 Jan 2025 14:32:17 -0500 Subject: [PATCH] Minor: improve `zip` kernel docs, add examples (#6928) * Minor: improve `zip` kernel docs` * Add example for zip with scalar --- arrow-select/src/zip.rs | 66 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index acb31dfa3bc2..2efd2e749921 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -15,20 +15,72 @@ // specific language governing permissions and limitations // under the License. -//! Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy` +//! [`zip`]: Combine values from two arrays based on boolean mask use crate::filter::SlicesIterator; use arrow_array::*; use arrow_data::transform::MutableArrayData; use arrow_schema::ArrowError; -/// Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy` -/// are taken, where the mask evaluates `false` values of `falsy` are taken. +/// Zip two arrays by some boolean mask. /// -/// # Arguments -/// * `mask` - Boolean values used to determine from which array to take the values. -/// * `truthy` - Values of this array are taken if mask evaluates `true` -/// * `falsy` - Values of this array are taken if mask evaluates `false` +/// - Where `mask` is `true`, values of `truthy` are taken +/// - Where `mask` is `false` or `NULL`, values of `falsy` are taken +/// +/// # Example: `zip` two arrays +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array}; +/// # use arrow_select::zip::zip; +/// // mask: [true, true, false, NULL, true] +/// let mask = BooleanArray::from(vec![ +/// Some(true), Some(true), Some(false), None, Some(true) +/// ]); +/// // truthy array: [1, NULL, 3, 4, 5] +/// let truthy = Int32Array::from(vec![ +/// Some(1), None, Some(3), Some(4), Some(5) +/// ]); +/// // falsy array: [10, 20, 30, 40, 50] +/// let falsy = Int32Array::from(vec![ +/// Some(10), Some(20), Some(30), Some(40), Some(50) +/// ]); +/// // zip with this mask select the first, second and last value from `truthy` +/// // and the third and fourth value from `falsy` +/// let result = zip(&mask, &truthy, &falsy).unwrap(); +/// // Expected: [1, NULL, 30, 40, 5] +/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![ +/// Some(1), None, Some(30), Some(40), Some(5) +/// ])); +/// assert_eq!(&result, &expected); +/// ``` +/// +/// # Example: `zip` and array with a scalar +/// +/// Use `zip` to replace certain values in an array with a scalar +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array}; +/// # use arrow_select::zip::zip; +/// // mask: [true, true, false, NULL, true] +/// let mask = BooleanArray::from(vec![ +/// Some(true), Some(true), Some(false), None, Some(true) +/// ]); +/// // array: [1, NULL, 3, 4, 5] +/// let arr = Int32Array::from(vec![ +/// Some(1), None, Some(3), Some(4), Some(5) +/// ]); +/// // scalar: 42 +/// let scalar = Int32Array::new_scalar(42); +/// // zip the array with the mask select the first, second and last value from `arr` +/// // and fill the third and fourth value with the scalar 42 +/// let result = zip(&mask, &arr, &scalar).unwrap(); +/// // Expected: [1, NULL, 42, 42, 5] +/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![ +/// Some(1), None, Some(42), Some(42), Some(5) +/// ])); +/// assert_eq!(&result, &expected); +/// ``` pub fn zip( mask: &BooleanArray, truthy: &dyn Datum,