Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1. Allow users to choose to write nested types to CSV 2. Add a feature to display struct data as JSON #6950

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions arrow-cast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ features = ["prettyprint"]
[features]
prettyprint = ["comfy-table"]
force_validate = []
struct_display_json =[]

[dependencies]
arrow-array = { workspace = true }
Expand All @@ -54,6 +55,7 @@ atoi = "2.0.0"
comfy-table = { version = "7.0", optional = true, default-features = false }
base64 = "0.22"
ryu = "1.0.16"
serde_json = "1.0"

[dev-dependencies]
criterion = { version = "0.5", default-features = false }
Expand Down
28 changes: 28 additions & 0 deletions arrow-cast/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
//! record batch pretty printing.
//!
//! [`pretty`]: crate::pretty
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter, Write};
use std::ops::Range;
use std::str::FromStr;

use arrow_array::cast::*;
use arrow_array::temporal_conversions::*;
Expand Down Expand Up @@ -938,7 +940,33 @@ impl<'a> DisplayIndexState<'a> for &'a StructArray {
})
.collect()
}
#[cfg(feature = "struct_display_json")]
/// Writes the struct value at row `idx` to `f` as a single JSON object.
///
/// Every `(name, display)` pair in the state `s` is first rendered into a
/// scratch string. If that string parses as JSON (for example a number,
/// `true`, `null`, or a nested object) it is embedded as that JSON value;
/// otherwise it is embedded as a JSON string.
///
/// Keys are collected in a `BTreeMap`, so they are emitted in sorted key
/// order rather than in the order the fields appear in `s`.
///
/// # Errors
///
/// Returns an error if formatting any child value fails, if the object
/// cannot be serialized, or if writing to `f` fails.
fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
    let mut json = BTreeMap::<String, serde_json::Value>::new();
    for (name, display) in s.iter() {
        let mut display_str = String::new();
        display.write(idx, &mut display_str)?;
        // Prefer a typed JSON value; fall back to the raw text as a string.
        let value = serde_json::Value::from_str(&display_str)
            .unwrap_or_else(|_| serde_json::Value::String(display_str));
        json.insert(name.to_string(), value);
    }
    // Surface serialization and sink failures instead of silently emitting
    // an empty or truncated value.
    let rendered = serde_json::to_string(&json).map_err(|_| std::fmt::Error)?;
    f.write_str(&rendered)?;
    Ok(())
}
#[cfg(not(feature = "struct_display_json"))]
fn write(&self, s: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
let mut iter = s.iter();
f.write_char('{')?;
Expand Down
15 changes: 14 additions & 1 deletion arrow-csv/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ pub struct Writer<W: Write> {
beginning: bool,
/// The value to represent null entries, defaults to [`DEFAULT_NULL_VALUE`]
null_value: Option<String>,

/// Whether columns with nested data types are written instead of being rejected with an error
show_nested: bool,
}

impl<W: Write> Writer<W> {
Expand Down Expand Up @@ -132,7 +135,7 @@ impl<W: Write> Writer<W> {
.columns()
.iter()
.map(|a| {
if a.data_type().is_nested() {
if a.data_type().is_nested() && !self.show_nested {
Err(ArrowError::CsvError(format!(
"Nested type {} is not supported in CSV",
a.data_type()
Expand Down Expand Up @@ -211,6 +214,8 @@ pub struct WriterBuilder {
time_format: Option<String>,
/// Optional value to represent null
null_value: Option<String>,
/// Whether columns with nested data types are written instead of being rejected with an error
show_nested: bool,
}

impl Default for WriterBuilder {
Expand All @@ -227,6 +232,7 @@ impl Default for WriterBuilder {
timestamp_tz_format: None,
time_format: None,
null_value: None,
show_nested: false,
}
}
}
Expand Down Expand Up @@ -389,6 +395,12 @@ impl WriterBuilder {
self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE)
}

/// Set whether to show nested fields
pub fn with_show_nested(mut self, show_nested: bool) -> Self {
self.show_nested = show_nested;
self
}

/// Create a new `Writer`
pub fn build<W: Write>(self, writer: W) -> Writer<W> {
let mut builder = csv::WriterBuilder::new();
Expand All @@ -408,6 +420,7 @@ impl WriterBuilder {
timestamp_format: self.timestamp_format,
timestamp_tz_format: self.timestamp_tz_format,
null_value: self.null_value,
show_nested: self.show_nested,
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ csv = ["arrow-csv"]
ipc = ["arrow-ipc"]
json = ["arrow-json"]
prettyprint = ["arrow-cast/prettyprint"]
struct_display_json =["arrow-cast/struct_display_json"]

# The test utils feature enables code used in benchmarks and tests but
# not the core arrow code itself. Be aware that `rand` must be kept as
# an optional dependency for supporting compile to wasm32-unknown-unknown
Expand Down
Loading