From 15c33df19bdf79329db72168ee11f696794a0ed5 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 10 Oct 2024 06:45:08 -0400 Subject: [PATCH 1/3] Add macro for creating record batch, useful for unit test or rapid development --- datafusion/common/src/test_util.rs | 91 ++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 36254192550c8..8d3da350ad08d 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -279,6 +279,86 @@ pub fn get_data_dir( } } +#[macro_export] +macro_rules! create_array { + (Boolean, $values: expr) => { + std::sync::Arc::new(arrow::array::BooleanArray::from($values)) + }; + (Int8, $values: expr) => { + std::sync::Arc::new(arrow::array::Int8Array::from($values)) + }; + (Int16, $values: expr) => { + std::sync::Arc::new(arrow::array::Int16Array::from($values)) + }; + (Int32, $values: expr) => { + std::sync::Arc::new(arrow::array::Int32Array::from($values)) + }; + (Int64, $values: expr) => { + std::sync::Arc::new(arrow::array::Int64Array::from($values)) + }; + (UInt8, $values: expr) => { + std::sync::Arc::new(arrow::array::UInt8Array::from($values)) + }; + (UInt16, $values: expr) => { + std::sync::Arc::new(arrow::array::UInt16Array::from($values)) + }; + (UInt32, $values: expr) => { + std::sync::Arc::new(arrow::array::UInt32Array::from($values)) + }; + (UInt64, $values: expr) => { + std::sync::Arc::new(arrow::array::UInt64Array::from($values)) + }; + (Float16, $values: expr) => { + std::sync::Arc::new(arrow::array::Float16Array::from($values)) + }; + (Float32, $values: expr) => { + std::sync::Arc::new(arrow::array::Float32Array::from($values)) + }; + (Float64, $values: expr) => { + std::sync::Arc::new(arrow::array::Float64Array::from($values)) + }; + (Utf8, $values: expr) => { + std::sync::Arc::new(arrow::array::StringArray::from($values)) + }; +} + +/// Creates a record batch from literal slice of values, suitable for rapid +/// testing and development. +/// +/// Example: +/// ``` +/// let batch = create_batch!( +/// ("a", Int32, vec![1, 2, 3]), +/// ("b", Float64, vec![Some(4.0), None, Some(5.0)]), +/// ("c", Utf8, vec!["alpha", "beta", "gamma"]) +/// )?; +/// +/// let ctx = SessionContext::new(); +/// +/// df = ctx.read_batch(batch)?; +/// ``` +#[macro_export] +macro_rules! create_batch { + ($(($name: expr, $type: ident, $values: expr)),*) => { + { + let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ + $( + arrow_schema::Field::new($name, arrow_schema::DataType::$type, true), + )* + ])); + + let batch = arrow_array::RecordBatch::try_new( + schema, + vec![$( + create_array!($type, $values), + )*] + ); + + batch + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -333,4 +413,15 @@ mod tests { let res = parquet_test_data(); assert!(PathBuf::from(res).is_dir()); } + + #[test] + fn test_create_record_batch() { + let batch = create_batch!( + ("a", Int32, vec![1, 2, 3]), + ("b", Float64, vec![Some(4.0), None, Some(5.0)]), + ("c", Utf8, vec!["alpha", "beta", "gamma"]) + ); + + assert!(batch.is_ok()); + } } From 9d6c45a26aabde931be58c9bb4606fc377f304df Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Thu, 10 Oct 2024 08:50:01 -0400 Subject: [PATCH 2/3] Update docstring --- datafusion/common/src/test_util.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 8d3da350ad08d..6c3ad46e0444f 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -327,15 +327,12 @@ macro_rules! create_array { /// /// Example: /// ``` +/// use datafusion_common::{create_batch, create_array}; /// let batch = create_batch!( /// ("a", Int32, vec![1, 2, 3]), /// ("b", Float64, vec![Some(4.0), None, Some(5.0)]), /// ("c", Utf8, vec!["alpha", "beta", "gamma"]) -/// )?; -/// -/// let ctx = SessionContext::new(); -/// -/// df = ctx.read_batch(batch)?; +/// ); /// ``` #[macro_export] macro_rules! create_batch { From 2862c0ac10f1eb82b308f67d0422f4b85f2e193e Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 12 Oct 2024 12:34:23 -0400 Subject: [PATCH 3/3] Add additional checks in unit test and rename macro per user input --- datafusion/common/src/test_util.rs | 54 ++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 6c3ad46e0444f..422fcb5eb3e0b 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -327,15 +327,15 @@ macro_rules! create_array { /// /// Example: /// ``` -/// use datafusion_common::{create_batch, create_array}; -/// let batch = create_batch!( +/// use datafusion_common::{record_batch, create_array}; +/// let batch = record_batch!( /// ("a", Int32, vec![1, 2, 3]), /// ("b", Float64, vec![Some(4.0), None, Some(5.0)]), /// ("c", Utf8, vec!["alpha", "beta", "gamma"]) /// ); /// ``` #[macro_export] -macro_rules! create_batch { +macro_rules! record_batch { ($(($name: expr, $type: ident, $values: expr)),*) => { { let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ @@ -358,6 +358,9 @@ macro_rules! create_batch { #[cfg(test)] mod tests { + use crate::cast::{as_float64_array, as_int32_array, as_string_array}; + use crate::error::Result; + use super::*; use std::env; @@ -412,13 +415,42 @@ mod tests { } #[test] - fn test_create_record_batch() { - let batch = create_batch!( - ("a", Int32, vec![1, 2, 3]), - ("b", Float64, vec![Some(4.0), None, Some(5.0)]), - ("c", Utf8, vec!["alpha", "beta", "gamma"]) - ); - - assert!(batch.is_ok()); + fn test_create_record_batch() -> Result<()> { + use arrow_array::Array; + + let batch = record_batch!( + ("a", Int32, vec![1, 2, 3, 4]), + ("b", Float64, vec![Some(4.0), None, Some(5.0), None]), + ("c", Utf8, vec!["alpha", "beta", "gamma", "delta"]) + )?; + + assert_eq!(3, batch.num_columns()); + assert_eq!(4, batch.num_rows()); + + let values: Vec<_> = as_int32_array(batch.column(0))? + .values() + .iter() + .map(|v| v.to_owned()) + .collect(); + assert_eq!(values, vec![1, 2, 3, 4]); + + let values: Vec<_> = as_float64_array(batch.column(1))? + .values() + .iter() + .map(|v| v.to_owned()) + .collect(); + assert_eq!(values, vec![4.0, 0.0, 5.0, 0.0]); + + let nulls: Vec<_> = as_float64_array(batch.column(1))? + .nulls() + .unwrap() + .iter() + .collect(); + assert_eq!(nulls, vec![true, false, true, false]); + + let values: Vec<_> = as_string_array(batch.column(2))?.iter().flatten().collect(); + assert_eq!(values, vec!["alpha", "beta", "gamma", "delta"]); + + Ok(()) } }