Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions datafusion/common/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,88 @@ pub fn get_data_dir(
}
}

#[macro_export]
macro_rules! create_array {
(Boolean, $values: expr) => {
std::sync::Arc::new(arrow::array::BooleanArray::from($values))
};
(Int8, $values: expr) => {
std::sync::Arc::new(arrow::array::Int8Array::from($values))
};
(Int16, $values: expr) => {
std::sync::Arc::new(arrow::array::Int16Array::from($values))
};
(Int32, $values: expr) => {
std::sync::Arc::new(arrow::array::Int32Array::from($values))
};
(Int64, $values: expr) => {
std::sync::Arc::new(arrow::array::Int64Array::from($values))
};
(UInt8, $values: expr) => {
std::sync::Arc::new(arrow::array::UInt8Array::from($values))
};
(UInt16, $values: expr) => {
std::sync::Arc::new(arrow::array::UInt16Array::from($values))
};
(UInt32, $values: expr) => {
std::sync::Arc::new(arrow::array::UInt32Array::from($values))
};
(UInt64, $values: expr) => {
std::sync::Arc::new(arrow::array::UInt64Array::from($values))
};
(Float16, $values: expr) => {
std::sync::Arc::new(arrow::array::Float16Array::from($values))
};
(Float32, $values: expr) => {
std::sync::Arc::new(arrow::array::Float32Array::from($values))
};
(Float64, $values: expr) => {
std::sync::Arc::new(arrow::array::Float64Array::from($values))
};
(Utf8, $values: expr) => {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a bunch of other types that might be worth supporting too LargeUtf8, Utf8View, etc

Full list here:
https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html

We could definitely do it as a follow on PR

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added issue to track:
#12895

std::sync::Arc::new(arrow::array::StringArray::from($values))
};
}

/// Creates a record batch from literal slice of values, suitable for rapid
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is beautiful. We can probably use it to clean up a bunch of the testing setup as well

/// testing and development.
///
/// Example:
/// ```
/// use datafusion_common::{record_batch, create_array};
/// let batch = record_batch!(
/// ("a", Int32, vec![1, 2, 3]),
/// ("b", Float64, vec![Some(4.0), None, Some(5.0)]),
/// ("c", Utf8, vec!["alpha", "beta", "gamma"])
/// );
/// ```
#[macro_export]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this affect how the macro is imported? do we need this?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we do need it so we can use it in downstream repos as well.

macro_rules! record_batch {
($(($name: expr, $type: ident, $values: expr)),*) => {
{
let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![
$(
arrow_schema::Field::new($name, arrow_schema::DataType::$type, true),
)*
]));

let batch = arrow_array::RecordBatch::try_new(
schema,
vec![$(
create_array!($type, $values),
)*]
);

batch
}
}
}

#[cfg(test)]
mod tests {
use crate::cast::{as_float64_array, as_int32_array, as_string_array};
use crate::error::Result;

use super::*;
use std::env;

Expand Down Expand Up @@ -333,4 +413,44 @@ mod tests {
let res = parquet_test_data();
assert!(PathBuf::from(res).is_dir());
}

#[test]
fn test_create_record_batch() -> Result<()> {
use arrow_array::Array;

let batch = record_batch!(
("a", Int32, vec![1, 2, 3, 4]),
("b", Float64, vec![Some(4.0), None, Some(5.0), None]),
("c", Utf8, vec!["alpha", "beta", "gamma", "delta"])
)?;

assert_eq!(3, batch.num_columns());
assert_eq!(4, batch.num_rows());

let values: Vec<_> = as_int32_array(batch.column(0))?
.values()
.iter()
.map(|v| v.to_owned())
.collect();
assert_eq!(values, vec![1, 2, 3, 4]);

let values: Vec<_> = as_float64_array(batch.column(1))?
.values()
.iter()
.map(|v| v.to_owned())
.collect();
assert_eq!(values, vec![4.0, 0.0, 5.0, 0.0]);

let nulls: Vec<_> = as_float64_array(batch.column(1))?
.nulls()
.unwrap()
.iter()
.collect();
assert_eq!(nulls, vec![true, false, true, false]);

let values: Vec<_> = as_string_array(batch.column(2))?.iter().flatten().collect();
assert_eq!(values, vec!["alpha", "beta", "gamma", "delta"]);

Ok(())
}
}