Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 10 additions & 53 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,40 +401,25 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {

/// Common operations for primitive types, including numeric types and boolean type.
pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> {
/// Returns a `Buffer` holding all the values of this array.
///
/// Note this doesn't take the offset of this array into account.
fn values(&self) -> Buffer;

/// Returns the primitive value at index `i`.
///
/// Note this doesn't do any bound checking, for performance reason.
fn value(&self, i: usize) -> T::Native;
}

// This is necessary when caller wants to access `PrimitiveArrayOps`'s methods with
// `ArrowPrimitiveType`. It doesn't have any implementation as the actual implementations
// are delegated to that of `ArrowNumericType` and `BooleanType`.
impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
default fn values(&self) -> Buffer {
unimplemented!()
}

default fn value(&self, _: usize) -> T::Native {
unimplemented!()
}
}

impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> {
fn values(&self) -> Buffer {
self.values()
self.data.buffers()[0].clone()
}

fn value(&self, i: usize) -> T::Native {
self.value(i)
}
}

impl PrimitiveArrayOps<BooleanType> for BooleanArray {
fn values(&self) -> Buffer {
self.values()
}

fn value(&self, i: usize) -> bool {
self.value(i)
let offset = i + self.offset();
unsafe { T::index(self.raw_values.get(), offset) }
}
}

Expand Down Expand Up @@ -475,13 +460,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
PrimitiveArray::from(array_data)
}

/// Returns a `Buffer` holding all the values of this array.
///
/// Note this doesn't take the offset of this array into account.
pub fn values(&self) -> Buffer {
self.data.buffers()[0].clone()
}

/// Returns the length of this array.
pub fn len(&self) -> usize {
self.data.len()
Expand All @@ -497,13 +475,6 @@ impl<T: ArrowNumericType> PrimitiveArray<T> {
unsafe { self.raw_values.get().add(self.data.offset()) }
}

/// Returns the primitive value at index `i`.
///
/// Note this doesn't do any bound checking, for performance reason.
pub fn value(&self, i: usize) -> T::Native {
unsafe { *(self.raw_values().add(i)) }
}

/// Returns a slice for the given offset and length
///
/// Note this doesn't do any bound checking, for performance reason.
Expand Down Expand Up @@ -695,20 +666,6 @@ impl PrimitiveArray<BooleanType> {
BooleanArray::from(array_data)
}

/// Returns a `Buffer` holds all the values of this array.
///
/// Note this doesn't take account into the offset of this array.
pub fn values(&self) -> Buffer {
self.data.buffers()[0].clone()
}

/// Returns the boolean value at index `i`.
pub fn value(&self, i: usize) -> bool {
assert!(i < self.data.len());
let offset = i + self.offset();
unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, offset) }
}

// Returns a new primitive array builder
pub fn builder(capacity: usize) -> BooleanBuilder {
BooleanBuilder::new(capacity)
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
//! ```
//! extern crate arrow;
//!
//! use arrow::array::Int16Array;
//! use arrow::array::{Int16Array, PrimitiveArrayOps};
//!
//! // Create a new builder with a capacity of 100
//! let mut builder = Int16Array::builder(100);
Expand Down
1 change: 1 addition & 0 deletions rust/arrow/src/compute/kernels/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ pub fn not(left: &BooleanArray) -> Result<BooleanArray> {
#[cfg(test)]
mod tests {
use super::*;
use crate::array::PrimitiveArrayOps;

#[test]
fn test_bool_array_and() {
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/compute/kernels/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ pub struct SortColumn {
/// ```
/// use std::convert::From;
/// use std::sync::Arc;
/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, as_primitive_array};
/// use arrow::array::{ArrayRef, StringArray, PrimitiveArray, PrimitiveArrayOps, as_primitive_array};
/// use arrow::compute::kernels::sort::{SortColumn, SortOptions, lexsort};
/// use arrow::datatypes::Int64Type;
///
Expand Down
146 changes: 69 additions & 77 deletions rust/arrow/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
//! * [`DataType`](crate::datatypes::DataType) to describe the type of a field.

use std::collections::HashMap;
use std::default::Default;
use std::fmt;
use std::mem::size_of;
#[cfg(feature = "simd")]
Expand All @@ -39,6 +40,7 @@ use serde_json::{
};

use crate::error::{ArrowError, Result};
use crate::util::bit_util;

/// The set of datatypes that are supported by this implementation of Apache Arrow.
///
Expand Down Expand Up @@ -184,7 +186,7 @@ pub struct Field {
}

pub trait ArrowNativeType:
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + 'static
fmt::Debug + Send + Sync + Copy + PartialOrd + FromStr + Default + 'static
{
fn into_json_value(self) -> Option<Value>;

Expand All @@ -208,12 +210,25 @@ pub trait ArrowPrimitiveType: 'static {
fn get_data_type() -> DataType;

/// Returns the bit width of this primitive type.
fn get_bit_width() -> usize;
fn get_bit_width() -> usize {
size_of::<Self::Native>() * 8
}

/// Returns a default value of this primitive type.
///
/// This is useful for aggregate array ops like `sum()`, `mean()`.
fn default_value() -> Self::Native;
fn default_value() -> Self::Native {
Default::default()
}

/// Returns a value offset from the given pointer by the given index. The default
/// implementation (used for all non-boolean types) is simply equivalent to pointer-arithmetic.
/// # Safety
/// Just like array-access in C: the raw_ptr must be the start of a valid array, and the index
/// must be less than the size of the array.
unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
*(raw_ptr.add(i))
}
}

impl ArrowNativeType for bool {
Expand Down Expand Up @@ -346,8 +361,32 @@ impl ArrowNativeType for f64 {
}
}

// BooleanType is special: its bit-width is not the size of the primitive type, and its `index`
// operation assumes bit-packing.
#[derive(Debug)]
pub struct BooleanType {}

impl ArrowPrimitiveType for BooleanType {
type Native = bool;

fn get_data_type() -> DataType {
DataType::Boolean
}

fn get_bit_width() -> usize {
1
}

/// # Safety
/// The pointer must be part of a bit-packed boolean array, and the index must be less than the
/// size of the array.
unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native {
bit_util::get_bit_raw(raw_ptr as *const u8, i)
}
}

macro_rules! make_type {
($name:ident, $native_ty:ty, $data_ty:expr, $bit_width:expr, $default_val:expr) => {
($name:ident, $native_ty:ty, $data_ty:expr) => {
#[derive(Debug)]
pub struct $name {}

Expand All @@ -357,134 +396,87 @@ macro_rules! make_type {
fn get_data_type() -> DataType {
$data_ty
}

fn get_bit_width() -> usize {
$bit_width
}

fn default_value() -> Self::Native {
$default_val
}
}
};
}

make_type!(BooleanType, bool, DataType::Boolean, 1, false);
make_type!(Int8Type, i8, DataType::Int8, 8, 0i8);
make_type!(Int16Type, i16, DataType::Int16, 16, 0i16);
make_type!(Int32Type, i32, DataType::Int32, 32, 0i32);
make_type!(Int64Type, i64, DataType::Int64, 64, 0i64);
make_type!(UInt8Type, u8, DataType::UInt8, 8, 0u8);
make_type!(UInt16Type, u16, DataType::UInt16, 16, 0u16);
make_type!(UInt32Type, u32, DataType::UInt32, 32, 0u32);
make_type!(UInt64Type, u64, DataType::UInt64, 64, 0u64);
make_type!(Float32Type, f32, DataType::Float32, 32, 0.0f32);
make_type!(Float64Type, f64, DataType::Float64, 64, 0.0f64);
make_type!(Int8Type, i8, DataType::Int8);
make_type!(Int16Type, i16, DataType::Int16);
make_type!(Int32Type, i32, DataType::Int32);
make_type!(Int64Type, i64, DataType::Int64);
make_type!(UInt8Type, u8, DataType::UInt8);
make_type!(UInt16Type, u16, DataType::UInt16);
make_type!(UInt32Type, u32, DataType::UInt32);
make_type!(UInt64Type, u64, DataType::UInt64);
make_type!(Float32Type, f32, DataType::Float32);
make_type!(Float64Type, f64, DataType::Float64);
make_type!(
TimestampSecondType,
i64,
DataType::Timestamp(TimeUnit::Second, None),
64,
0i64
DataType::Timestamp(TimeUnit::Second, None)
);
make_type!(
TimestampMillisecondType,
i64,
DataType::Timestamp(TimeUnit::Millisecond, None),
64,
0i64
DataType::Timestamp(TimeUnit::Millisecond, None)
);
make_type!(
TimestampMicrosecondType,
i64,
DataType::Timestamp(TimeUnit::Microsecond, None),
64,
0i64
DataType::Timestamp(TimeUnit::Microsecond, None)
);
make_type!(
TimestampNanosecondType,
i64,
DataType::Timestamp(TimeUnit::Nanosecond, None),
64,
0i64
);
make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day), 32, 0i32);
make_type!(
Date64Type,
i64,
DataType::Date64(DateUnit::Millisecond),
64,
0i64
);
make_type!(
Time32SecondType,
i32,
DataType::Time32(TimeUnit::Second),
32,
0i32
DataType::Timestamp(TimeUnit::Nanosecond, None)
);
make_type!(Date32Type, i32, DataType::Date32(DateUnit::Day));
make_type!(Date64Type, i64, DataType::Date64(DateUnit::Millisecond));
make_type!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second));
make_type!(
Time32MillisecondType,
i32,
DataType::Time32(TimeUnit::Millisecond),
32,
0i32
DataType::Time32(TimeUnit::Millisecond)
);
make_type!(
Time64MicrosecondType,
i64,
DataType::Time64(TimeUnit::Microsecond),
64,
0i64
DataType::Time64(TimeUnit::Microsecond)
);
make_type!(
Time64NanosecondType,
i64,
DataType::Time64(TimeUnit::Nanosecond),
64,
0i64
DataType::Time64(TimeUnit::Nanosecond)
);
make_type!(
IntervalYearMonthType,
i32,
DataType::Interval(IntervalUnit::YearMonth),
32,
0i32
DataType::Interval(IntervalUnit::YearMonth)
);
make_type!(
IntervalDayTimeType,
i64,
DataType::Interval(IntervalUnit::DayTime),
64,
0i64
DataType::Interval(IntervalUnit::DayTime)
);
make_type!(
DurationSecondType,
i64,
DataType::Duration(TimeUnit::Second),
64,
0i64
DataType::Duration(TimeUnit::Second)
);
make_type!(
DurationMillisecondType,
i64,
DataType::Duration(TimeUnit::Millisecond),
64,
0i64
DataType::Duration(TimeUnit::Millisecond)
);
make_type!(
DurationMicrosecondType,
i64,
DataType::Duration(TimeUnit::Microsecond),
64,
0i64
DataType::Duration(TimeUnit::Microsecond)
);
make_type!(
DurationNanosecondType,
i64,
DataType::Duration(TimeUnit::Nanosecond),
64,
0i64
DataType::Duration(TimeUnit::Nanosecond)
);

/// A subtype of primitive type that represents legal dictionary keys.
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/util/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Utilities for printing record batches

use crate::array;
use crate::array::StringArrayOps;
use crate::array::{PrimitiveArrayOps, StringArrayOps};
use crate::datatypes::{DataType, TimeUnit};
use crate::record_batch::RecordBatch;

Expand Down
4 changes: 3 additions & 1 deletion rust/datafusion/examples/simple_udf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// under the License.

use arrow::{
array::{Array, ArrayRef, Float32Array, Float64Array, Float64Builder},
array::{
Array, ArrayRef, Float32Array, Float64Array, Float64Builder, PrimitiveArrayOps,
},
datatypes::DataType,
record_batch::RecordBatch,
util::pretty,
Expand Down
Loading