From 35974e56467a326cc00fd4a4e617e7424e9ff5ef Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 21 Jan 2026 14:45:46 +0000 Subject: [PATCH 01/31] Extension DType vtable Signed-off-by: Nicholas Gates --- Cargo.lock | 2 + vortex-array/src/vtable/mod.rs | 2 - vortex-dtype/Cargo.toml | 2 + .../src/{extension.rs => extension/mod.rs} | 6 + vortex-dtype/src/extension/temporal.rs | 58 +++++ vortex-dtype/src/extension/v2.rs | 212 ++++++++++++++++++ vortex-dtype/src/extension/vtable.rs | 83 +++++++ vortex-dtype/src/lib.rs | 1 + vortex-dtype/src/session.rs | 37 +++ 9 files changed, 401 insertions(+), 2 deletions(-) rename vortex-dtype/src/{extension.rs => extension/mod.rs} (99%) create mode 100644 vortex-dtype/src/extension/temporal.rs create mode 100644 vortex-dtype/src/extension/v2.rs create mode 100644 vortex-dtype/src/extension/vtable.rs create mode 100644 vortex-dtype/src/session.rs diff --git a/Cargo.lock b/Cargo.lock index aa3f9c589a9..06c006f382e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10366,6 +10366,7 @@ name = "vortex-dtype" version = "0.1.0" dependencies = [ "arbitrary", + "arcref", "arrow-buffer 57.2.0", "arrow-schema 57.2.0", "flatbuffers", @@ -10387,6 +10388,7 @@ dependencies = [ "vortex-error", "vortex-flatbuffers", "vortex-proto", + "vortex-session", "vortex-utils", ] diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index 5382c29f295..12ae5c87c40 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -4,7 +4,6 @@ //! This module contains the VTable definitions for a Vortex encoding. mod array; -mod canonical; mod compute; mod dyn_; mod operations; @@ -16,7 +15,6 @@ use std::ops::Deref; use std::ops::Range; pub use array::*; -pub use canonical::*; pub use compute::*; pub use dyn_::*; pub use operations::*; diff --git a/vortex-dtype/Cargo.toml b/vortex-dtype/Cargo.toml index d4fc41e22e5..d407e5e4580 100644 --- a/vortex-dtype/Cargo.toml +++ b/vortex-dtype/Cargo.toml @@ -18,6 +18,7 @@ all-features = true [dependencies] arbitrary = { workspace = true, optional = true } +arcref = { workspace = true } arrow-buffer = { workspace = true } arrow-schema = { workspace = true, optional = true } flatbuffers = { workspace = true } @@ -37,6 +38,7 @@ vortex-buffer = { workspace = true } vortex-error = { workspace = true, features = ["flatbuffers"] } vortex-flatbuffers = { workspace = true, features = ["dtype"] } vortex-proto = { workspace = true, features = ["dtype"] } +vortex-session = { workspace = true } vortex-utils = { workspace = true } [dev-dependencies] diff --git a/vortex-dtype/src/extension.rs b/vortex-dtype/src/extension/mod.rs similarity index 99% rename from vortex-dtype/src/extension.rs rename to vortex-dtype/src/extension/mod.rs index 0b1dbe8edde..7cc3cccee6b 100644 --- a/vortex-dtype/src/extension.rs +++ b/vortex-dtype/src/extension/mod.rs @@ -1,10 +1,16 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +mod temporal; +pub mod v2; +mod vtable; + use std::fmt::Display; use std::fmt::Formatter; use std::sync::Arc; +pub use vtable::*; + use crate::DType; use crate::Nullability; diff --git a/vortex-dtype/src/extension/temporal.rs b/vortex-dtype/src/extension/temporal.rs new file mode 100644 index 00000000000..bb90ae1bbd6 --- /dev/null +++ b/vortex-dtype/src/extension/temporal.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Temporal extension data types. + +use std::fmt::Display; +use std::fmt::Formatter; + +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_session::VortexSession; + +use crate::DType; +use crate::PType; +use crate::VTable; +use crate::datetime::TimeUnit; +use crate::extension::vtable::ExtId; + +pub struct TimestampDType; + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct TimestampOptions { + pub time_unit: TimeUnit, + pub timezone: Option, +} + +impl Display for TimestampOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.timezone { + Some(tz) => write!(f, "unit={}, tz={}", self.time_unit, tz), + None => write!(f, "unit={}", self.time_unit), + } + } +} + +impl VTable for TimestampDType { + type Options = TimestampOptions; + + fn id(_options: &Self::Options) -> ExtId { + ExtId::new_ref("vortex.timestamp") + } + + fn serialize(options: &Self::Options) -> VortexResult> { + todo!() + } + + fn deserialize(data: &[u8], session: &VortexSession) -> VortexResult { + todo!() + } + + fn validate(_options: &Self::Options, storage_dtype: &DType) -> VortexResult<()> { + vortex_ensure!( + matches!(storage_dtype, DType::Primitive(PType::I64, _)), + "Timestamp storage dtype must be i64" + ); + Ok(()) + } +} diff --git a/vortex-dtype/src/extension/v2.rs b/vortex-dtype/src/extension/v2.rs new file mode 100644 index 00000000000..54ca651cffd --- /dev/null +++ b/vortex-dtype/src/extension/v2.rs @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::any::Any; +use std::any::type_name; +use std::fmt::Debug; +use std::fmt::Display; +use std::fmt::Formatter; +use std::hash::Hash; +use std::hash::Hasher; +use std::marker::PhantomData; +use std::sync::Arc; + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_err; + +use crate::DType; +use crate::ExtId; +use crate::VTable; + +/// An extension data type. +#[derive(Clone)] +pub struct ExtDType(Arc>); + +impl ExtDType { + /// Creates a new extension dtype with the given options and storage dtype. + pub fn try_new(options: V::Options, storage_dtype: DType) -> VortexResult { + V::validate(&options, &storage_dtype)?; + Ok(Self(Arc::new(ExtDTypeAdapter:: { + storage_dtype, + options, + vtable: PhantomData, + }))) + } + + /// Returns the identifier of the extension type. + pub fn id(&self) -> ExtId { + self.0.id() + } + + /// Returns the options of the extension type. + pub fn options(&self) -> &V::Options { + &self.0.options + } + + /// Erase the concrete type information, returning a type-erased extension dtype. + pub fn erase(self) -> ExtDTypeRef { + ExtDTypeRef(self.0) + } +} + +/// Type-erased extension dtype - for heterogeneous storage +pub struct ExtDTypeRef(Arc); + +impl ExtDTypeRef { + /// Returns the identifier of the extension type. + pub fn id(&self) -> ExtId { + self.0.id() + } + + /// Returns the storage dtype of the extension type. + pub fn storage_dtype(&self) -> &DType { + self.0.storage_dtype() + } +} + +/// Methods for downcasting type-erased extension dtypes. +impl ExtDTypeRef { + /// Check if the extension dtype is of the concrete type. + pub fn is(&self) -> bool { + self.0.as_any().is::>() + } + + /// Downcast to the concrete options type. + pub fn as_opt(&self) -> Option<&V::Options> { + self.0 + .as_any() + .downcast_ref::>() + .map(|adapter| &adapter.options) + } + + /// Downcast to the concrete options type. + pub fn as_(&self) -> &V::Options { + self.as_opt::() + .vortex_expect("Failed to downcast DynExtDType") + } + + /// Downcast to the concrete options type. + /// + /// Returns `Err(self)` if the downcast fails. + pub fn try_downcast(self) -> Result, ExtDTypeRef> { + // Check if the concrete type matches + if self.0.as_any().is::>() { + // SAFETY: type matches and ExtDTypeImpl is the only implementor + let ptr = Arc::into_raw(self.0) as *const ExtDTypeAdapter; + let inner = unsafe { Arc::from_raw(ptr) }; + Ok(ExtDType(inner)) + } else { + Err(self) + } + } + + /// Downcast to the concrete options type. + /// + /// # Panics + /// + /// Panics if the downcast fails. + pub fn downcast(self) -> ExtDType { + self.try_downcast::() + .map_err(|this| { + vortex_err!( + "Failed to downcast DynExtDType {} to {}", + this.0.id(), + type_name::(), + ) + }) + .vortex_expect("Failed to downcast DynExtDType") + } +} + +/// Wrapper for type-erased extension dtype options. +pub struct ExtDTypeOptions<'a> { + pub(super) ext_dtype: &'a ExtDTypeRef, +} + +impl Display for ExtDTypeOptions<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.ext_dtype.0.options_display(f) + } +} + +impl Debug for ExtDTypeOptions<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.ext_dtype.0.options_debug(f) + } +} + +impl PartialEq for ExtDTypeOptions<'_> { + fn eq(&self, other: &Self) -> bool { + self.ext_dtype.0.options_eq(other.ext_dtype.0.options_any()) + } +} +impl Eq for ExtDTypeOptions<'_> {} + +impl Hash for ExtDTypeOptions<'_> { + fn hash(&self, mut state: &mut H) { + self.ext_dtype.0.options_hash(&mut state); + } +} + +/// An object-safe trait encapsulating the behavior for extension DTypes. +trait ExtDTypeImpl: 'static + Send + Sync + private::Sealed { + fn as_any(&self) -> &dyn Any; + fn id(&self) -> ExtId; + fn storage_dtype(&self) -> &DType; + fn options_any(&self) -> &dyn Any; + fn options_debug(&self, f: &mut Formatter<'_>) -> std::fmt::Result; + fn options_display(&self, f: &mut Formatter<'_>) -> std::fmt::Result; + fn options_eq(&self, other: &dyn Any) -> bool; + fn options_hash(&self, state: &mut dyn Hasher); +} + +struct ExtDTypeAdapter { + storage_dtype: DType, + options: V::Options, + vtable: PhantomData, +} + +impl ExtDTypeImpl for ExtDTypeAdapter { + fn as_any(&self) -> &dyn Any { + self + } + + fn id(&self) -> ExtId { + V::id(&self.options) + } + + fn storage_dtype(&self) -> &DType { + &self.storage_dtype + } + + fn options_any(&self) -> &dyn Any { + &self.options + } + + fn options_debug(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + ::fmt(&self.options, f) + } + + fn options_display(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + ::fmt(&self.options, f) + } + + fn options_eq(&self, other: &dyn Any) -> bool { + let Some(other) = other.downcast_ref::() else { + return false; + }; + ::eq(&self.options, other) + } + + fn options_hash(&self, mut state: &mut dyn Hasher) { + ::hash(&self.options, &mut state); + } +} + +mod private { + use super::ExtDTypeAdapter; + + pub trait Sealed {} + impl Sealed for ExtDTypeAdapter {} +} diff --git a/vortex-dtype/src/extension/vtable.rs b/vortex-dtype/src/extension/vtable.rs new file mode 100644 index 00000000000..334807f6b1b --- /dev/null +++ b/vortex-dtype/src/extension/vtable.rs @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::fmt::Debug; +use std::fmt::Display; +use std::fmt::Formatter; +use std::hash::Hash; +use std::marker::PhantomData; + +use arcref::ArcRef; +use vortex_error::VortexResult; +use vortex_session::VortexSession; + +use crate::DType; +use crate::v2::ExtDType; +use crate::v2::ExtDTypeRef; + +/// A reference-counted string representing an extension type identifier. +pub type ExtId = ArcRef; + +/// The public API for defining new extension DTypes. +pub trait VTable: 'static + Sized + Send + Sync { + /// Associated type containing the deserialized metadata for this extension type + type Options: 'static + Send + Sync + Clone + Debug + Display + PartialEq + Eq + Hash; + + /// Returns the ID for this extension type. + fn id(options: &Self::Options) -> ExtId; + + /// Serialize the options into a byte vector. + fn serialize(options: &Self::Options) -> VortexResult>; + + /// Deserialize the options from a byte slice. + fn deserialize(data: &[u8], session: &VortexSession) -> VortexResult; + + /// Validate that the given storage type is compatible with this extension type. + fn validate(options: &Self::Options, storage_dtype: &DType) -> VortexResult<()>; +} + +/// A dynamic vtable for extension types, used for type-erased deserialization. +pub trait DynVTable: 'static + Send + Sync + Debug + private::Sealed { + /// Deserialize an extension type from serialized options. + fn deserialize( + &self, + data: &[u8], + storage_dtype: DType, + session: &VortexSession, + ) -> VortexResult; +} + +/// Adapter to convert a strongly typed VTable into a DynVTable. +pub struct VTableAdapter(PhantomData); + +impl Debug for VTableAdapter { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", std::any::type_name::()) + } +} + +impl DynVTable for VTableAdapter { + fn deserialize( + &self, + data: &[u8], + storage_dtype: DType, + session: &VortexSession, + ) -> VortexResult { + let options = V::deserialize(data, session)?; + Ok(ExtDType::::try_new(options, storage_dtype)?.erase()) + } +} + +impl From<&'static V> for &'static dyn DynVTable { + fn from(_value: &'static V) -> Self { + const { &VTableAdapter::(PhantomData) } + } +} + +mod private { + use super::VTableAdapter; + use crate::VTable; + + pub trait Sealed {} + impl Sealed for VTableAdapter {} +} diff --git a/vortex-dtype/src/lib.rs b/vortex-dtype/src/lib.rs index 6243b579284..ba46cee0611 100644 --- a/vortex-dtype/src/lib.rs +++ b/vortex-dtype/src/lib.rs @@ -26,6 +26,7 @@ mod native_dtype; mod nullability; mod ptype; mod serde; +mod session; mod struct_; pub use bigint::*; diff --git a/vortex-dtype/src/session.rs b/vortex-dtype/src/session.rs new file mode 100644 index 00000000000..206e9ed5aaa --- /dev/null +++ b/vortex-dtype/src/session.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_session::Ref; +use vortex_session::SessionExt; +use vortex_session::registry::Registry; + +use crate::DynVTable; +use crate::ExtId; +use crate::VTable; + +#[derive(Debug, Default)] +pub struct DTypeSession { + registry: Registry<&'static dyn DynVTable>, +} + +impl DTypeSession { + /// Register an extension DType with the Vortex session. + pub fn register( + &self, + id: impl Into, + vtable: impl Into<&'static dyn DynVTable>, + ) { + self.registry.register(id, vtable); + } +} + +pub trait DTypeSessionExt: SessionExt { + /// Get the DType session. + fn dtypes(&self) -> Ref; +} + +impl DTypeSessionExt for S { + fn dtypes(&self) -> Ref { + self.get::() + } +} From a86aa7bc206e22db4463beadb859cdb3163e5fcf Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 21 Jan 2026 16:05:07 +0000 Subject: [PATCH 02/31] Extension DType vtable Signed-off-by: Nicholas Gates --- vortex-dtype/src/extension/temporal.rs | 37 ++++++++++++++- vortex-dtype/src/extension/v2.rs | 63 ++++++++++++++++++++++---- 2 files changed, 89 insertions(+), 11 deletions(-) diff --git a/vortex-dtype/src/extension/temporal.rs b/vortex-dtype/src/extension/temporal.rs index bb90ae1bbd6..18a929ee670 100644 --- a/vortex-dtype/src/extension/temporal.rs +++ b/vortex-dtype/src/extension/temporal.rs @@ -6,17 +6,33 @@ use std::fmt::Display; use std::fmt::Formatter; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_ensure; use vortex_session::VortexSession; use crate::DType; +use crate::Nullability; use crate::PType; use crate::VTable; use crate::datetime::TimeUnit; use crate::extension::vtable::ExtId; +use crate::v2::ExtDType; -pub struct TimestampDType; +pub struct Timestamp; + +impl Timestamp { + pub fn new(nullability: Nullability) -> ExtDType { + ExtDType::try_new( + TimestampOptions { + time_unit: TimeUnit::Milliseconds, + timezone: None, + }, + DType::Primitive(PType::I64, nullability), + ) + .vortex_expect("failed to create timestamp dtype") + } +} #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct TimestampOptions { @@ -33,7 +49,7 @@ impl Display for TimestampOptions { } } -impl VTable for TimestampDType { +impl VTable for Timestamp { type Options = TimestampOptions; fn id(_options: &Self::Options) -> ExtId { @@ -56,3 +72,20 @@ impl VTable for TimestampDType { Ok(()) } } + +#[cfg(test)] +mod test { + use vortex_error::vortex_bail; + use vortex_error::vortex_panic; + + use super::*; + use crate::Nullability::NonNullable; + + #[test] + fn test_stuff() { + let dtype = Timestamp::new(NonNullable).erase(); + let Some(opts) = dtype.try_options::() else { + vortex_panic!("failed to match as Timestamp"); + }; + } +} diff --git a/vortex-dtype/src/extension/v2.rs b/vortex-dtype/src/extension/v2.rs index 54ca651cffd..3d6234d9170 100644 --- a/vortex-dtype/src/extension/v2.rs +++ b/vortex-dtype/src/extension/v2.rs @@ -59,30 +59,63 @@ impl ExtDTypeRef { self.0.id() } + /// Returns the untyped options of the extension type. + pub fn options_ref(&self) -> ExtDTypeOptions<'_> { + ExtDTypeOptions { ext_dtype: self } + } + /// Returns the storage dtype of the extension type. pub fn storage_dtype(&self) -> &DType { self.0.storage_dtype() } } +/// A trait for matching extension dtypes. +pub trait Matcher { + /// The matched view type. + type Match<'a> + where + T: 'a; + + /// Check if the given item matches this matcher. + fn matches(item: &T) -> bool { + Self::try_match(item).is_some() + } + + /// Check if the given item matches this matcher. + fn try_match<'a>(item: &'a T) -> Option>; +} + +impl Matcher for V { + type Match<'a> = &'a V::Options; + + fn matches(item: &ExtDTypeRef) -> bool { + item.0.as_any().is::>() + } + + fn try_match<'a>(item: &'a ExtDTypeRef) -> Option> { + item.0 + .as_any() + .downcast_ref::>() + .map(|adapter| &adapter.options) + } +} + /// Methods for downcasting type-erased extension dtypes. impl ExtDTypeRef { /// Check if the extension dtype is of the concrete type. - pub fn is(&self) -> bool { - self.0.as_any().is::>() + pub fn is>(&self) -> bool { + M::matches(&self) } /// Downcast to the concrete options type. - pub fn as_opt(&self) -> Option<&V::Options> { - self.0 - .as_any() - .downcast_ref::>() - .map(|adapter| &adapter.options) + pub fn try_options>(&self) -> Option> { + M::try_match(&self) } /// Downcast to the concrete options type. - pub fn as_(&self) -> &V::Options { - self.as_opt::() + pub fn options>(&self) -> M::Match<'_> { + self.try_options::() .vortex_expect("Failed to downcast DynExtDType") } @@ -124,6 +157,13 @@ pub struct ExtDTypeOptions<'a> { pub(super) ext_dtype: &'a ExtDTypeRef, } +impl ExtDTypeOptions<'_> { + /// Serialize the options into a byte vector. + pub fn serialize(&self) -> VortexResult> { + self.ext_dtype.0.options_serialize() + } +} + impl Display for ExtDTypeOptions<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { self.ext_dtype.0.options_display(f) @@ -159,6 +199,7 @@ trait ExtDTypeImpl: 'static + Send + Sync + private::Sealed { fn options_display(&self, f: &mut Formatter<'_>) -> std::fmt::Result; fn options_eq(&self, other: &dyn Any) -> bool; fn options_hash(&self, state: &mut dyn Hasher); + fn options_serialize(&self) -> VortexResult>; } struct ExtDTypeAdapter { @@ -202,6 +243,10 @@ impl ExtDTypeImpl for ExtDTypeAdapter { fn options_hash(&self, mut state: &mut dyn Hasher) { ::hash(&self.options, &mut state); } + + fn options_serialize(&self) -> VortexResult> { + V::serialize(&self.options) + } } mod private { From 2653ba0290d3c66b653ef429078e34d5357bd7c8 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 21 Jan 2026 16:10:46 +0000 Subject: [PATCH 03/31] Sync operators Signed-off-by: Nicholas Gates --- vortex-dtype/src/extension/v2.rs | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/vortex-dtype/src/extension/v2.rs b/vortex-dtype/src/extension/v2.rs index 3d6234d9170..5edaf658130 100644 --- a/vortex-dtype/src/extension/v2.rs +++ b/vortex-dtype/src/extension/v2.rs @@ -71,22 +71,20 @@ impl ExtDTypeRef { } /// A trait for matching extension dtypes. -pub trait Matcher { +pub trait Matcher { /// The matched view type. - type Match<'a> - where - T: 'a; + type Match<'a>; - /// Check if the given item matches this matcher. - fn matches(item: &T) -> bool { + /// Check if the given extension dtype matches this matcher. + fn matches(item: &ExtDTypeRef) -> bool { Self::try_match(item).is_some() } - /// Check if the given item matches this matcher. - fn try_match<'a>(item: &'a T) -> Option>; + /// Check if the given extension dtype matches this matcher. + fn try_match<'a>(item: &'a ExtDTypeRef) -> Option>; } -impl Matcher for V { +impl Matcher for V { type Match<'a> = &'a V::Options; fn matches(item: &ExtDTypeRef) -> bool { @@ -104,17 +102,17 @@ impl Matcher for V { /// Methods for downcasting type-erased extension dtypes. impl ExtDTypeRef { /// Check if the extension dtype is of the concrete type. - pub fn is>(&self) -> bool { + pub fn is(&self) -> bool { M::matches(&self) } /// Downcast to the concrete options type. - pub fn try_options>(&self) -> Option> { + pub fn try_options(&self) -> Option> { M::try_match(&self) } /// Downcast to the concrete options type. - pub fn options>(&self) -> M::Match<'_> { + pub fn options(&self) -> M::Match<'_> { self.try_options::() .vortex_expect("Failed to downcast DynExtDType") } From 484ee3ddf5fc436cfb07a994138c7b1d6a18282e Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 21 Jan 2026 16:30:44 +0000 Subject: [PATCH 04/31] Sync operators Signed-off-by: Nicholas Gates --- vortex-dtype/src/extension/mod.rs | 1 + vortex-dtype/src/extension/temporal.rs | 91 ------------- vortex-dtype/src/extension/temporal/date.rs | 69 ++++++++++ vortex-dtype/src/extension/temporal/mod.rs | 31 +++++ .../src/extension/temporal/timestamp.rs | 123 ++++++++++++++++++ 5 files changed, 224 insertions(+), 91 deletions(-) delete mode 100644 vortex-dtype/src/extension/temporal.rs create mode 100644 vortex-dtype/src/extension/temporal/date.rs create mode 100644 vortex-dtype/src/extension/temporal/mod.rs create mode 100644 vortex-dtype/src/extension/temporal/timestamp.rs diff --git a/vortex-dtype/src/extension/mod.rs b/vortex-dtype/src/extension/mod.rs index 7cc3cccee6b..e326b384a95 100644 --- a/vortex-dtype/src/extension/mod.rs +++ b/vortex-dtype/src/extension/mod.rs @@ -9,6 +9,7 @@ use std::fmt::Display; use std::fmt::Formatter; use std::sync::Arc; +pub use temporal::*; pub use vtable::*; use crate::DType; diff --git a/vortex-dtype/src/extension/temporal.rs b/vortex-dtype/src/extension/temporal.rs deleted file mode 100644 index 18a929ee670..00000000000 --- a/vortex-dtype/src/extension/temporal.rs +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 -// SPDX-FileCopyrightText: Copyright the Vortex contributors - -//! Temporal extension data types. - -use std::fmt::Display; -use std::fmt::Formatter; - -use vortex_error::VortexExpect; -use vortex_error::VortexResult; -use vortex_error::vortex_ensure; -use vortex_session::VortexSession; - -use crate::DType; -use crate::Nullability; -use crate::PType; -use crate::VTable; -use crate::datetime::TimeUnit; -use crate::extension::vtable::ExtId; -use crate::v2::ExtDType; - -pub struct Timestamp; - -impl Timestamp { - pub fn new(nullability: Nullability) -> ExtDType { - ExtDType::try_new( - TimestampOptions { - time_unit: TimeUnit::Milliseconds, - timezone: None, - }, - DType::Primitive(PType::I64, nullability), - ) - .vortex_expect("failed to create timestamp dtype") - } -} - -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub struct TimestampOptions { - pub time_unit: TimeUnit, - pub timezone: Option, -} - -impl Display for TimestampOptions { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.timezone { - Some(tz) => write!(f, "unit={}, tz={}", self.time_unit, tz), - None => write!(f, "unit={}", self.time_unit), - } - } -} - -impl VTable for Timestamp { - type Options = TimestampOptions; - - fn id(_options: &Self::Options) -> ExtId { - ExtId::new_ref("vortex.timestamp") - } - - fn serialize(options: &Self::Options) -> VortexResult> { - todo!() - } - - fn deserialize(data: &[u8], session: &VortexSession) -> VortexResult { - todo!() - } - - fn validate(_options: &Self::Options, storage_dtype: &DType) -> VortexResult<()> { - vortex_ensure!( - matches!(storage_dtype, DType::Primitive(PType::I64, _)), - "Timestamp storage dtype must be i64" - ); - Ok(()) - } -} - -#[cfg(test)] -mod test { - use vortex_error::vortex_bail; - use vortex_error::vortex_panic; - - use super::*; - use crate::Nullability::NonNullable; - - #[test] - fn test_stuff() { - let dtype = Timestamp::new(NonNullable).erase(); - let Some(opts) = dtype.try_options::() else { - vortex_panic!("failed to match as Timestamp"); - }; - } -} diff --git a/vortex-dtype/src/extension/temporal/date.rs b/vortex-dtype/src/extension/temporal/date.rs new file mode 100644 index 00000000000..7dc29723a63 --- /dev/null +++ b/vortex-dtype/src/extension/temporal/date.rs @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_session::VortexSession; + +use crate::DType; +use crate::ExtId; +use crate::Nullability; +use crate::PType; +use crate::VTable; +use crate::datetime::TimeUnit; +use crate::v2::ExtDType; + +pub struct Date; + +impl Date { + pub fn new(time_unit: TimeUnit, nullability: Nullability) -> ExtDType { + let ptype = date_ptype(&time_unit) + .ok_or_else(|| vortex_err!("Date type does not support time unit {}", time_unit)) + .vortex_expect("failed to create date dtype"); + ExtDType::try_new(time_unit, DType::Primitive(ptype, nullability)) + .vortex_expect("failed to create date dtype") + } +} + +impl VTable for Date { + type Options = TimeUnit; + + fn id(_options: &Self::Options) -> ExtId { + ExtId::from("vortex.date") + } + + fn serialize(options: &Self::Options) -> VortexResult> { + Ok(vec![u8::from(*options)]) + } + + fn deserialize(data: &[u8], _session: &VortexSession) -> VortexResult { + let tag = data[0]; + Ok(TimeUnit::try_from(tag)?) + } + + fn validate(options: &Self::Options, storage_dtype: &DType) -> VortexResult<()> { + let ptype = date_ptype(options) + .ok_or_else(|| vortex_err!("Date type does not support time unit {}", options))?; + + vortex_ensure!( + storage_dtype.as_ptype() == ptype, + "Date storage dtype for {} must be {}", + options, + ptype + ); + + Ok(()) + } +} + +fn date_ptype(time_unit: &TimeUnit) -> Option { + match time_unit { + TimeUnit::Nanoseconds => None, + TimeUnit::Microseconds => None, + TimeUnit::Milliseconds => Some(PType::I64), + TimeUnit::Seconds => None, + TimeUnit::Days => Some(PType::I32), + } +} diff --git a/vortex-dtype/src/extension/temporal/mod.rs b/vortex-dtype/src/extension/temporal/mod.rs new file mode 100644 index 00000000000..0b1bb52772a --- /dev/null +++ b/vortex-dtype/src/extension/temporal/mod.rs @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod date; +mod timestamp; + +pub use date::*; +pub use timestamp::*; + +use crate::datetime::TemporalMetadata; +use crate::v2::ExtDTypeRef; +use crate::v2::Matcher; + +/// Matcher for temporal extension data types. +pub struct AnyTemporal; + +impl Matcher for AnyTemporal { + type Match<'a> = TemporalMetadata; + + fn try_match<'a>(item: &'a ExtDTypeRef) -> Option> { + if let Some(opts) = item.try_options::() { + return Some(TemporalMetadata::Timestamp(opts.unit, opts.tz.clone())); + } + if let Some(time_unit) = item.try_options::() { + return Some(TemporalMetadata::Date(*time_unit)); + } + + // FIXME(ngate): time + None + } +} diff --git a/vortex-dtype/src/extension/temporal/timestamp.rs b/vortex-dtype/src/extension/temporal/timestamp.rs new file mode 100644 index 00000000000..6a08d73149a --- /dev/null +++ b/vortex-dtype/src/extension/temporal/timestamp.rs @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Temporal extension data types. + +use std::fmt::Display; +use std::fmt::Formatter; + +use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_err; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::DType; +use crate::Nullability; +use crate::PType; +use crate::VTable; +use crate::datetime::TimeUnit; +use crate::extension::vtable::ExtId; +use crate::v2::ExtDType; + +/// Timestamp DType. +pub struct Timestamp; + +impl Timestamp { + pub fn new(time_unit: TimeUnit, nullability: Nullability) -> ExtDType { + Self::new_with_tz(time_unit, None, nullability) + } + + pub fn new_with_tz( + time_unit: TimeUnit, + timezone: Option, + nullability: Nullability, + ) -> ExtDType { + ExtDType::try_new( + TimestampOptions { + unit: time_unit, + tz: timezone, + }, + DType::Primitive(PType::I64, nullability), + ) + .vortex_expect("failed to create timestamp dtype") + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct TimestampOptions { + pub unit: TimeUnit, + pub tz: Option, +} + +impl Display for TimestampOptions { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.tz { + Some(tz) => write!(f, "unit={}, tz={}", self.unit, tz), + None => write!(f, "unit={}", self.unit), + } + } +} + +impl VTable for Timestamp { + type Options = TimestampOptions; + + fn id(_options: &Self::Options) -> ExtId { + ExtId::new_ref("vortex.timestamp") + } + + // NOTE(ngates): unfortunately we're stuck with this hand-rolled serialization format for + // backwards compatibility. + fn serialize(options: &Self::Options) -> VortexResult> { + let mut meta = Vec::with_capacity(4); + let unit_tag: u8 = options.unit.into(); + + meta.push(unit_tag); + + // Encode time_zone as u16 length followed by utf8 bytes. + match &options.tz { + None => meta.extend_from_slice(0u16.to_le_bytes().as_slice()), + Some(tz) => { + let tz_bytes = tz.as_bytes(); + let tz_len = u16::try_from(tz_bytes.len()) + .unwrap_or_else(|err| vortex_panic!("tz did not fit in u16: {}", err)); + meta.extend_from_slice(tz_len.to_le_bytes().as_slice()); + meta.extend_from_slice(tz_bytes); + } + } + + Ok(meta) + } + + fn deserialize(data: &[u8], session: &VortexSession) -> VortexResult { + let tag = data[0]; + let time_unit = TimeUnit::try_from(tag)?; + let tz_len_bytes = &data[1..3]; + let tz_len = u16::from_le_bytes(tz_len_bytes.try_into()?) as usize; + if tz_len == 0 { + return Ok(TimestampOptions { + unit: time_unit, + tz: None, + }); + } + + // Attempt to load from len-prefixed bytes + let tz_bytes = &data[3..][..tz_len]; + let tz = str::from_utf8(tz_bytes) + .map_err(|e| vortex_err!("timezone is not valid utf8 string: {e}"))? + .to_string(); + Ok(TimestampOptions { + unit: time_unit, + tz: Some(tz), + }) + } + + fn validate(_options: &Self::Options, storage_dtype: &DType) -> VortexResult<()> { + vortex_ensure!( + matches!(storage_dtype, DType::Primitive(PType::I64, _)), + "Timestamp storage dtype must be i64" + ); + Ok(()) + } +} From 5ec91b791056649b531bd7b0b93a04b170314187 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 21 Jan 2026 17:04:32 +0000 Subject: [PATCH 05/31] Sync operators Signed-off-by: Nicholas Gates --- vortex-dtype/src/extension/temporal/date.rs | 14 ++-- vortex-dtype/src/extension/temporal/mod.rs | 4 +- vortex-dtype/src/extension/temporal/time.rs | 70 +++++++++++++++++++ .../src/extension/temporal/timestamp.rs | 11 ++- vortex-dtype/src/extension/vtable.rs | 4 +- vortex-dtype/src/lib.rs | 2 +- vortex-dtype/src/session.rs | 22 +++++- vortex-file/src/open.rs | 2 + vortex/src/lib.rs | 2 + 9 files changed, 119 insertions(+), 12 deletions(-) create mode 100644 vortex-dtype/src/extension/temporal/time.rs diff --git a/vortex-dtype/src/extension/temporal/date.rs b/vortex-dtype/src/extension/temporal/date.rs index 7dc29723a63..ff7e50818c7 100644 --- a/vortex-dtype/src/extension/temporal/date.rs +++ b/vortex-dtype/src/extension/temporal/date.rs @@ -15,15 +15,19 @@ use crate::VTable; use crate::datetime::TimeUnit; use crate::v2::ExtDType; +/// Date DType. pub struct Date; impl Date { - pub fn new(time_unit: TimeUnit, nullability: Nullability) -> ExtDType { + pub const ID: ExtId = ExtId::new_ref("vortex.date"); + + /// Creates a new Date extension dtype with the given time unit and nullability. + /// + /// Note that only Milliseconds and Days time units are supported for Date. + pub fn try_new(time_unit: TimeUnit, nullability: Nullability) -> VortexResult> { let ptype = date_ptype(&time_unit) - .ok_or_else(|| vortex_err!("Date type does not support time unit {}", time_unit)) - .vortex_expect("failed to create date dtype"); + .ok_or_else(|| vortex_err!("Date type does not support time unit {}", time_unit))?; ExtDType::try_new(time_unit, DType::Primitive(ptype, nullability)) - .vortex_expect("failed to create date dtype") } } @@ -31,7 +35,7 @@ impl VTable for Date { type Options = TimeUnit; fn id(_options: &Self::Options) -> ExtId { - ExtId::from("vortex.date") + Self::ID } fn serialize(options: &Self::Options) -> VortexResult> { diff --git a/vortex-dtype/src/extension/temporal/mod.rs b/vortex-dtype/src/extension/temporal/mod.rs index 0b1bb52772a..97272393419 100644 --- a/vortex-dtype/src/extension/temporal/mod.rs +++ b/vortex-dtype/src/extension/temporal/mod.rs @@ -2,9 +2,11 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors mod date; +mod time; mod timestamp; pub use date::*; +pub use time::*; pub use timestamp::*; use crate::datetime::TemporalMetadata; @@ -21,7 +23,7 @@ impl Matcher for AnyTemporal { if let Some(opts) = item.try_options::() { return Some(TemporalMetadata::Timestamp(opts.unit, opts.tz.clone())); } - if let Some(time_unit) = item.try_options::() { + if let Some(time_unit) = item.try_options::