diff --git a/encodings/datetime-parts/src/compute/compare.rs b/encodings/datetime-parts/src/compute/compare.rs index c69fd3d1b93..61314672a65 100644 --- a/encodings/datetime-parts/src/compute/compare.rs +++ b/encodings/datetime-parts/src/compute/compare.rs @@ -321,7 +321,8 @@ mod test { .unwrap(); // Timestamp with a value larger than i32::MAX. - let rhs = dtp_array_from_timestamp(i64::MAX, rhs_validity); + // https://github.com/BurntSushi/jiff/blob/e5b7f0d061e4da9598aed73f6171e78baa8b007f/src/shared/tzif.rs#L23 + let rhs = dtp_array_from_timestamp(253_402_207_200i64, rhs_validity); let comparison = lhs .clone() diff --git a/fuzz/src/array/mask.rs b/fuzz/src/array/mask.rs index ac82caaaeb4..f70d95c6454 100644 --- a/fuzz/src/array/mask.rs +++ b/fuzz/src/array/mask.rs @@ -134,7 +134,7 @@ pub fn mask_canonical_array(canonical: Canonical, mask: &Mask) -> VortexResult(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::dtype::extension::ExtVTable::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::dtype::extension::ExtVTable::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::dtype::extension::ExtVTable::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10256,6 +10258,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Date::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::validate_array<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::ExtDType, _storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10274,6 +10278,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Time::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10292,6 +10298,8 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Timestamp::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -10310,6 +10318,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata: pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::uuid::Uuid::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14142,6 +14152,8 @@ pub fn vortex_array::extension::datetime::Date::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Date::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Date::validate_array<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::ExtDType, _storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Date::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Date::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14192,6 +14204,8 @@ pub fn vortex_array::extension::datetime::Time::serialize_metadata(&self, metada pub fn vortex_array::extension::datetime::Time::unpack_native(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Time::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Time::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Time::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14244,6 +14258,8 @@ pub fn vortex_array::extension::datetime::Timestamp::serialize_metadata(&self, m pub fn vortex_array::extension::datetime::Timestamp::unpack_native<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::datetime::Timestamp::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::datetime::Timestamp::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::datetime::Timestamp::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> @@ -14320,6 +14336,8 @@ pub fn vortex_array::extension::uuid::Uuid::serialize_metadata(&self, metadata: pub fn vortex_array::extension::uuid::Uuid::unpack_native<'a>(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &'a vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_array::extension::uuid::Uuid::validate_array<'a>(&self, ext_dtype: &'a vortex_array::dtype::extension::ExtDType, storage_array: &'a dyn vortex_array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_array::extension::uuid::Uuid::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType) -> vortex_error::VortexResult<()> pub fn vortex_array::extension::uuid::Uuid::validate_scalar_value(&self, ext_dtype: &vortex_array::dtype::extension::ExtDType, storage_value: &vortex_array::scalar::ScalarValue) -> vortex_error::VortexResult<()> diff --git a/vortex-array/src/arrays/constant/vtable/canonical.rs b/vortex-array/src/arrays/constant/vtable/canonical.rs index f8e2d7f21cf..8715c4a5ed1 100644 --- a/vortex-array/src/arrays/constant/vtable/canonical.rs +++ b/vortex-array/src/arrays/constant/vtable/canonical.rs @@ -161,8 +161,19 @@ pub(crate) fn constant_canonicalize(array: &ConstantArray) -> VortexResult VortexResult { - // TODO(connor): Replace these statements once we add `validate_storage_array`. - // ext_dtype.validate_storage_array(&storage_array)?; - assert_eq!( - ext_dtype.storage_dtype(), - storage_array.dtype(), - "ExtensionArray: storage_dtype must match storage array DType", - ); + if storage_array.is_host() { + ext_dtype.validate_storage_array(&storage_array)?; + } else { + eprintln!("Unable to validate storage array on GPU") + } // SAFETY: we validate that the inputs are valid above. Ok(unsafe { Self::new_unchecked(ext_dtype, storage_array) }) @@ -95,15 +93,22 @@ impl ExtensionArray { /// other words, they must know that `ext_dtype.validate_storage_array(&storage_array)` has been /// called successfully on this storage array. pub unsafe fn new_unchecked(ext_dtype: ExtDTypeRef, storage_array: ArrayRef) -> Self { - // TODO(connor): Replace these statements once we add `validate_storage_array`. - // #[cfg(debug_assertions)] - // ext_dtype - // .validate_storage_array(&storage_array) - // .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`"); - debug_assert_eq!( + #[cfg(debug_assertions)] + { + if storage_array.is_host() { + ext_dtype + .validate_storage_array(&storage_array) + .vortex_expect("[Debug Assertion]: Invalid storage array for `ExtensionArray`"); + } else { + eprintln!("Unable to validate storage array on GPU") + } + } + + // The dtype check is much cheaper than the storage validation, so we might as well do this. + assert_eq!( ext_dtype.storage_dtype(), storage_array.dtype(), - "ExtensionArray: storage_dtype must match storage array DType", + "tried to construct an extension array with a storage array that has the wrong dtype" ); Self { diff --git a/vortex-array/src/arrays/extension/compute/cast.rs b/vortex-array/src/arrays/extension/compute/cast.rs index 1b02b479438..870330047ee 100644 --- a/vortex-array/src/arrays/extension/compute/cast.rs +++ b/vortex-array/src/arrays/extension/compute/cast.rs @@ -1,6 +1,8 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex_error::VortexResult; + use crate::ArrayRef; use crate::IntoArray; use crate::arrays::Extension; @@ -10,7 +12,7 @@ use crate::dtype::DType; use crate::scalar_fn::fns::cast::CastReduce; impl CastReduce for Extension { - fn cast(array: &ExtensionArray, dtype: &DType) -> vortex_error::VortexResult> { + fn cast(array: &ExtensionArray, dtype: &DType) -> VortexResult> { if !array.dtype().eq_ignore_nullability(dtype) { return Ok(None); } @@ -30,8 +32,9 @@ impl CastReduce for Extension { } }; + // We have no idea if the cast invalidated the storage array, so we must run validation. Ok(Some( - ExtensionArray::new(ext_dtype.clone(), new_storage).into_array(), + ExtensionArray::try_new(ext_dtype.clone(), new_storage)?.into_array(), )) } } diff --git a/vortex-array/src/arrays/extension/compute/filter.rs b/vortex-array/src/arrays/extension/compute/filter.rs index 3dd03a8a4a0..07a4ab46193 100644 --- a/vortex-array/src/arrays/extension/compute/filter.rs +++ b/vortex-array/src/arrays/extension/compute/filter.rs @@ -13,10 +13,14 @@ use crate::arrays::filter::FilterReduce; impl FilterReduce for Extension { fn filter(array: &ExtensionArray, mask: &Mask) -> VortexResult> { Ok(Some( - ExtensionArray::new( - array.ext_dtype().clone(), - array.storage_array().filter(mask.clone())?, - ) + // SAFETY: The storage array is filtered from an already-valid extension array, which + // preserves the storage dtype and does not change values. + unsafe { + ExtensionArray::new_unchecked( + array.ext_dtype().clone(), + array.storage_array().filter(mask.clone())?, + ) + } .into_array(), )) } diff --git a/vortex-array/src/arrays/extension/compute/mask.rs b/vortex-array/src/arrays/extension/compute/mask.rs index af7770ee0ca..b9d52ce284c 100644 --- a/vortex-array/src/arrays/extension/compute/mask.rs +++ b/vortex-array/src/arrays/extension/compute/mask.rs @@ -19,13 +19,18 @@ impl MaskReduce for Extension { EmptyOptions, [array.storage_array().clone(), mask.clone()], )?; + Ok(Some( - ExtensionArray::new( - array - .ext_dtype() - .with_nullability(masked_storage.dtype().nullability()), - masked_storage, - ) + // SAFETY: The storage array is masked from an already-valid extension array, which + // preserves the storage dtype and does not change values. + unsafe { + ExtensionArray::new_unchecked( + array + .ext_dtype() + .with_nullability(masked_storage.dtype().nullability()), + masked_storage, + ) + } .into_array(), )) } diff --git a/vortex-array/src/arrays/extension/compute/rules.rs b/vortex-array/src/arrays/extension/compute/rules.rs index e077dda5283..8713867ec04 100644 --- a/vortex-array/src/arrays/extension/compute/rules.rs +++ b/vortex-array/src/arrays/extension/compute/rules.rs @@ -42,8 +42,12 @@ impl ArrayParentReduceRule for ExtensionFilterPushDownRule { .storage_array() .clone() .filter(parent.filter_mask().clone())?; + Ok(Some( - ExtensionArray::new(child.ext_dtype().clone(), filtered_storage).into_array(), + // SAFETY: The storage array is filtered from an already-valid extension array, which + // preserves the storage dtype and does not change values. + unsafe { ExtensionArray::new_unchecked(child.ext_dtype().clone(), filtered_storage) } + .into_array(), )) } } @@ -106,6 +110,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } fn test_ext_dtype() -> ExtDTypeRef { @@ -203,6 +215,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let ext_dtype1 = ExtDType::::try_new( diff --git a/vortex-array/src/arrays/extension/compute/slice.rs b/vortex-array/src/arrays/extension/compute/slice.rs index 96d00e332bd..c0a0fea3af8 100644 --- a/vortex-array/src/arrays/extension/compute/slice.rs +++ b/vortex-array/src/arrays/extension/compute/slice.rs @@ -14,10 +14,14 @@ use crate::arrays::slice::SliceReduce; impl SliceReduce for Extension { fn slice(array: &Self::Array, range: Range) -> VortexResult> { Ok(Some( - ExtensionArray::new( - array.ext_dtype().clone(), - array.storage_array().slice(range)?, - ) + // SAFETY: The storage array is sliced from an already-valid extension array, which + // preserves the storage dtype and does not change values. + unsafe { + ExtensionArray::new_unchecked( + array.ext_dtype().clone(), + array.storage_array().slice(range)?, + ) + } .into_array(), )) } diff --git a/vortex-array/src/arrays/extension/compute/take.rs b/vortex-array/src/arrays/extension/compute/take.rs index bf8b24c6fe4..c94b7bf0e39 100644 --- a/vortex-array/src/arrays/extension/compute/take.rs +++ b/vortex-array/src/arrays/extension/compute/take.rs @@ -18,13 +18,18 @@ impl TakeExecute for Extension { _ctx: &mut ExecutionCtx, ) -> VortexResult> { let taken_storage = array.storage_array().take(indices.to_array())?; + Ok(Some( - ExtensionArray::new( - array - .ext_dtype() - .with_nullability(taken_storage.dtype().nullability()), - taken_storage, - ) + // SAFETY: The storage array is taken from an already-valid extension array, which + // preserves the storage dtype and does not change values. + unsafe { + ExtensionArray::new_unchecked( + array + .ext_dtype() + .with_nullability(taken_storage.dtype().nullability()), + taken_storage, + ) + } .into_array(), )) } diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index 243a0ef35d8..9e3c4e8ad8a 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -137,7 +137,10 @@ impl VTable for Extension { vortex_bail!("Expected 1 child, got {}", children.len()); } let storage = children.get(0, ext_dtype.storage_dtype(), len)?; - Ok(ExtensionArray::new(ext_dtype.clone(), storage)) + + // TODO(connor): Is this correct? + // We do not know what might be on disk, so we must run validation on read. + ExtensionArray::try_new(ext_dtype.clone(), storage) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/filter/execute/mod.rs b/vortex-array/src/arrays/filter/execute/mod.rs index da9b059d8ed..63bc9c1652a 100644 --- a/vortex-array/src/arrays/filter/execute/mod.rs +++ b/vortex-array/src/arrays/filter/execute/mod.rs @@ -92,7 +92,11 @@ pub(super) fn execute_filter(canonical: Canonical, mask: &Arc) -> Ca .storage_array() .filter(values_to_mask(mask)) .vortex_expect("ExtensionArray storage type somehow could not be filtered"); - Canonical::Extension(ExtensionArray::new(a.ext_dtype().clone(), filtered_storage)) + Canonical::Extension( + // SAFETY: The storage array is filtered from an already-valid extension array, + // which preserves the storage dtype and does not change values. + unsafe { ExtensionArray::new_unchecked(a.ext_dtype().clone(), filtered_storage) }, + ) } } } diff --git a/vortex-array/src/arrays/listview/conversion.rs b/vortex-array/src/arrays/listview/conversion.rs index cddee9a2ec5..a9996cc6c56 100644 --- a/vortex-array/src/arrays/listview/conversion.rs +++ b/vortex-array/src/arrays/listview/conversion.rs @@ -266,7 +266,12 @@ pub fn recursive_list_from_list_view(array: ArrayRef) -> VortexResult // Avoid cloning if elements didn't change. if !Arc::ptr_eq(&converted_storage, ext_array.storage_array()) { - ExtensionArray::new(ext_array.ext_dtype().clone(), converted_storage).into_array() + // SAFETY: The logical storage values are not changed, so we do not need to run + // validation again. + unsafe { + ExtensionArray::new_unchecked(ext_array.ext_dtype().clone(), converted_storage) + } + .into_array() } else { ext_array.into_array() } diff --git a/vortex-array/src/arrays/masked/execute.rs b/vortex-array/src/arrays/masked/execute.rs index 936a4ef8d65..ca5c5627bf0 100644 --- a/vortex-array/src/arrays/masked/execute.rs +++ b/vortex-array/src/arrays/masked/execute.rs @@ -189,10 +189,14 @@ fn mask_validity_extension( let storage = array.storage_array().clone().execute::(ctx)?; let masked_storage = mask_validity_canonical(storage, mask, ctx)?; let masked_storage = masked_storage.into_array(); - Ok(ExtensionArray::new( - array - .ext_dtype() - .with_nullability(masked_storage.dtype().nullability()), - masked_storage, - )) + // SAFETY: The storage array is taken from an already-valid extension array, which preserves the + // storage dtype and does not change values. + Ok(unsafe { + ExtensionArray::new_unchecked( + array + .ext_dtype() + .with_nullability(masked_storage.dtype().nullability()), + masked_storage, + ) + }) } diff --git a/vortex-array/src/builders/extension.rs b/vortex-array/src/builders/extension.rs index 4875cd39182..1dbbfb64862 100644 --- a/vortex-array/src/builders/extension.rs +++ b/vortex-array/src/builders/extension.rs @@ -3,8 +3,9 @@ use std::any::Any; +use vortex_error::VortexExpect; use vortex_error::VortexResult; -use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; use vortex_mask::Mask; use crate::ArrayRef; @@ -40,15 +41,21 @@ impl ExtensionBuilder { } } + // TODO(connor): Should we be validating scalars? Or do we just leave this all to the `finish`? /// Appends an extension `value` to the builder. pub fn append_value(&mut self, value: ExtScalar) -> VortexResult<()> { self.storage.append_scalar(&value.to_storage_scalar()) } /// Finishes the builder directly into a [`ExtensionArray`]. + /// + /// # Panics + /// + /// Panics if any values are invalid for the extension type. pub fn finish_into_extension(&mut self) -> ExtensionArray { let storage = self.storage.finish(); - ExtensionArray::new(self.ext_dtype(), storage) + ExtensionArray::try_new(self.ext_dtype(), storage) + .vortex_expect("Failed to create `ExtensionArray` from `ExtensionBuilder`") } /// The [`ExtDType`] of this builder. @@ -87,8 +94,9 @@ impl ArrayBuilder for ExtensionBuilder { } fn append_scalar(&mut self, scalar: &Scalar) -> VortexResult<()> { - vortex_ensure!( - scalar.dtype() == self.dtype(), + vortex_ensure_eq!( + scalar.dtype(), + self.dtype(), "ExtensionBuilder expected scalar with dtype {}, got {}", self.dtype(), scalar.dtype() diff --git a/vortex-array/src/canonical.rs b/vortex-array/src/canonical.rs index 9000e212a14..d72f093c8fa 100644 --- a/vortex-array/src/canonical.rs +++ b/vortex-array/src/canonical.rs @@ -217,10 +217,17 @@ impl Canonical { 0, ) }), - DType::Extension(ext_dtype) => Canonical::Extension(ExtensionArray::new( - ext_dtype.clone(), - Canonical::empty(ext_dtype.storage_dtype()).into_array(), - )), + + DType::Extension(ext_dtype) => Canonical::Extension( + // SAFETY: An empty array is always valid, no matter what the extension type is + // (unless we decide to add a `Never` or `Void` extension type). + unsafe { + ExtensionArray::new_unchecked( + ext_dtype.clone(), + Canonical::empty(ext_dtype.storage_dtype()).into_array(), + ) + }, + ), } } @@ -628,16 +635,18 @@ impl Executable for CanonicalValidity { StructArray::new_unchecked(fields, struct_fields, len, validity.execute(ctx)?) }))) } - Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension( - ExtensionArray::new( + // SAFETY: The storage array is executed from an already-valid extension array, + // which preserves the storage dtype. + Canonical::Extension(ext) => Ok(CanonicalValidity(Canonical::Extension(unsafe { + ExtensionArray::new_unchecked( ext.ext_dtype().clone(), ext.storage_array() .clone() .execute::(ctx)? .0 .into_array(), - ), - ))), + ) + }))), } } } @@ -758,16 +767,18 @@ impl Executable for RecursiveCanonical { ) }))) } - Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension( - ExtensionArray::new( + // SAFETY: The storage array is executed from an already-valid extension array, + // which preserves the storage dtype. + Canonical::Extension(ext) => Ok(RecursiveCanonical(Canonical::Extension(unsafe { + ExtensionArray::new_unchecked( ext.ext_dtype().clone(), ext.storage_array() .clone() .execute::(ctx)? .0 .into_array(), - ), - ))), + ) + }))), } } } diff --git a/vortex-array/src/dtype/dtype_impl.rs b/vortex-array/src/dtype/dtype_impl.rs index 36476d6f620..6f0022c14dd 100644 --- a/vortex-array/src/dtype/dtype_impl.rs +++ b/vortex-array/src/dtype/dtype_impl.rs @@ -487,7 +487,7 @@ mod tests { Timestamp::new_with_tz(TimeUnit::Seconds, Some("UTC".into()), Nullable).erased(), ); let t2 = DType::Extension( - Timestamp::new_with_tz(TimeUnit::Seconds, Some("ET".into()), Nullable).erased(), + Timestamp::new_with_tz(TimeUnit::Seconds, Some("EST".into()), Nullable).erased(), ); assert!(!t1.eq_ignore_nullability(&t2)); } diff --git a/vortex-array/src/dtype/extension/erased.rs b/vortex-array/src/dtype/extension/erased.rs index 3f58cfadc49..3284dcd5af7 100644 --- a/vortex-array/src/dtype/extension/erased.rs +++ b/vortex-array/src/dtype/extension/erased.rs @@ -13,6 +13,7 @@ use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::extension::ExtDType; @@ -99,6 +100,11 @@ impl ExtDTypeRef { pub(crate) fn validate_storage_value(&self, storage_value: &ScalarValue) -> VortexResult<()> { self.0.value_validate(storage_value) } + + /// Validates that the given storage scalar value is valid for this dtype. + pub(crate) fn validate_storage_array(&self, storage_array: &dyn DynArray) -> VortexResult<()> { + self.0.array_validate(storage_array) + } } /// Methods for downcasting type-erased extension dtypes. diff --git a/vortex-array/src/dtype/extension/typed.rs b/vortex-array/src/dtype/extension/typed.rs index de843249727..6d0cd4872ea 100644 --- a/vortex-array/src/dtype/extension/typed.rs +++ b/vortex-array/src/dtype/extension/typed.rs @@ -15,7 +15,9 @@ use std::sync::Arc; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_ensure_eq; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::extension::ExtDTypeRef; @@ -118,11 +120,13 @@ pub(super) trait DynExtDType: 'static + Send + Sync + super::sealed::Sealed { fn metadata_serialize(&self) -> VortexResult>; /// Returns a new [`ExtDTypeRef`] with the given nullability. fn with_nullability(&self, nullability: Nullability) -> ExtDTypeRef; - /// Validates that the given storage scalar value is valid for this dtype. + /// Validates that the given storage scalar value is valid for this extension type. fn value_validate(&self, storage_value: &ScalarValue) -> VortexResult<()>; /// Formats an extension scalar value using the current dtype for metadata context. fn value_display(&self, f: &mut fmt::Formatter<'_>, storage_value: &ScalarValue) -> fmt::Result; + /// Validates that the given storage array is valid for this extension type. + fn array_validate(&self, storage_array: &dyn DynArray) -> VortexResult<()>; } impl DynExtDType for ExtDType { @@ -194,4 +198,14 @@ impl DynExtDType for ExtDType { ), } } + + fn array_validate(&self, storage_array: &dyn DynArray) -> VortexResult<()> { + vortex_ensure_eq!( + &self.storage_dtype, + storage_array.dtype(), + "tried to construct an extension array with a storage array that has the wrong dtype" + ); + + self.vtable.validate_array(self, storage_array) + } } diff --git a/vortex-array/src/dtype/extension/vtable.rs b/vortex-array/src/dtype/extension/vtable.rs index 8d324521b41..190d0a3b8c7 100644 --- a/vortex-array/src/dtype/extension/vtable.rs +++ b/vortex-array/src/dtype/extension/vtable.rs @@ -7,6 +7,7 @@ use std::hash::Hash; use vortex_error::VortexResult; +use crate::DynArray; use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::scalar::ScalarValue; @@ -38,7 +39,7 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { /// Validate that the given storage type is compatible with this extension type. fn validate_dtype(&self, ext_dtype: &ExtDType) -> VortexResult<()>; - // Methods related to the extension scalar values. + // Methods related to extension scalar values. /// Validate the given storage value is compatible with the extension type. /// @@ -69,4 +70,17 @@ pub trait ExtVTable: 'static + Sized + Send + Sync + Clone + Debug + Eq + Hash { ext_dtype: &'a ExtDType, storage_value: &'a ScalarValue, ) -> VortexResult>; + + // Methods related to extension arrays. + + /// Validates that the given storage array is compatible with this extension type and type + /// medatada. + /// + /// Note that [`ExtVTable::validate_dtype()`] is always called first on the dtype of the storage + /// array to validate the storage [`DType`](crate::dtype::DType). + fn validate_array<'a>( + &self, + ext_dtype: &'a ExtDType, + storage_array: &'a dyn DynArray, + ) -> VortexResult<()>; } diff --git a/vortex-array/src/extension/datetime/date.rs b/vortex-array/src/extension/datetime/date.rs index 6d67051721b..efd6f14053a 100644 --- a/vortex-array/src/extension/datetime/date.rs +++ b/vortex-array/src/extension/datetime/date.rs @@ -7,9 +7,10 @@ use jiff::Span; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -96,11 +97,10 @@ impl ExtVTable for Date { let ptype = date_ptype(metadata) .ok_or_else(|| vortex_err!("Date type does not support time unit {}", metadata))?; - vortex_ensure!( - ext_dtype.storage_dtype().as_ptype() == ptype, - "Date storage dtype for {} must be {}", - metadata, - ptype + vortex_ensure_eq!( + ext_dtype.storage_dtype().as_ptype(), + ptype, + "Date storage dtype for {metadata} must be {ptype}", ); Ok(()) @@ -120,6 +120,15 @@ impl ExtVTable for Date { _ => vortex_bail!("Date type does not support time unit {}", metadata), } } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + // All i64 and i32 values are valid dates. + Ok(()) + } } #[cfg(test)] diff --git a/vortex-array/src/extension/datetime/time.rs b/vortex-array/src/extension/datetime/time.rs index 59e7c98bc7e..28419d02c59 100644 --- a/vortex-array/src/extension/datetime/time.rs +++ b/vortex-array/src/extension/datetime/time.rs @@ -10,6 +10,7 @@ use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_err; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -143,12 +144,76 @@ impl ExtVTable for Time { Ok(value) } + + fn validate_array<'a>( + &self, + ext_dtype: &'a ExtDType, + storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + // We check both min and max because the stored integer can be negative, which is invalid + // for a time of day. + let stats = storage_array.statistics(); + + let metadata = ext_dtype.metadata(); + match metadata { + TimeUnit::Seconds | TimeUnit::Milliseconds => { + let build_span = |v: i32| match metadata { + TimeUnit::Seconds => Span::new().seconds(v), + TimeUnit::Milliseconds => Span::new().milliseconds(v), + _ => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::civil::Time::MIN + .checked_add(build_span(min)) + .map_err(|e| { + vortex_err!("Time array min value {min} is out of range: {e}") + })?; + } + if let Some(max) = stats.compute_max::() { + jiff::civil::Time::MIN + .checked_add(build_span(max)) + .map_err(|e| { + vortex_err!("Time array max value {max} is out of range: {e}") + })?; + } + } + TimeUnit::Microseconds | TimeUnit::Nanoseconds => { + let build_span = |v: i64| match metadata { + TimeUnit::Microseconds => Span::new().microseconds(v), + TimeUnit::Nanoseconds => Span::new().nanoseconds(v), + _ => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::civil::Time::MIN + .checked_add(build_span(min)) + .map_err(|e| { + vortex_err!("Time array min value {min} is out of range: {e}") + })?; + } + if let Some(max) = stats.compute_max::() { + jiff::civil::Time::MIN + .checked_add(build_span(max)) + .map_err(|e| { + vortex_err!("Time array max value {max} is out of range: {e}") + })?; + } + } + d @ TimeUnit::Days => vortex_bail!("Time type does not support time unit {d}"), + } + + Ok(()) + } } #[cfg(test)] mod tests { use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability::Nullable; use crate::extension::datetime::Time; @@ -187,4 +252,27 @@ mod tests { let scalar = Scalar::new(dtype, Some(ScalarValue::Primitive(PValue::I32(0)))); assert_eq!(format!("{}", scalar.as_extension()), "00:00:00"); } + + #[test] + fn validate_time_array() -> VortexResult<()> { + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i32), Some(3661), Some(86399)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } + + #[test] + fn reject_time_array_out_of_range() { + // 86400 seconds = exactly 24 hours, which exceeds the valid time-of-day range. + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i32), Some(86400)]); + assert!(ExtensionArray::try_new(ext_dtype, storage.into_array()).is_err()); + } + + #[test] + fn reject_time_array_negative() { + let ext_dtype = Time::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(-1i32)]); + assert!(ExtensionArray::try_new(ext_dtype, storage.into_array()).is_err()); + } } diff --git a/vortex-array/src/extension/datetime/timestamp.rs b/vortex-array/src/extension/datetime/timestamp.rs index d314cef60a3..48856432237 100644 --- a/vortex-array/src/extension/datetime/timestamp.rs +++ b/vortex-array/src/extension/datetime/timestamp.rs @@ -14,6 +14,7 @@ use vortex_error::vortex_ensure; use vortex_error::vortex_err; use vortex_error::vortex_panic; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -174,6 +175,13 @@ impl ExtVTable for Timestamp { matches!(ext_dtype.storage_dtype(), DType::Primitive(PType::I64, _)), "Timestamp storage dtype must be i64" ); + + if let Some(tz) = &ext_dtype.metadata().tz { + jiff::Timestamp::UNIX_EPOCH + .in_tz(tz.as_ref()) + .map_err(|e| vortex_err!("Invalid timezone for timestamp: {}", e))?; + } + Ok(()) } @@ -218,6 +226,37 @@ impl ExtVTable for Timestamp { Ok(value) } + + fn validate_array<'a>( + &self, + ext_dtype: &'a ExtDType, + storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + // The stored i64 can be negative (representing times before the Unix epoch), which is + // valid. Both bounds only fail for extreme values near the edges of jiff's supported + // range. + let stats = storage_array.statistics(); + let build_span = |v: i64| match ext_dtype.metadata().unit { + TimeUnit::Nanoseconds => Span::new().nanoseconds(v), + TimeUnit::Microseconds => Span::new().microseconds(v), + TimeUnit::Milliseconds => Span::new().milliseconds(v), + TimeUnit::Seconds => Span::new().seconds(v), + TimeUnit::Days => unreachable!(), + }; + + if let Some(min) = stats.compute_min::() { + jiff::Timestamp::UNIX_EPOCH + .checked_add(build_span(min)) + .map_err(|e| vortex_err!("Timestamp array min value {min} is out of range: {e}"))?; + } + if let Some(max) = stats.compute_max::() { + jiff::Timestamp::UNIX_EPOCH + .checked_add(build_span(max)) + .map_err(|e| vortex_err!("Timestamp array max value {max} is out of range: {e}"))?; + } + + Ok(()) + } } #[cfg(test)] @@ -226,10 +265,15 @@ mod tests { use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability::Nullable; + use crate::dtype::extension::ExtDType; use crate::extension::datetime::TimeUnit; use crate::extension::datetime::Timestamp; + use crate::extension::datetime::TimestampOptions; use crate::scalar::PValue; use crate::scalar::Scalar; use crate::scalar::ScalarValue; @@ -245,15 +289,13 @@ mod tests { #[cfg_attr(miri, ignore)] #[test] fn reject_timestamp_with_invalid_timezone() { - let dtype = DType::Extension( - Timestamp::new_with_tz( - TimeUnit::Seconds, - Some(Arc::from("Not/A/Timezone")), - Nullable, - ) - .erased(), + let result = ExtDType::::try_new( + TimestampOptions { + unit: TimeUnit::Seconds, + tz: Some(Arc::from("Not/A/Timezone")), + }, + DType::Primitive(crate::dtype::PType::I64, Nullable), ); - let result = Scalar::try_new(dtype, Some(ScalarValue::Primitive(PValue::I64(0)))); assert!(result.is_err()); } @@ -280,4 +322,26 @@ mod tests { "1969-12-31T19:00:00-05:00[America/New_York]" ); } + + #[test] + fn validate_timestamp_array() -> VortexResult<()> { + let ext_dtype = Timestamp::new(TimeUnit::Seconds, Nullable).erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i64), Some(1_000_000)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } + + #[cfg_attr(miri, ignore)] + #[test] + fn validate_timestamp_array_with_timezone() -> VortexResult<()> { + let ext_dtype = Timestamp::new_with_tz( + TimeUnit::Seconds, + Some(Arc::from("America/New_York")), + Nullable, + ) + .erased(); + let storage = PrimitiveArray::from_option_iter([Some(0i64)]); + ExtensionArray::try_new(ext_dtype, storage.into_array())?; + Ok(()) + } } diff --git a/vortex-array/src/extension/tests/divisible_int.rs b/vortex-array/src/extension/tests/divisible_int.rs index 7b6e501b7cb..62bbb184782 100644 --- a/vortex-array/src/extension/tests/divisible_int.rs +++ b/vortex-array/src/extension/tests/divisible_int.rs @@ -6,11 +6,14 @@ use std::fmt; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; +use crate::DynArray; +use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::PType; +use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::scalar::ScalarValue; @@ -51,10 +54,7 @@ impl ExtVTable for DivisibleInt { Ok(Divisor(n)) } - fn validate_dtype( - &self, - ext_dtype: &crate::dtype::extension::ExtDType, - ) -> VortexResult<()> { + fn validate_dtype(&self, ext_dtype: &ExtDType) -> VortexResult<()> { vortex_ensure!( matches!(ext_dtype.storage_dtype(), DType::Primitive(PType::U64, _)), "divisible int storage dtype must be u64" @@ -64,16 +64,47 @@ impl ExtVTable for DivisibleInt { fn unpack_native<'a>( &self, - ext_dtype: &'a crate::dtype::extension::ExtDType, + ext_dtype: &'a ExtDType, storage_value: &'a ScalarValue, ) -> VortexResult> { let value = storage_value.as_primitive().cast::()?; + let metadata = ext_dtype.metadata(); - if value % metadata.0 != 0 { - vortex_bail!("{} is not divisible by {}", value, metadata.0); - } + vortex_ensure_eq!( + value % metadata.0, + 0, + "{value} is not divisible by {}", + metadata.0 + ); + Ok(value) } + + fn validate_array<'a>( + &self, + ext_dtype: &'a ExtDType, + storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + let primitive = DynArray::as_any(storage_array) + .downcast_ref::() + .ok_or_else(|| { + vortex_error::vortex_err!("expected PrimitiveArray for divisible int storage") + })?; + + let slice = primitive.as_slice::(); + let metadata = ext_dtype.metadata(); + + for value in slice { + vortex_ensure_eq!( + value % metadata.0, + 0, + "{value} is not divisible by {}", + metadata.0 + ); + } + + Ok(()) + } } #[cfg(test)] diff --git a/vortex-array/src/extension/uuid/vtable.rs b/vortex-array/src/extension/uuid/vtable.rs index 962b83f8907..4169c065c0e 100644 --- a/vortex-array/src/extension/uuid/vtable.rs +++ b/vortex-array/src/extension/uuid/vtable.rs @@ -2,12 +2,15 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use uuid; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; use vortex_error::vortex_ensure_eq; use vortex_error::vortex_err; +use crate::DynArray; +use crate::ToCanonical; use crate::dtype::DType; use crate::dtype::PType; use crate::dtype::extension::ExtDType; @@ -119,6 +122,40 @@ impl ExtVTable for Uuid { Ok(parsed) } + + fn validate_array<'a>( + &self, + ext_dtype: &'a ExtDType, + storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + let fsl = storage_array.to_fixed_size_list(); + let elements = fsl.elements().to_primitive(); + let bytes = elements.as_slice::(); + + for chunk in bytes.chunks_exact(UUID_BYTE_LEN) { + let bytes: [u8; UUID_BYTE_LEN] = chunk + .try_into() + .map_err(|_| vortex_err!("hack bc we can't have nice things")) + .vortex_expect("The chunk needs to be exactly size UUID_BYTE_LEN"); + + let uuid = uuid::Uuid::from_bytes(bytes); + let actual = uuid + .get_version() + .ok_or_else(|| vortex_err!("UUID has unrecognized version nibble"))? + as u8; + + if let Some(expected) = ext_dtype.metadata().version { + let expected = expected as u8; + vortex_ensure_eq!( + expected, + actual, + "UUID version mismatch: expected v{expected}, got v{actual}", + ); + } + } + + Ok(()) + } } #[expect( @@ -131,8 +168,13 @@ mod tests { use rstest::rstest; use uuid::Version; + use vortex_buffer::Buffer; use vortex_error::VortexResult; + use crate::array::IntoArray; + use crate::arrays::ExtensionArray; + use crate::arrays::FixedSizeListArray; + use crate::arrays::PrimitiveArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -143,6 +185,7 @@ mod tests { use crate::extension::uuid::vtable::UUID_BYTE_LEN; use crate::scalar::Scalar; use crate::scalar::ScalarValue; + use crate::validity::Validity; #[rstest] #[case::no_version(None)] @@ -347,4 +390,76 @@ mod tests { nullability, ) } + + /// Builds a [`FixedSizeListArray`] storage array from a slice of UUIDs. + fn uuid_storage_array(uuids: &[uuid::Uuid]) -> crate::ArrayRef { + let flat_bytes: Vec = uuids + .iter() + .flat_map(|u| u.as_bytes().iter().copied()) + .collect(); + let elements = + PrimitiveArray::new(Buffer::copy_from(&flat_bytes), Validity::NonNullable).into_array(); + FixedSizeListArray::new( + elements, + UUID_BYTE_LEN as u32, + Validity::NonNullable, + uuids.len(), + ) + .into_array() + } + + #[test] + fn validate_uuid_array_with_matching_version() -> VortexResult<()> { + let v4_a = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + let v4_b = uuid::Uuid::parse_str("6ba7b810-9dad-41d4-80b4-00c04fd430c8") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata { + version: Some(Version::Random), + }, + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_a, v4_b]); + ExtensionArray::try_new(ext_dtype, storage)?; + Ok(()) + } + + #[test] + fn validate_uuid_array_rejects_version_mismatch() -> VortexResult<()> { + // This is a v4 UUID, but the metadata says v7. + let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata { + version: Some(Version::SortRand), + }, + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_uuid]); + assert!(ExtensionArray::try_new(ext_dtype, storage).is_err()); + Ok(()) + } + + #[test] + fn validate_uuid_array_no_version_constraint() -> VortexResult<()> { + let v4_uuid = uuid::Uuid::parse_str("550e8400-e29b-41d4-a716-446655440000") + .map_err(|e| vortex_error::vortex_err!("{e}"))?; + + let ext_dtype = ExtDType::::try_new( + UuidMetadata::default(), + uuid_storage_dtype(Nullability::NonNullable), + )? + .erased(); + + let storage = uuid_storage_array(&[v4_uuid]); + ExtensionArray::try_new(ext_dtype, storage)?; + Ok(()) + } } diff --git a/vortex-array/src/scalar/arrow.rs b/vortex-array/src/scalar/arrow.rs index 45672e745fa..689ea9ee5cc 100644 --- a/vortex-array/src/scalar/arrow.rs +++ b/vortex-array/src/scalar/arrow.rs @@ -195,6 +195,7 @@ mod tests { use vortex_error::VortexResult; use vortex_error::vortex_bail; + use crate::DynArray; use crate::dtype::DType; use crate::dtype::DecimalDType; use crate::dtype::FieldDType; @@ -202,6 +203,7 @@ mod tests { use crate::dtype::Nullability; use crate::dtype::PType; use crate::dtype::StructFields; + use crate::dtype::extension::ExtDType; use crate::dtype::extension::ExtId; use crate::dtype::extension::ExtVTable; use crate::dtype::i256; @@ -464,20 +466,25 @@ mod tests { vortex_bail!("not implemented") } - fn validate_dtype( - &self, - _ext_dtype: &crate::dtype::extension::ExtDType, - ) -> VortexResult<()> { + fn validate_dtype(&self, _ext_dtype: &ExtDType) -> VortexResult<()> { Ok(()) } fn unpack_native<'a>( &self, - _ext_dtype: &'a crate::dtype::extension::ExtDType, + _ext_dtype: &'a ExtDType, _storage_value: &'a ScalarValue, ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar = Scalar::extension::( diff --git a/vortex-array/src/scalar/tests/casting.rs b/vortex-array/src/scalar/tests/casting.rs index e5677f7c1f9..480bb21a2b5 100644 --- a/vortex-array/src/scalar/tests/casting.rs +++ b/vortex-array/src/scalar/tests/casting.rs @@ -11,6 +11,7 @@ mod tests { use vortex_error::VortexResult; use vortex_error::vortex_bail; + use crate::DynArray; use crate::dtype::DType; use crate::dtype::FieldDType; use crate::dtype::Nullability; @@ -54,6 +55,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } impl Apples { @@ -270,6 +279,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let storage_dtype = DType::Primitive(PType::F16, Nullability::NonNullable); @@ -328,6 +345,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let struct_dtype = DType::Struct( diff --git a/vortex-array/src/scalar/typed_view/extension/tests.rs b/vortex-array/src/scalar/typed_view/extension/tests.rs index 4801e8e6e0c..2be82328a4d 100644 --- a/vortex-array/src/scalar/typed_view/extension/tests.rs +++ b/vortex-array/src/scalar/typed_view/extension/tests.rs @@ -4,6 +4,7 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; +use crate::DynArray; use crate::dtype::DType; use crate::dtype::Nullability; use crate::dtype::PType; @@ -43,6 +44,14 @@ impl ExtVTable for TestI32Ext { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } impl TestI32Ext { @@ -127,6 +136,14 @@ fn test_ext_scalar_partial_ord_different_types() { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar1 = Scalar::extension::( @@ -310,6 +327,14 @@ fn test_ext_scalar_with_metadata() { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let scalar = Scalar::extension::( diff --git a/vortex-btrblocks/src/canonical_compressor.rs b/vortex-btrblocks/src/canonical_compressor.rs index ca3dc2a05c9..451b38e4e8f 100644 --- a/vortex-btrblocks/src/canonical_compressor.rs +++ b/vortex-btrblocks/src/canonical_compressor.rs @@ -302,8 +302,14 @@ impl CanonicalCompressor for BtrBlocksCompressor { let compressed_storage = self.compress(ext_array.storage_array())?; Ok( - ExtensionArray::new(ext_array.ext_dtype().clone(), compressed_storage) - .into_array(), + // SAFETY: The values of the array are the same, so no validation is needed. + unsafe { + ExtensionArray::new_unchecked( + ext_array.ext_dtype().clone(), + compressed_storage, + ) + } + .into_array(), ) } } diff --git a/vortex-cuda/src/canonical.rs b/vortex-cuda/src/canonical.rs index d4eb37e09f3..91eae49959a 100644 --- a/vortex-cuda/src/canonical.rs +++ b/vortex-cuda/src/canonical.rs @@ -138,10 +138,10 @@ impl CanonicalCudaExt for Canonical { .into_host() .await? .into_array(); - Ok(Canonical::Extension(ExtensionArray::new( - ext.ext_dtype().clone(), - host_storage, - ))) + Ok(Canonical::Extension( + // SAFETY: The values of the array are the same, so no validation is needed. + unsafe { ExtensionArray::new_unchecked(ext.ext_dtype().clone(), host_storage) }, + )) } c => todo!("{} not implemented", c.dtype()), } diff --git a/vortex-duckdb/src/convert/dtype.rs b/vortex-duckdb/src/convert/dtype.rs index 7a96b66c729..002b1f32779 100644 --- a/vortex-duckdb/src/convert/dtype.rs +++ b/vortex-duckdb/src/convert/dtype.rs @@ -348,6 +348,7 @@ mod tests { use vortex::extension::datetime::Time; use vortex::extension::datetime::Timestamp; use vortex::scalar::ScalarValue; + use vortex_array::DynArray; use crate::cpp; use crate::duckdb::LogicalType; @@ -602,6 +603,14 @@ mod tests { ) -> VortexResult> { Ok("") } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + Ok(()) + } } let ext_dtype = ExtDType::::try_new( diff --git a/vortex-duckdb/src/convert/vector.rs b/vortex-duckdb/src/convert/vector.rs index d6b008efe64..70dc0b66f9d 100644 --- a/vortex-duckdb/src/convert/vector.rs +++ b/vortex-duckdb/src/convert/vector.rs @@ -512,12 +512,15 @@ mod tests { fn test_timestamp_extreme_values() { // Test extreme timestamp values let values = vec![ - i64::MAX, // Maximum possible timestamp - i64::MIN, // Minimum possible timestamp - 0i64, // Epoch - 9_223_372_036_854_775_000_i64, // Near max but reasonable - -9_223_372_036_854_775_000_i64, // Near min but reasonable + i32::MAX as i64, + i32::MIN as i64, + 0i64, + // These don't work because they get added to the unix epoch, so it overflows + // 631_107_417_600_000_000, // The max for microseconds in jiff. + // -631_107_417_600_000_000, // The min for microseconds in jiff. ]; + // https://docs.rs/jiff/latest/jiff/struct.Span.html#method.microseconds + let len = values.len(); let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP); diff --git a/vortex-tensor/public-api.lock b/vortex-tensor/public-api.lock index 2104eb97c95..fe25cd5058f 100644 --- a/vortex-tensor/public-api.lock +++ b/vortex-tensor/public-api.lock @@ -42,6 +42,8 @@ pub fn vortex_tensor::fixed_shape::FixedShapeTensor::serialize_metadata(&self, m pub fn vortex_tensor::fixed_shape::FixedShapeTensor::unpack_native<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::typed::ExtDType, storage_value: &'a vortex_array::scalar::scalar_value::ScalarValue) -> vortex_error::VortexResult +pub fn vortex_tensor::fixed_shape::FixedShapeTensor::validate_array<'a>(&self, _ext_dtype: &'a vortex_array::dtype::extension::typed::ExtDType, _storage_array: &'a dyn vortex_array::array::DynArray) -> vortex_error::VortexResult<()> + pub fn vortex_tensor::fixed_shape::FixedShapeTensor::validate_dtype(&self, ext_dtype: &vortex_array::dtype::extension::typed::ExtDType) -> vortex_error::VortexResult<()> pub struct vortex_tensor::fixed_shape::FixedShapeTensorMetadata diff --git a/vortex-tensor/src/fixed_shape/vtable.rs b/vortex-tensor/src/fixed_shape/vtable.rs index 15e47456ba9..7c010a839f0 100644 --- a/vortex-tensor/src/fixed_shape/vtable.rs +++ b/vortex-tensor/src/fixed_shape/vtable.rs @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors +use vortex::array::DynArray; use vortex::dtype::DType; use vortex::dtype::extension::ExtDType; use vortex::dtype::extension::ExtId; @@ -73,6 +74,15 @@ impl ExtVTable for FixedShapeTensor { // should be valid for a given tensor. Ok(storage_value) } + + fn validate_array<'a>( + &self, + _ext_dtype: &'a ExtDType, + _storage_array: &'a dyn DynArray, + ) -> VortexResult<()> { + // As long as the dtype and the metadata is correct, any value is valid under a tensor. + Ok(()) + } } #[cfg(test)]