Skip to content

Commit 41de47d

Browse files
committed
Spark version-specific behaviour for cast string to timestamp
1 parent 92553f1 commit 41de47d

File tree

6 files changed

+140
-103
lines changed

6 files changed

+140
-103
lines changed

native/core/src/execution/planner.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,12 @@ impl PhysicalPlanner {
406406
Ok(Arc::new(Cast::new(
407407
child,
408408
datatype,
409-
SparkCastOptions::new(eval_mode, &expr.timezone, expr.allow_incompat),
409+
{
410+
let mut opts =
411+
SparkCastOptions::new(eval_mode, &expr.timezone, expr.allow_incompat);
412+
opts.is_spark4_plus = expr.is_spark4_plus;
413+
opts
414+
},
410415
spark_expr.expr_id,
411416
query_context,
412417
)))

native/proto/src/proto/expr.proto

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,9 @@ message Cast {
266266
string timezone = 3;
267267
EvalMode eval_mode = 4;
268268
bool allow_incompat = 5;
269+
// True when running against Spark 4.0+. Controls version-specific cast behaviour
270+
// such as the handling of leading whitespace before T-prefixed time-only strings.
271+
bool is_spark4_plus = 6;
269272
}
270273

271274
message BinaryExpr {

native/spark-expr/src/conversion_funcs/cast.rs

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ pub struct SparkCastOptions {
131131
pub timezone: String,
132132
/// Allow casts that are supported but not guaranteed to be 100% compatible
133133
pub allow_incompat: bool,
134+
/// True when running against Spark 4.0+. Enables version-specific cast behaviour
135+
/// such as the handling of leading whitespace before T-prefixed time-only strings.
136+
pub is_spark4_plus: bool,
134137
/// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter)
135138
pub allow_cast_unsigned_ints: bool,
136139
/// We also use the cast logic for adapting Parquet schemas, so this flag is used
@@ -148,6 +151,7 @@ impl SparkCastOptions {
148151
eval_mode,
149152
timezone: timezone.to_string(),
150153
allow_incompat,
154+
is_spark4_plus: false,
151155
allow_cast_unsigned_ints: false,
152156
is_adapting_schema: false,
153157
null_string: "null".to_string(),
@@ -160,6 +164,7 @@ impl SparkCastOptions {
160164
eval_mode,
161165
timezone: "".to_string(),
162166
allow_incompat,
167+
is_spark4_plus: false,
163168
allow_cast_unsigned_ints: false,
164169
is_adapting_schema: false,
165170
null_string: "null".to_string(),
@@ -297,7 +302,13 @@ pub(crate) fn cast_array(
297302
(Utf8, Boolean) => spark_cast_utf8_to_boolean::<i32>(&array, eval_mode),
298303
(LargeUtf8, Boolean) => spark_cast_utf8_to_boolean::<i64>(&array, eval_mode),
299304
(Utf8, Timestamp(_, _)) => {
300-
cast_string_to_timestamp(&array, to_type, eval_mode, &cast_options.timezone)
305+
cast_string_to_timestamp(
306+
&array,
307+
to_type,
308+
eval_mode,
309+
&cast_options.timezone,
310+
cast_options.is_spark4_plus,
311+
)
301312
}
302313
(Utf8, Date32) => cast_string_to_date(&array, to_type, eval_mode),
303314
(Date32, Int32) => {

0 commit comments

Comments
 (0)