Skip to content

Commit 2829ce8

Browse files
authored
feat: add support crc32 expression (#3498)
1 parent 732b9fe commit 2829ce8

File tree

5 files changed

+38
-1
lines changed

5 files changed

+38
-1
lines changed

docs/spark_expressions_support.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@
234234

235235
### hash_funcs
236236

237-
- [ ] crc32
237+
- [x] crc32
238238
- [ ] hash
239239
- [x] md5
240240
- [ ] sha

native/core/src/execution/jni_api.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ use datafusion_spark::function::datetime::date_add::SparkDateAdd;
4646
use datafusion_spark::function::datetime::date_sub::SparkDateSub;
4747
use datafusion_spark::function::datetime::last_day::SparkLastDay;
4848
use datafusion_spark::function::datetime::next_day::SparkNextDay;
49+
use datafusion_spark::function::hash::crc32::SparkCrc32;
4950
use datafusion_spark::function::hash::sha1::SparkSha1;
5051
use datafusion_spark::function::hash::sha2::SparkSha2;
5152
use datafusion_spark::function::map::map_from_entries::MapFromEntries;
@@ -375,6 +376,7 @@ fn register_datafusion_spark_function(session_ctx: &SessionContext) {
375376
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkHex::default()));
376377
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkWidthBucket::default()));
377378
session_ctx.register_udf(ScalarUDF::new_from_impl(MapFromEntries::default()));
379+
session_ctx.register_udf(ScalarUDF::new_from_impl(SparkCrc32::default()));
378380
}
379381

380382
/// Prepares arrow arrays for output.

spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
139139
classOf[StructsToCsv] -> CometStructsToCsv)
140140

141141
private val hashExpressions: Map[Class[_ <: Expression], CometExpressionSerde[_]] = Map(
142+
classOf[Crc32] -> CometScalarFunction("crc32"),
142143
classOf[Md5] -> CometScalarFunction("md5"),
143144
classOf[Murmur3Hash] -> CometMurmur3Hash,
144145
classOf[Sha2] -> CometSha2,
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing,
12+
-- software distributed under the License is distributed on an
13+
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
-- KIND, either express or implied. See the License for the
15+
-- specific language governing permissions and limitations
16+
-- under the License.
17+
18+
-- ConfigMatrix: parquet.enable.dictionary=false,true
19+
20+
-- crc32 function
21+
statement
22+
CREATE TABLE test(col string, a int, b float) USING parquet
23+
24+
statement
25+
INSERT INTO test VALUES ('Spark SQL ', 10, 1.2), (NULL, NULL, NULL), ('', 0, 0.0), ('苹果手机', NULL, 3.999999), ('Spark SQL ', 10, 1.2), (NULL, NULL, NULL), ('', 0, 0.0), ('苹果手机', NULL, 3.999999)
26+
27+
query
28+
SELECT crc32(col), crc32(cast(a as string)), crc32(cast(b as string)) FROM test
29+
30+
-- literal arguments
31+
query
32+
SELECT crc32('Spark SQL')

spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,6 +1987,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
19871987
|md5(col), md5(cast(a as string)), md5(cast(b as string)),
19881988
|hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col),
19891989
|xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col),
1990+
|crc32(col), crc32(cast(a as string)), crc32(cast(b as string)),
19901991
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1),
19911992
|sha1(col), sha1(cast(a as string)), sha1(cast(b as string))
19921993
|from test
@@ -2097,6 +2098,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
20972098
|md5(col), md5(cast(a as string)), --md5(cast(b as string)),
20982099
|hash(col), hash(col, 1), hash(col, 0), hash(col, a, b), hash(b, a, col),
20992100
|xxhash64(col), xxhash64(col, 1), xxhash64(col, 0), xxhash64(col, a, b), xxhash64(b, a, col),
2101+
|crc32(col), crc32(cast(a as string)),
21002102
|sha2(col, 0), sha2(col, 256), sha2(col, 224), sha2(col, 384), sha2(col, 512), sha2(col, 128), sha2(col, -1),
21012103
|sha1(col), sha1(cast(a as string))
21022104
|from test

0 commit comments

Comments
 (0)