Skip to content

Commit c0be213

Browse files
committed
fix: link accumulator-related IgnoreCometNativeScan tests to #3867
Update all tests that are ignored because native scans cannot propagate JVM-side Spark accumulators so that they reference the new tracking issue, across all three Spark version diffs (3.4.3, 3.5.8, 4.0.1). Affected tests: "filter pushdown - StringPredicate"; "Filters should be pushed down for vectorized Parquet reader at row group level"; "SPARK-34562: Bloom filter push down".
1 parent ba6b3ce commit c0be213

File tree

3 files changed

+31
-29
lines changed

3 files changed

+31
-29
lines changed

dev/diffs/3.4.3.diff

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2032,7 +2032,7 @@ index 07e2849ce6f..3e73645b638 100644
20322032
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
20332033
)
20342034
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
2035-
index 104b4e416cd..b8af360fa14 100644
2035+
index 104b4e416cd..f05bfdecb69 100644
20362036
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
20372037
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
20382038
@@ -38,6 +38,7 @@ import org.apache.parquet.schema.MessageType
@@ -2062,7 +2062,7 @@ index 104b4e416cd..b8af360fa14 100644
20622062

20632063
- test("Filters should be pushed down for vectorized Parquet reader at row group level") {
20642064
+ test("Filters should be pushed down for vectorized Parquet reader at row group level",
2065-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2065+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
20662066
import testImplicits._
20672067

20682068
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
@@ -2098,7 +2098,7 @@ index 104b4e416cd..b8af360fa14 100644
20982098

20992099
- test("filter pushdown - StringPredicate") {
21002100
+ test("filter pushdown - StringPredicate",
2101-
+ IgnoreCometNativeDataFusion("cannot be pushed down")) {
2101+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
21022102
import testImplicits._
21032103
// keep() should take effect on StartsWith/EndsWith/Contains
21042104
Seq(
@@ -2153,7 +2153,7 @@ index 104b4e416cd..b8af360fa14 100644
21532153

21542154
- test("SPARK-34562: Bloom filter push down") {
21552155
+ test("SPARK-34562: Bloom filter push down",
2156-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2156+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
21572157
withTempPath { dir =>
21582158
val path = dir.getCanonicalPath
21592159
spark.range(100).selectExpr("id * 2 AS id")

dev/diffs/3.5.8.diff

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1982,7 +1982,7 @@ index 07e2849ce6f..3e73645b638 100644
19821982
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
19831983
)
19841984
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
1985-
index 8e88049f51e..f9d515edee1 100644
1985+
index 8e88049f51e..e3b20fe8845 100644
19861986
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
19871987
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
19881988
@@ -1095,7 +1095,11 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
@@ -2004,7 +2004,7 @@ index 8e88049f51e..f9d515edee1 100644
20042004

20052005
- test("Filters should be pushed down for vectorized Parquet reader at row group level") {
20062006
+ test("Filters should be pushed down for vectorized Parquet reader at row group level",
2007-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2007+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
20082008
import testImplicits._
20092009

20102010
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
@@ -2034,16 +2034,17 @@ index 8e88049f51e..f9d515edee1 100644
20342034
}
20352035
}
20362036
}
2037-
@@ -1699,7 +1712,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2037+
@@ -1699,7 +1712,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
20382038
(attr, value) => sources.StringContains(attr, value))
20392039
}
20402040

20412041
- test("filter pushdown - StringPredicate") {
2042-
+ test("filter pushdown - StringPredicate", IgnoreCometNativeScan("cannot be pushed down")) {
2042+
+ test("filter pushdown - StringPredicate",
2043+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
20432044
import testImplicits._
20442045
// keep() should take effect on StartsWith/EndsWith/Contains
20452046
Seq(
2046-
@@ -1743,7 +1756,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2047+
@@ -1743,7 +1757,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
20472048
}
20482049
}
20492050

@@ -2053,9 +2054,9 @@ index 8e88049f51e..f9d515edee1 100644
20532054
val schema = StructType(Seq(
20542055
StructField("a", IntegerType, nullable = false)
20552056
))
2056-
@@ -1949,11 +1965,24 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2057+
@@ -1949,11 +1964,24 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
20572058
""".stripMargin)
2058-
2059+
20592060
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
20602061
- val e = intercept[SparkException] {
20612062
+ // Spark native readers wrap the error in SparkException(FAILED_READ_FILE).
@@ -2081,7 +2082,7 @@ index 8e88049f51e..f9d515edee1 100644
20812082
}
20822083

20832084
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
2084-
@@ -1984,7 +2013,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2085+
@@ -1984,7 +2012,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
20852086
}
20862087
}
20872088

@@ -2091,17 +2092,17 @@ index 8e88049f51e..f9d515edee1 100644
20912092
// block 1:
20922093
// null count min max
20932094
// page-0 0 0 99
2094-
@@ -2044,7 +2074,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2095+
@@ -2044,7 +2073,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
20952096
}
20962097
}
20972098

20982099
- test("SPARK-34562: Bloom filter push down") {
20992100
+ test("SPARK-34562: Bloom filter push down",
2100-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2101+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
21012102
withTempPath { dir =>
21022103
val path = dir.getCanonicalPath
21032104
spark.range(100).selectExpr("id * 2 AS id")
2104-
@@ -2276,7 +2307,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
2105+
@@ -2276,7 +2306,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
21052106
assert(pushedParquetFilters.exists(_.getClass === filterClass),
21062107
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
21072108

@@ -2114,7 +2115,7 @@ index 8e88049f51e..f9d515edee1 100644
21142115
} else {
21152116
assert(selectedFilters.isEmpty, "There is filter pushed down")
21162117
}
2117-
@@ -2336,7 +2371,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
2118+
@@ -2336,7 +2370,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
21182119
assert(pushedParquetFilters.exists(_.getClass === filterClass),
21192120
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
21202121

dev/diffs/4.0.1.diff

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -246,12 +246,12 @@ index aa3d02dc2fb..c4f878d9908 100644
246246
WITH t(c1) AS (SELECT replace(listagg(DISTINCT col1 COLLATE unicode_rtrim) COLLATE utf8_binary, ' ', '') FROM (VALUES ('xbc '), ('xbc '), ('a'), ('xbc'))) SELECT len(c1), regexp_count(c1, 'a'), regexp_count(c1, 'xbc') FROM t;
247247
WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc\n'), ('abc'), ('x'))) SELECT replace(replace(c1, ' ', ''), '\n', '$') FROM t;
248248
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
249-
index 0000000..0000000 100644
249+
index 41fd4de2a09..162d5a817b6 100644
250250
--- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
251251
+++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql
252252
@@ -6,6 +6,10 @@
253253
-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605
254-
254+
255255
-- Test aggregate operator with codegen on and off.
256256
+
257257
+-- Floating-point precision difference between DataFusion and JVM for FILTER aggregates
@@ -2765,7 +2765,7 @@ index cd6f41b4ef4..4b6a17344bc 100644
27652765
ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
27662766
)
27672767
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
2768-
index 6080a5e8e4b..ea058d57b4b 100644
2768+
index 6080a5e8e4b..0d394024e85 100644
27692769
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
27702770
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
27712771
@@ -38,6 +38,7 @@ import org.apache.parquet.schema.MessageType
@@ -2795,7 +2795,7 @@ index 6080a5e8e4b..ea058d57b4b 100644
27952795

27962796
- test("Filters should be pushed down for vectorized Parquet reader at row group level") {
27972797
+ test("Filters should be pushed down for vectorized Parquet reader at row group level",
2798-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2798+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
27992799
import testImplicits._
28002800

28012801
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
@@ -2825,16 +2825,17 @@ index 6080a5e8e4b..ea058d57b4b 100644
28252825
}
28262826
}
28272827
}
2828-
@@ -1706,7 +1720,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2828+
@@ -1706,7 +1720,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
28292829
(attr, value) => sources.StringContains(attr, value))
28302830
}
28312831

28322832
- test("filter pushdown - StringPredicate") {
2833-
+ test("filter pushdown - StringPredicate", IgnoreCometNativeScan("cannot be pushed down")) {
2833+
+ test("filter pushdown - StringPredicate",
2834+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
28342835
import testImplicits._
28352836
// keep() should take effect on StartsWith/EndsWith/Contains
28362837
Seq(
2837-
@@ -1750,7 +1764,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2838+
@@ -1750,7 +1765,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
28382839
}
28392840
}
28402841

@@ -2844,7 +2845,7 @@ index 6080a5e8e4b..ea058d57b4b 100644
28442845
val schema = StructType(Seq(
28452846
StructField("a", IntegerType, nullable = false)
28462847
))
2847-
@@ -1956,13 +1971,21 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2848+
@@ -1956,13 +1972,21 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
28482849
""".stripMargin)
28492850

28502851
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
@@ -2871,7 +2872,7 @@ index 6080a5e8e4b..ea058d57b4b 100644
28712872
}
28722873

28732874
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
2874-
@@ -1993,7 +2016,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2875+
@@ -1993,7 +2017,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
28752876
}
28762877
}
28772878

@@ -2881,17 +2882,17 @@ index 6080a5e8e4b..ea058d57b4b 100644
28812882
// block 1:
28822883
// null count min max
28832884
// page-0 0 0 99
2884-
@@ -2053,7 +2077,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
2885+
@@ -2053,7 +2078,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
28852886
}
28862887
}
28872888

28882889
- test("SPARK-34562: Bloom filter push down") {
28892890
+ test("SPARK-34562: Bloom filter push down",
2890-
+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
2891+
+ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
28912892
withTempPath { dir =>
28922893
val path = dir.getCanonicalPath
28932894
spark.range(100).selectExpr("id * 2 AS id")
2894-
@@ -2305,7 +2330,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
2895+
@@ -2305,7 +2331,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite {
28952896
assert(pushedParquetFilters.exists(_.getClass === filterClass),
28962897
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
28972898

@@ -2904,7 +2905,7 @@ index 6080a5e8e4b..ea058d57b4b 100644
29042905
} else {
29052906
assert(selectedFilters.isEmpty, "There is filter pushed down")
29062907
}
2907-
@@ -2368,7 +2397,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
2908+
@@ -2368,7 +2398,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite {
29082909
assert(pushedParquetFilters.exists(_.getClass === filterClass),
29092910
s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")
29102911

0 commit comments

Comments
 (0)