From 1ac0e55db7f120c048b8374bf1c724447bbec21d Mon Sep 17 00:00:00 2001 From: sarangat_LinkedIn Date: Thu, 1 Jan 2026 03:10:01 -0800 Subject: [PATCH 1/3] add all aggregations --- .../auron/utils/AuronSparkTestSettings.scala | 6 ++++++ .../sql/AuronDataFrameAggregateSuite.scala | 19 +++++++++++++++++++ .../sql/AuronDatasetAggregatorSuite.scala | 19 +++++++++++++++++++ .../AuronTypedImperativeAggregateSuite.scala | 19 +++++++++++++++++++ 4 files changed, 63 insertions(+) create mode 100644 auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala create mode 100644 auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala create mode 100644 auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index 2bccdc086..f62893e7b 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -28,6 +28,12 @@ class AuronSparkTestSettings extends SparkTestSettings { // See https://github.com/apache/auron/issues/1724 .exclude("string / binary substring function") + enableSuite[AuronDataFrameAggregateSuite] + + enableSuite[AuronDatasetAggregatorSuite] + + enableSuite[AuronTypedImperativeAggregateSuite] + // Will be implemented in the future. override def getSQLQueryTestSettings = new SQLQueryTestSettings { override def getResourceFilePath: String = ??? diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala new file mode 100644 index 000000000..a4786c3f4 --- /dev/null +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala new file mode 100644 index 000000000..b446ab7d0 --- /dev/null +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDatasetAggregatorSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +class AuronDatasetAggregatorSuite extends DatasetAggregatorSuite with SparkQueryTestsBase diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala new file mode 100644 index 000000000..36d69ca09 --- /dev/null +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +class AuronTypedImperativeAggregateSuite extends TypedImperativeAggregateSuite with SparkQueryTestsBase From 8c6b1b7ca180fa2060057227eabc8ee465a8f3ad Mon Sep 17 00:00:00 2001 From: sarangat_LinkedIn Date: Thu, 1 Jan 2026 04:51:14 -0800 Subject: [PATCH 2/3] exclude tests for now --- .../org/apache/auron/utils/AuronSparkTestSettings.scala | 5 +++++ .../org/apache/spark/sql/AuronDataFrameAggregateSuite.scala | 2 +- .../spark/sql/AuronTypedImperativeAggregateSuite.scala | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index f62893e7b..b37e1b981 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -29,6 +29,11 @@ class AuronSparkTestSettings extends SparkTestSettings { .exclude("string / binary substring function") enableSuite[AuronDataFrameAggregateSuite] + .excludeByPrefix("collect functions") + .exclude( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") + .exclude( + "SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail") enableSuite[AuronDatasetAggregatorSuite] diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala index a4786c3f4..eb2f89b0d 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -16,4 +16,4 @@ */ package org.apache.spark.sql -class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase {} diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala index 36d69ca09..a6e3af241 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronTypedImperativeAggregateSuite.scala @@ -16,4 +16,6 @@ */ package org.apache.spark.sql -class AuronTypedImperativeAggregateSuite extends TypedImperativeAggregateSuite with SparkQueryTestsBase +class AuronTypedImperativeAggregateSuite + extends TypedImperativeAggregateSuite + with SparkQueryTestsBase From d0ef9a119f3ecf84bf54b31a253f1032c86b770b Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Mon, 5 Jan 2026 05:13:59 -0800 Subject: [PATCH 3/3] add issue details --- .../auron/utils/AuronSparkTestSettings.scala | 2 + .../sql/AuronDataFrameAggregateSuite.scala | 61 ++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala index b37e1b981..1774272a7 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/auron/utils/AuronSparkTestSettings.scala @@ -29,7 +29,9 @@ class AuronSparkTestSettings extends SparkTestSettings { .exclude("string / binary substring function") enableSuite[AuronDataFrameAggregateSuite] + // See https://github.com/apache/auron/issues/1840 .excludeByPrefix("collect functions") + // Added a new test to handle this issue .exclude( "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") .exclude( diff --git a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala index eb2f89b0d..d1361ab7e 100644 --- a/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala +++ b/auron-spark-tests/spark33/src/test/scala/org/apache/spark/sql/AuronDataFrameAggregateSuite.scala @@ -16,4 +16,63 @@ */ package org.apache.spark.sql -class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase {} +import scala.util.Random + +import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.auron.plan.NativeAggBase +import org.apache.spark.sql.functions.{collect_list, monotonically_increasing_id, rand, randn, spark_partition_id, sum} +import org.apache.spark.sql.internal.SQLConf + +class AuronDataFrameAggregateSuite extends DataFrameAggregateSuite with SparkQueryTestsBase { + import testImplicits._ + + // Ported from spark DataFrameAggregateSuite only with plan check changed. + private def assertNoExceptions(c: Column): Unit = { + for ((wholeStage, useObjectHashAgg) <- + Seq((true, true), (true, false), (false, true), (false, false))) { + withSQLConf( + (SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key, wholeStage.toString), + (SQLConf.USE_OBJECT_HASH_AGG.key, useObjectHashAgg.toString)) { + + val df = Seq(("1", 1), ("1", 2), ("2", 3), ("2", 4)).toDF("x", "y") + + val hashAggDF = df.groupBy("x").agg(c, sum("y")) + hashAggDF.collect() + val hashAggPlan = hashAggDF.queryExecution.executedPlan + if (wholeStage) { + assert(find(hashAggPlan) { + case WholeStageCodegenExec(_: HashAggregateExec) => true + // If offloaded, Spark whole stage codegen takes no effect and a native hash agg is + // expected to be used. + case _: NativeAggBase => true + case _ => false + }.isDefined) + } else { + assert( + stripAQEPlan(hashAggPlan).isInstanceOf[HashAggregateExec] || + stripAQEPlan(hashAggPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + + val objHashAggOrSortAggDF = df.groupBy("x").agg(c, collect_list("y")) + objHashAggOrSortAggDF.collect() + assert(stripAQEPlan(objHashAggOrSortAggDF.queryExecution.executedPlan).find { + case _: NativeAggBase => true + case _ => false + }.isDefined) + } + } + } + + testAuron( + "SPARK-19471: AggregationIterator does not initialize the generated result projection before using it") { + Seq( + monotonically_increasing_id(), + spark_partition_id(), + rand(Random.nextLong()), + randn(Random.nextLong())).foreach(assertNoExceptions) + } +}