apache · lyne7-sc · Jul 2, 2026 · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026
diff --git a/spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/auron/ShimsImpl.scala b/spark-extension-shims-spark/src/main/scala/org/apache/spark/sql/auron/ShimsImpl.scala
@@ -97,6 +97,7 @@ import org.apache.spark.sql.execution.auron.plan.NativeWindowBase
 import org.apache.spark.sql.execution.auron.plan.NativeWindowExec
 import org.apache.spark.sql.execution.auron.shuffle.{AuronBlockStoreShuffleReaderBase, AuronRssShuffleManagerBase, RssPartitionWriterBase}
 import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeLike, ReusedExchangeExec, ShuffleExchangeExec}
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec}
 import org.apache.spark.sql.execution.joins.auron.plan.NativeBroadcastJoinExec
@@ -301,6 +302,45 @@ class ShimsImpl extends Shims with Logging {
       child: SparkPlan): NativeGenerateBase =
     NativeGenerateExec(generator, requiredChildOutput, outer, generatorOutput, child)
 
+  @sparkver("3.0 / 3.1")
+  override def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec =
+    exec.copy(output = exec.output, scan = exec.scan) // runtimeFilters is not used here
+
+  @sparkver("3.2")
+  override def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec =
+    exec.copy(runtimeFilters = runtimeFilters) // copy() keeps every other field of exec
+
+  @sparkver("3.3")
+  override def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec =
+    exec.copy(runtimeFilters = runtimeFilters) // copy() keeps every other field of exec
+
+  @sparkver("3.4")
+  override def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec =
+    exec.copy(
+      output = exec.output,
+      scan = exec.scan,
+      runtimeFilters = runtimeFilters, // the only field that differs from exec
+      keyGroupedPartitioning = exec.keyGroupedPartitioning,
+      ordering = exec.ordering,
+      table = exec.table,
+      commonPartitionValues = exec.commonPartitionValues,
+      applyPartialClustering = exec.applyPartialClustering,
+      replicatePartitions = exec.replicatePartitions)
+
+  @sparkver("3.5 / 4.0 / 4.1")
+  override def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec =
+    exec.copy(runtimeFilters = runtimeFilters) // copy() keeps every other field of exec
+
   @sparkver("3.4 / 3.5 / 4.0 / 4.1")
   private def effectiveLimit(rawLimit: Int): Int =
     if (rawLimit == -1) Int.MaxValue else rawLimit

diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/Shims.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/Shims.scala
@@ -49,6 +49,7 @@ import org.apache.spark.sql.execution.auron.plan.NativeBroadcastJoinBase
 import org.apache.spark.sql.execution.auron.plan.NativeSortMergeJoinBase
 import org.apache.spark.sql.execution.auron.shuffle.RssPartitionWriterBase
 import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.execution.datasources.v2.BatchScanExec
 import org.apache.spark.sql.execution.exchange.{BroadcastExchangeLike, ShuffleExchangeExec}
 import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, ShuffledHashJoinExec}
 import org.apache.spark.sql.execution.metric.SQLMetric
@@ -125,6 +126,10 @@ abstract class Shims {
       generatorOutput: Seq[Attribute],
       child: SparkPlan): NativeGenerateBase
 
+  def copyBatchScanExecWithRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[Expression]): BatchScanExec // implemented per Spark version
+
   def getLimitAndOffset(plan: GlobalLimitExec): (Int, Int) = (plan.limit, 0)
 
   def createNativeGlobalLimitExec(

diff --git a/...on-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergConvertProvider.scala b/...on-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergConvertProvider.scala
@@ -55,7 +55,8 @@ class IcebergConvertProvider extends AuronConvertProvider with Logging {
       case e: BatchScanExec =>
         IcebergScanSupport.plan(e) match {
           case Some(plan) =>
-            AuronConverters.addRenameColumnsExec(NativeIcebergTableScanExec(e, plan))
+            AuronConverters.addRenameColumnsExec(
+              NativeIcebergTableScanExec(e, plan, e.runtimeFilters))
           case None =>
             IcebergScanSupport.fallbackReason(e) match {
               case Some(reason) => throw new AssertionError(reason)

diff --git a/.../auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala b/.../auron-iceberg/src/main/scala/org/apache/spark/sql/auron/iceberg/IcebergScanSupport.scala
@@ -19,11 +19,12 @@ package org.apache.spark.sql.auron.iceberg
 import scala.collection.JavaConverters._
 import scala.util.control.NonFatal
 
+import org.apache.commons.lang3.reflect.MethodUtils
 import org.apache.iceberg.{AddedRowsScanTask, ChangelogOperation, ChangelogScanTask, FileFormat, FileScanTask, MetadataColumns, ScanTask}
 import org.apache.iceberg.expressions.{And => IcebergAnd, BoundPredicate, Expression => IcebergExpression, Not => IcebergNot, Or => IcebergOr, UnboundPredicate}
 import org.apache.iceberg.spark.source.AuronIcebergSourceUtil
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.auron.NativeConverters
+import org.apache.spark.sql.auron.{NativeConverters, Shims}
 import org.apache.spark.sql.catalyst.expressions.{And => SparkAnd, AttributeReference, EqualTo, Expression => SparkExpression, GreaterThan, GreaterThanOrEqual, In, IsNaN, IsNotNull, IsNull, LessThan, LessThanOrEqual, Literal, Not => SparkNot, Or => SparkOr}
 import org.apache.spark.sql.catalyst.trees.TreeNodeTag
 import org.apache.spark.sql.connector.read.{InputPartition, Scan}
@@ -55,6 +56,8 @@ final case class IcebergScanPlan(
 object IcebergScanSupport extends Logging {
   private val scanPlanTag: TreeNodeTag[Option[IcebergScanPlan]] = TreeNodeTag(
     "auron.iceberg.scan.plan")
+  private val runtimeFilteredScanPlanTag: TreeNodeTag[Option[IcebergScanPlan]] = TreeNodeTag(
+    "auron.iceberg.runtime.filtered.scan.plan") // caches plans built with runtime filters
 
   private val SparkChangelogScanClassName =
     "org.apache.iceberg.spark.source.SparkChangelogScan"
@@ -82,35 +85,54 @@ object IcebergScanSupport extends Logging {
     }
   }
 
-  def plan(exec: BatchScanExec): Option[IcebergScanPlan] = {
-    exec.getTagValue(scanPlanTag) match {
+  def plan(exec: BatchScanExec, useRuntimeFilters: Boolean = false): Option[IcebergScanPlan] = {
+    // Plans built from runtime-filtered partitions are cached under their own tag,
+    // separate from the tag that holds the statically planned result.
+    val filtered = useRuntimeFilters && exec.runtimeFilters.nonEmpty
+    val cacheTag =
+      if (filtered) runtimeFilteredScanPlanTag
+      else scanPlanTag
+    exec.getTagValue(cacheTag) match {
       case Some(cached) => cached
       case None =>
-        val planned = planUncached(exec)
-        exec.setTagValue(scanPlanTag, planned)
+        val planned = planUncached(exec, useRuntimeFilters)
+        exec.setTagValue(cacheTag, planned)
         planned
     }
   }
 
-  private def planUncached(exec: BatchScanExec): Option[IcebergScanPlan] = {
+  def withRuntimeFilters(
+      exec: BatchScanExec,
+      runtimeFilters: Seq[SparkExpression]): BatchScanExec = {
+    // Reuse the node as-is when it already carries exactly these filters;
+    // otherwise let the version-specific shim build the copy.
+    val alreadyApplied = exec.runtimeFilters == runtimeFilters
+    if (alreadyApplied) exec
+    else Shims.get.copyBatchScanExecWithRuntimeFilters(exec, runtimeFilters)
+  }
+
+  private def planUncached(
+      exec: BatchScanExec,
+      useRuntimeFilters: Boolean): Option[IcebergScanPlan] = { // flag is forwarded unchanged
     val scan = exec.scan
     val scanClassName = scan.getClass.getName
     // Only handle Iceberg scans; other sources must stay on Spark's path.
     if (scanClassName == SparkChangelogScanClassName) {
-      return planChangelogScan(exec, scan)
+      return planChangelogScan(exec, scan, useRuntimeFilters)
     }
 
     if (!AuronIcebergSourceUtil.getClassOfSparkBatchQueryScan.isInstance(scan)) {
       return None
     }
 
-    planFileScan(exec, scan, scanClassName)
+    planFileScan(exec, scan, scanClassName, useRuntimeFilters)
   }
 
   private def planFileScan(
       exec: BatchScanExec,
       scan: Scan,
-      scanClassName: String): Option[IcebergScanPlan] = {
+      scanClassName: String,
+      useRuntimeFilters: Boolean): Option[IcebergScanPlan] = {
     val readSchema = scan.readSchema
     val schemas = supportedSchemas(readSchema, isChangelogScan = false)
     if (schemas.isEmpty) {
@@ -143,7 +165,7 @@ object IcebergScanSupport extends Logging {
       missingFieldIds.isEmpty,
       s"Missing Iceberg field ids for columns: ${missingFieldIds.mkString(", ")}")
 
-    val partitions = inputPartitions(exec)
+    val partitions = inputPartitions(exec, useRuntimeFilters)
     // Empty scan (e.g. empty table) should still build a plan to return no rows.
     if (partitions.isEmpty) {
       logWarning(s"Native Iceberg scan planned with empty partitions for $scanClassName.")
@@ -203,15 +225,18 @@ object IcebergScanSupport extends Logging {
         fieldIdsByName))
   }
 
-  private def planChangelogScan(exec: BatchScanExec, scan: Scan): Option[IcebergScanPlan] = {
+  private def planChangelogScan(
+      exec: BatchScanExec,
+      scan: Scan,
+      useRuntimeFilters: Boolean): Option[IcebergScanPlan] = {
     val readSchema = scan.readSchema
     val schemas = supportedSchemas(readSchema, isChangelogScan = true)
     if (schemas.isEmpty) {
       return None
     }
     val (fileSchema, partitionSchema) = schemas.get
 
-    val partitions = inputPartitions(exec)
+    val partitions = inputPartitions(exec, useRuntimeFilters)
     if (partitions.isEmpty) {
       return Some(
         IcebergScanPlan(
@@ -326,7 +351,16 @@ object IcebergScanSupport extends Logging {
   private def deletesEmpty(deletes: java.util.List[_]): Boolean =
     deletes == null || deletes.isEmpty
 
-  private def inputPartitions(exec: BatchScanExec): Seq[InputPartition] = {
+  private def inputPartitions(
+      exec: BatchScanExec,
+      useRuntimeFilters: Boolean): Seq[InputPartition] = {
+    if (useRuntimeFilters) {
+      runtimeFilteredPartitions(exec) match {
+        case Some(partitions) => return partitions
+        case None =>
+      }
+    }
+
     // Prefer DataSource V2 batch API; if not available, fallback to exec methods via reflection.
     val fromBatch =
       try {
@@ -382,6 +416,40 @@ object IcebergScanSupport extends Logging {
     }
   }
 
+  private def runtimeFilteredPartitions(exec: BatchScanExec): Option[Seq[InputPartition]] = {
+    // No runtime filters: nothing to prune, so the caller keeps its regular lookup.
+    if (exec.runtimeFilters.isEmpty) {
+      return None
+    }
+    try {
+      // prepare() is a public SparkPlan method, so call it directly instead of reflectively;
+      // waitForSubqueries is still invoked through forced reflective access, as before.
+      exec.prepare()
+      MethodUtils.invokeMethod(exec, true, "waitForSubqueries")
+      invokeDeclaredMethod(exec, "filteredPartitions") match {
+        case Some(seq: scala.collection.Seq[_]) => Some(flattenPartitions(seq))
+        case _ => None
+      }
+    } catch {
+      case NonFatal(t) =>
+        logWarning(
+          s"Failed to obtain runtime-filtered input partitions for ${exec.getClass.getName}.",
+          t)
+        None
+    }
+  }
+
+  // Collects every InputPartition in seq, descending into nested sequences; elements of
+  // any other type are skipped.
+  private def flattenPartitions(seq: scala.collection.Seq[_]): Seq[InputPartition] = {
+    def unwrap(element: Any): scala.collection.Seq[InputPartition] = element match {
+      case single: InputPartition => Seq(single)
+      case group: scala.collection.Seq[_] => flattenPartitions(group)
+      case _ => Seq.empty
+    }
+    seq.flatMap(element => unwrap(element)).toSeq
+  }
+
   private case class IcebergPartitionView(tasks: Seq[ScanTask])
 
   private def icebergPartition(partition: InputPartition): Option[IcebergPartitionView] = {

diff --git a/...src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeIcebergTableScanExec.scala b/...src/main/scala/org/apache/spark/sql/execution/auron/plan/NativeIcebergTableScanExec.scala
@@ -31,9 +31,9 @@ import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.auron.{EmptyNativeRDD, NativeConverters, NativeHelper, NativeRDD, NativeSupports, Shims}
-import org.apache.spark.sql.auron.iceberg.{IcebergNativeScanTask, IcebergScanPlan}
+import org.apache.spark.sql.auron.iceberg.{IcebergNativeScanTask, IcebergScanPlan, IcebergScanSupport}
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, Literal}
+import org.apache.spark.sql.catalyst.expressions.{Expression, GenericInternalRow, Literal}
 import org.apache.spark.sql.catalyst.plans.physical.SinglePartition
 import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, SQLExecution}
 import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile}
@@ -47,7 +47,10 @@ import org.apache.auron.{protobuf => pb}
 import org.apache.auron.jni.JniBridge
 import org.apache.auron.metric.SparkMetricNode
 
-case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergScanPlan)
+case class NativeIcebergTableScanExec(
+    basedScan: BatchScanExec,
+    staticPlan: IcebergScanPlan,
+    runtimeFilters: Seq[Expression])
     extends LeafExecNode
     with NativeSupports
     with Logging {
@@ -60,6 +63,15 @@ case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergSca
   override val output = basedScan.output
   override val outputPartitioning = basedScan.outputPartitioning
 
+  // Re-plans basedScan with the runtime filters when there are any; otherwise, or when
+  // that yields no plan, the constructor-supplied staticPlan is used.
+  private lazy val plan: IcebergScanPlan =
+    if (runtimeFilters.isEmpty) staticPlan
+    else {
+      val filtered = IcebergScanSupport.withRuntimeFilters(basedScan, runtimeFilters)
+      IcebergScanSupport.plan(filtered, useRuntimeFilters = true).getOrElse(staticPlan)
+    }
+
   private lazy val fileSchema: StructType = plan.fileSchema
   private lazy val partitionSchema: StructType = plan.partitionSchema
   private lazy val projectableSchema: StructType =
@@ -213,8 +225,29 @@ case class NativeIcebergTableScanExec(basedScan: BatchScanExec, plan: IcebergSca
 
   override val nodeName: String = "NativeIcebergTableScan"
 
-  // Delegate canonicalization to the original scan to keep plan equivalence checks consistent.
-  override protected def doCanonicalize(): SparkPlan = basedScan.canonicalized
+  override def simpleString(maxFields: Int): String = {
+    // Appends ", runtimeFilters=[...]" after the underlying scan's own summary, but only
+    // when this node carries at least one runtime filter.
+    val filterSuffix =
+      if (runtimeFilters.isEmpty) ""
+      else s", runtimeFilters=${runtimeFilters.mkString("[", ", ", "]")}"
+    val scanSummary = basedScan.simpleString(maxFields)
+    s"$nodeName ($scanSummary$filterSuffix)"
+  }
+
+  override def verboseStringWithOperatorId(): String = { // name, output, scan description, filters
+    s"""
+       |$formattedNodeName
+       |Output: ${output.mkString("[", ", ", "]")}
+       |${basedScan.scan.description()}
+       |RuntimeFilters: ${runtimeFilters.mkString("[", ", ", "]")}
+       |""".stripMargin
+  }
+
+  // Canonicalize through the wrapped BatchScanExec after giving it this node's runtimeFilters;
+  // withRuntimeFilters returns the scan unchanged when it already carries the same filters.
+  override protected def doCanonicalize(): SparkPlan =
+    IcebergScanSupport.withRuntimeFilters(basedScan, runtimeFilters).canonicalized
 
   private def buildFileSizes(): Map[String, Long] = {
     // Map file path to full file size; tasks may split a file into multiple ranges.