
Commit a6e8905

BHJ optimization to ensure the hash table is built once per executor

1 parent 5170be1 commit a6e8905

30 files changed, +1168 -44 lines changed

backends-velox/pom.xml

+4
@@ -284,6 +284,10 @@
       <version>${project.version}</version>
       <scope>compile</scope>
     </dependency>
+    <dependency>
+      <groupId>com.github.ben-manes.caffeine</groupId>
+      <artifactId>caffeine</artifactId>
+    </dependency>
     <dependency>
       <groupId>org.scalacheck</groupId>
       <artifactId>scalacheck_${scala.binary.version}</artifactId>
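
The Caffeine dependency backs the executor-side broadcast cache introduced below. As a minimal sketch of the pattern this commit relies on (illustrative types and names, not code from the commit): a loading get runs the builder at most once per key, and a removal listener frees resources when idle entries expire.

import com.github.benmanes.caffeine.cache.{Cache, Caffeine, RemovalCause, RemovalListener}
import java.util.concurrent.TimeUnit

object CaffeineSketch {
  private val cache: Cache[String, java.lang.Long] =
    Caffeine.newBuilder
      .expireAfterAccess(86400, TimeUnit.SECONDS) // matches the default expiry added below
      .removalListener(new RemovalListener[String, java.lang.Long] {
        // The commit releases the native hash table at this point.
        override def onRemoval(k: String, v: java.lang.Long, c: RemovalCause): Unit =
          println(s"evicted $k")
      })
      .build[String, java.lang.Long]()

  // The loader runs at most once per key, even under concurrent access.
  def getOrBuild(id: String, build: () => Long): Long =
    cache.get(id, (_: String) => Long.box(build()))
}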
backends-velox/src/main/java/org/apache/gluten/vectorized/HashJoinBuilder.java

+53

@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.vectorized;
+
+import org.apache.gluten.runtime.Runtime;
+import org.apache.gluten.runtime.RuntimeAware;
+
+public class HashJoinBuilder implements RuntimeAware {
+  private final Runtime runtime;
+
+  private HashJoinBuilder(Runtime runtime) {
+    this.runtime = runtime;
+  }
+
+  public static HashJoinBuilder create(Runtime runtime) {
+    return new HashJoinBuilder(runtime);
+  }
+
+  @Override
+  public long rtHandle() {
+    return runtime.getHandle();
+  }
+
+  public static native void clearHashTable(long hashTableData);
+
+  public static native long cloneHashTable(long hashTableData);
+
+  public static native long nativeBuild(
+      String buildHashTableId,
+      long[] batchHandlers,
+      long rowCount,
+      String joinKeys,
+      int joinType,
+      boolean hasMixedFiltCondition,
+      boolean isExistenceJoin,
+      byte[] namedStruct,
+      boolean isNullAwareAntiJoin,
+      boolean hasNullKeyValues);
+}
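
A hypothetical handle-lifecycle sketch for the natives declared above; `handle` stands in for a pointer returned by `nativeBuild`, and the idempotence claim comes from a comment in the cache code below.

import org.apache.gluten.vectorized.HashJoinBuilder

def shareAndRelease(handle: Long): Unit = {
  // Obtain a second reference to the same native table.
  val shared = HashJoinBuilder.cloneHashTable(handle)
  // Cleanup operations on the backend are idempotent, per the cache code.
  HashJoinBuilder.clearHashTable(shared)
  HashJoinBuilder.clearHashTable(handle)
}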

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala

+5
@@ -95,6 +95,11 @@ object VeloxBackendSettings extends BackendSettingsApi {
   val GLUTEN_VELOX_INTERNAL_UDF_LIB_PATHS = VeloxBackend.CONF_PREFIX + ".internal.udfLibraryPaths"
   val GLUTEN_VELOX_UDF_ALLOW_TYPE_CONVERSION = VeloxBackend.CONF_PREFIX + ".udfAllowTypeConversion"

+  val GLUTEN_VELOX_BROADCAST_CACHE_EXPIRED_TIME: String =
+    VeloxBackend.CONF_PREFIX + ".broadcast.cache.expired.time"
+  // unit: SECONDS, default 1 day
+  val GLUTEN_VELOX_BROADCAST_CACHE_EXPIRED_TIME_DEFAULT: Int = 86400
+
   /** The columnar-batch type this backend is by default using. */
   override def primaryBatchType: Convention.BatchType = VeloxBatch

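A hedged usage note: the full key depends on `VeloxBackend.CONF_PREFIX`, which this diff does not show; assuming the usual `spark.gluten.sql.columnar.backend.velox` prefix, tuning the expiry would look like this.

import org.apache.spark.SparkConf

object TuneBroadcastCacheExpiry {
  // Assumed full key; verify against VeloxBackend.CONF_PREFIX in your build.
  val conf: SparkConf = new SparkConf()
    .set("spark.gluten.sql.columnar.backend.velox.broadcast.cache.expired.time", "3600") // seconds
}
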
backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala

+9
@@ -21,6 +21,7 @@ import org.apache.gluten.columnarbatch.ArrowBatches.{ArrowJavaBatch, ArrowNative
 import org.apache.gluten.columnarbatch.VeloxBatch
 import org.apache.gluten.config.GlutenConfig
 import org.apache.gluten.config.VeloxConfig._
+import org.apache.gluten.execution.VeloxBroadcastBuildSideCache
 import org.apache.gluten.execution.datasource.GlutenFormatFactory
 import org.apache.gluten.expression.UDFMappings
 import org.apache.gluten.extension.columnar.transition.Convention
@@ -32,7 +33,9 @@ import org.apache.gluten.utils._
 import org.apache.spark.{HdfsConfGenerator, ShuffleDependency, SparkConf, SparkContext}
 import org.apache.spark.api.plugin.PluginContext
 import org.apache.spark.internal.Logging
+import org.apache.spark.listener.VeloxGlutenSQLAppStatusListener
 import org.apache.spark.network.util.ByteUnit
+import org.apache.spark.rpc.{GlutenDriverEndpoint, GlutenExecutorEndpoint}
 import org.apache.spark.shuffle.{ColumnarShuffleDependency, LookupKey, ShuffleManagerRegistry}
 import org.apache.spark.shuffle.sort.ColumnarShuffleManager
 import org.apache.spark.sql.execution.ColumnarCachedBatchSerializer
@@ -50,6 +53,9 @@ class VeloxListenerApi extends ListenerApi with Logging {
   import VeloxListenerApi._

   override def onDriverStart(sc: SparkContext, pc: PluginContext): Unit = {
+    GlutenDriverEndpoint.glutenDriverEndpointRef = (new GlutenDriverEndpoint).self
+    VeloxGlutenSQLAppStatusListener.registerListener(sc)
+
     val conf = pc.conf()

     // When the Velox cache is enabled, the Velox file handle cache should also be enabled.
@@ -123,6 +129,8 @@ class VeloxListenerApi extends ListenerApi with Logging {
   override def onDriverShutdown(): Unit = shutdown()

   override def onExecutorStart(pc: PluginContext): Unit = {
+    GlutenExecutorEndpoint.executorEndpoint = new GlutenExecutorEndpoint(pc.executorID, pc.conf)
+
     val conf = pc.conf()

     // Static initializers for executor.
@@ -215,6 +223,7 @@ class VeloxListenerApi extends ListenerApi with Logging {

   private def shutdown(): Unit = {
     // TODO shutdown implementation in velox to release resources
+    VeloxBroadcastBuildSideCache.cleanAll()
   }
 }


backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala

+8
@@ -639,6 +639,14 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
     }
     numOutputRows += serialized.map(_.getNumRows).sum
     dataSize += rawSize
+
+    val (buildKeys, isNullAware) = mode match {
+      case mode1: HashedRelationBroadcastMode =>
+        (mode1.key, mode1.isNullAware)
+      case _ =>
+        // IdentityBroadcastMode
+        (Seq.empty, false)
+    }
     if (useOffheapBroadcastBuildRelation) {
       TaskResources.runUnsafe {
         new UnsafeColumnarBuildSideRelation(child.output, serialized.map(_.getSerialized), mode)

backends-velox/src/main/scala/org/apache/gluten/execution/HashJoinExecTransformer.scala

+85 -3

@@ -17,15 +17,65 @@
 package org.apache.gluten.execution

 import org.apache.spark.rdd.RDD
+import org.apache.spark.rpc.GlutenDriverEndpoint
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.optimizer.BuildSide
+import org.apache.spark.sql.catalyst.optimizer.{BuildRight, BuildSide}
 import org.apache.spark.sql.catalyst.plans._
-import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
 import org.apache.spark.sql.execution.joins.BuildSideRelation
 import org.apache.spark.sql.vectorized.ColumnarBatch

 import io.substrait.proto.JoinRel

+object JoinTypeTransform {
+
+  // ExistenceJoin was introduced in SPARK-14781. It returns all rows from the left table, with
+  // a new column to indicate whether the row is matched in the right table.
+  // The ExistenceJoin is transformed into a left any join in CH.
+  // Since Substrait has no left any join, a left semi join is used instead,
+  // and isExistenceJoin is set to true to indicate that it is an existence join.
+  def toSubstraitJoinType(sparkJoin: JoinType, buildRight: Boolean): JoinRel.JoinType =
+    sparkJoin match {
+      case _: InnerLike =>
+        JoinRel.JoinType.JOIN_TYPE_INNER
+      case FullOuter =>
+        JoinRel.JoinType.JOIN_TYPE_OUTER
+      case LeftOuter =>
+        if (!buildRight) {
+          JoinRel.JoinType.JOIN_TYPE_RIGHT
+        } else {
+          JoinRel.JoinType.JOIN_TYPE_LEFT
+        }
+      case RightOuter =>
+        if (!buildRight) {
+          JoinRel.JoinType.JOIN_TYPE_LEFT
+        } else {
+          JoinRel.JoinType.JOIN_TYPE_RIGHT
+        }
+      case LeftSemi =>
+        if (!buildRight) {
+          JoinRel.JoinType.JOIN_TYPE_RIGHT_SEMI
+        } else {
+          JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
+        }
+      case LeftAnti =>
+        if (!buildRight) {
+          JoinRel.JoinType.JOIN_TYPE_RIGHT_ANTI
+        } else {
+          JoinRel.JoinType.JOIN_TYPE_LEFT_ANTI
+        }
+      case ExistenceJoin(_) =>
+        if (!buildRight) {
+          throw new IllegalArgumentException("Existence join should not switch children")
+        }
+        JoinRel.JoinType.JOIN_TYPE_LEFT_SEMI
+      case _ =>
+        // TODO: Support cross join with Cross Rel
+        JoinRel.JoinType.UNRECOGNIZED
+    }
+
+}
+
 case class ShuffledHashJoinExecTransformer(
     leftKeys: Seq[Expression],
     rightKeys: Seq[Expression],
@@ -99,6 +149,9 @@ case class BroadcastHashJoinExecTransformer(
     right,
     isNullAwareAntiJoin) {

+  // Unique ID for the built hash table.
+  lazy val buildBroadcastTableId: String = buildPlan.id.toString
+
   override protected lazy val substraitJoinType: JoinRel.JoinType = joinType match {
     case _: InnerLike =>
       JoinRel.JoinType.JOIN_TYPE_INNER
@@ -125,9 +178,38 @@ case class BroadcastHashJoinExecTransformer(

   override def columnarInputRDDs: Seq[RDD[ColumnarBatch]] = {
     val streamedRDD = getColumnarInputRDDs(streamedPlan)
+    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+    if (executionId != null) {
+      GlutenDriverEndpoint.collectResources(executionId, buildBroadcastTableId)
+    } else {
+      logWarning(
+        s"Cannot trace broadcast table data $buildBroadcastTableId" +
+          s" because execution id is null." +
+          s" Will clean up when the cache entry expires.")
+    }
+
     val broadcast = buildPlan.executeBroadcast[BuildSideRelation]()
-    val broadcastRDD = VeloxBroadcastBuildSideRDD(sparkContext, broadcast)
+    val context =
+      BroadCastHashJoinContext(
+        buildKeyExprs,
+        joinType,
+        buildSide == BuildRight,
+        condition.isDefined,
+        joinType.isInstanceOf[ExistenceJoin],
+        buildPlan.output,
+        buildBroadcastTableId)
+    val broadcastRDD = VeloxBroadcastBuildSideRDD(sparkContext, broadcast, context)
     // FIXME: Do we have to make build side a RDD?
     streamedRDD :+ broadcastRDD
   }
 }
+
+case class BroadCastHashJoinContext(
+    buildSideJoinKeys: Seq[Expression],
+    joinType: JoinType,
+    buildRight: Boolean,
+    hasMixedFiltCondition: Boolean,
+    isExistenceJoin: Boolean,
+    buildSideStructure: Seq[Attribute],
+    buildHashTableId: String,
+    isNullAwareAntiJoin: Boolean = false)
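
For orientation, a hypothetical sketch of how `BroadCastHashJoinContext` fields could map onto `HashJoinBuilder.nativeBuild`. The real wiring lives in `ColumnarBuildSideRelation.buildHashTable`, which this diff does not show, so the key encoding, the `namedStruct` payload, and `hasNullKeyValues` below are assumptions.

import org.apache.gluten.vectorized.HashJoinBuilder

def buildNative(ctx: BroadCastHashJoinContext, batches: Array[Long], rows: Long): Long =
  HashJoinBuilder.nativeBuild(
    ctx.buildHashTableId,
    batches, // native handles of the deserialized build-side batches
    rows,
    ctx.buildSideJoinKeys.map(_.sql).mkString(","), // illustrative key encoding
    JoinTypeTransform.toSubstraitJoinType(ctx.joinType, ctx.buildRight).getNumber,
    ctx.hasMixedFiltCondition,
    ctx.isExistenceJoin,
    Array.emptyByteArray, // namedStruct: serialized build-side schema, elided here
    ctx.isNullAwareAntiJoin,
    false // hasNullKeyValues: not derivable from the context alone
  )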
backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideCache.scala

+101

@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.execution
+
+import org.apache.gluten.backendsapi.velox.VeloxBackendSettings
+import org.apache.gluten.vectorized.HashJoinBuilder
+
+import org.apache.spark.SparkEnv
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.execution.ColumnarBuildSideRelation
+import org.apache.spark.sql.execution.joins.BuildSideRelation
+
+import com.github.benmanes.caffeine.cache.{Cache, Caffeine, RemovalCause, RemovalListener}
+
+import java.util.concurrent.TimeUnit
+
+case class BroadcastHashTable(pointer: Long, relation: ColumnarBuildSideRelation)
+
+/**
+ * `VeloxBroadcastBuildSideCache` ensures the BHJ hash table is built only once.
+ *
+ * The complicated part is due to reuse exchange, where multiple BHJ IDs correspond to one
+ * `ColumnarBuildSideRelation`.
+ */
+object VeloxBroadcastBuildSideCache
+  extends Logging
+  with RemovalListener[String, BroadcastHashTable] {
+
+  private lazy val expiredTime = SparkEnv.get.conf.getLong(
+    VeloxBackendSettings.GLUTEN_VELOX_BROADCAST_CACHE_EXPIRED_TIME,
+    VeloxBackendSettings.GLUTEN_VELOX_BROADCAST_CACHE_EXPIRED_TIME_DEFAULT
+  )
+
+  // Used to ensure the BHJ hash table is built only once.
+  // Key: hash table ID; value: the native hash table pointer plus its relation.
+  private val buildSideRelationCache: Cache[String, BroadcastHashTable] =
+    Caffeine.newBuilder
+      .expireAfterAccess(expiredTime, TimeUnit.SECONDS)
+      .removalListener(this)
+      .build[String, BroadcastHashTable]()
+
+  def getOrBuildBroadcastHashTable(
+      broadcast: Broadcast[BuildSideRelation],
+      broadCastContext: BroadCastHashJoinContext): BroadcastHashTable = {
+
+    buildSideRelationCache
+      .get(
+        broadCastContext.buildHashTableId,
+        (broadcast_id: String) => {
+          val (pointer, relation) =
+            broadcast.value
+              .asInstanceOf[ColumnarBuildSideRelation]
+              .buildHashTable(broadCastContext)
+          logDebug(s"Create bhj $broadcast_id = 0x${pointer.toHexString}")
+          BroadcastHashTable(pointer, relation)
+        }
+      )
+  }
+
+  /** This is a callback from the C++ backend. */
+  def get(broadcastHashtableId: String): Long =
+    Option(buildSideRelationCache.getIfPresent(broadcastHashtableId))
+      .map(_.pointer)
+      .getOrElse(0)
+
+  def invalidateBroadcastHashtable(broadcastHashtableId: String): Unit = {
+    // Cleanup operations on the backend are idempotent.
+    buildSideRelationCache.invalidate(broadcastHashtableId)
+  }
+
+  /** Only used in UT. */
+  def size(): Long = buildSideRelationCache.estimatedSize()
+
+  def cleanAll(): Unit = buildSideRelationCache.invalidateAll()
+
+  override def onRemoval(key: String, value: BroadcastHashTable, cause: RemovalCause): Unit = {
+    synchronized {
+      logDebug(s"Remove bhj $key = 0x${value.pointer.toHexString}")
+      if (value.relation != null) {
+        value.relation.reset()
+      }
+      HashJoinBuilder.clearHashTable(value.pointer)
+    }
+  }
+}
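
A hedged sketch of exercising the cache end to end; `broadcast` and `ctx` are assumed to come from `BroadcastHashJoinExecTransformer.columnarInputRDDs`, and the assertions lean on the UT-only helpers above.

import org.apache.spark.broadcast.Broadcast
import org.apache.spark.sql.execution.joins.BuildSideRelation

def exercise(broadcast: Broadcast[BuildSideRelation], ctx: BroadCastHashJoinContext): Unit = {
  val first = VeloxBroadcastBuildSideCache.getOrBuildBroadcastHashTable(broadcast, ctx)
  val second = VeloxBroadcastBuildSideCache.getOrBuildBroadcastHashTable(broadcast, ctx)
  assert(first.pointer == second.pointer) // built once, then served from the cache
  assert(VeloxBroadcastBuildSideCache.size() >= 1L)

  VeloxBroadcastBuildSideCache.invalidateBroadcastHashtable(ctx.buildHashTableId)
  assert(VeloxBroadcastBuildSideCache.get(ctx.buildHashTableId) == 0L) // 0 means absent
}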

backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideRDD.scala

+4 -8

@@ -16,22 +16,18 @@
  */
 package org.apache.gluten.execution

-import org.apache.gluten.iterator.Iterators
-
 import org.apache.spark.{broadcast, SparkContext}
 import org.apache.spark.sql.execution.joins.BuildSideRelation
 import org.apache.spark.sql.vectorized.ColumnarBatch

 case class VeloxBroadcastBuildSideRDD(
     @transient private val sc: SparkContext,
-    broadcasted: broadcast.Broadcast[BuildSideRelation])
+    broadcasted: broadcast.Broadcast[BuildSideRelation],
+    broadcastContext: BroadCastHashJoinContext)
   extends BroadcastBuildSideRDD(sc, broadcasted) {

   override def genBroadcastBuildSideIterator(): Iterator[ColumnarBatch] = {
-    val relation = broadcasted.value.asReadOnlyCopy()
-    Iterators
-      .wrap(relation.deserialized)
-      .recyclePayload(batch => batch.close())
-      .create()
+    VeloxBroadcastBuildSideCache.getOrBuildBroadcastHashTable(broadcasted, broadcastContext)
+    Iterator.empty
   }
 }
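
The iterator is now intentionally empty: the build side is materialized into the executor-local native cache instead of being streamed as batches, and the C++ backend later resolves the table by ID (a hedged sketch of that lookup, with `ctx` as constructed in columnarInputRDDs above):

def lookup(ctx: BroadCastHashJoinContext): Long = {
  // Hypothetical consumer-side check; get() returns 0 for "absent or evicted".
  val pointer = VeloxBroadcastBuildSideCache.get(ctx.buildHashTableId)
  require(pointer != 0L, s"hash table ${ctx.buildHashTableId} absent or evicted")
  pointer
}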
