[SPARK-45502][BUILD] Upgrade Kafka to 3.6.0 #1

Closed. Wants to merge 21 commits.

Commits (21)
64ac59d
[SPARK-45497][K8S] Add a symbolic link file `spark-examples.jar` in K…
dongjoon-hyun Oct 11, 2023
305db07
[SPARK-45480][SQL][UI] Selectable Spark Plan Node on UI
yaooqinn Oct 11, 2023
0c1e9a5
[SPARK-45500][CORE][WEBUI] Show the number of abnormally completed dr…
dongjoon-hyun Oct 11, 2023
8394ebb
[SPARK-45469][CORE][SQL][CONNECT][PYTHON] Replace `toIterator` with `…
LuciferYang Oct 11, 2023
e1a7b84
[SPARK-45397][ML][CONNECT] Add array assembler feature transformer
WeichenXu123 Oct 11, 2023
ae112e4
[SPARK-45116][SQL] Add some comment for param of JdbcDialect `createT…
Hisoka-X Oct 11, 2023
11af786
[SPARK-45451][SQL] Make the default storage level of dataset cache co…
ulysses-you Oct 11, 2023
9721805
[SPARK-45496][CORE][DSTREAM] Fix the compilation warning related to `…
LuciferYang Oct 11, 2023
acd5dc4
[SPARK-45467][CORE] Replace `Proxy.getProxyClass()` with `Proxy.newPr…
LuciferYang Oct 11, 2023
c252530
[SPARK-42881][SQL] Codegen Support for get_json_object
panbingkun Oct 11, 2023
d1aff01
[SPARK-45499][CORE][TESTS] Replace `Reference#isEnqueued` with `Refer…
LuciferYang Oct 11, 2023
8e70c39
[SPARK-45483][CONNECT] Correct the function groups in connect.functions
zhengruifeng Oct 11, 2023
eae5c0e
[SPARK-45433][SQL] Fix CSV/JSON schema inference when timestamps do n…
Hisoka-X Oct 11, 2023
5ad57a7
[SPARK-45204][CONNECT] Add optional ExecuteHolder to SparkConnectPlanner
dillitz Oct 11, 2023
292a113
[SPARK-44855][CONNECT] Small tweaks to attaching ExecuteGrpcResponseS…
juliuszsompolski Oct 11, 2023
a5f0195
[SPARK-45415] Allow selective disabling of "fallocate" in RocksDB sta…
Oct 11, 2023
4027474
[SPARK-45221][PYTHON][DOCS] Refine docstring of DataFrameReader.parquet
HyukjinKwon Oct 12, 2023
045eb2d
[SPARK-45113][PYTHON][DOCS][FOLLOWUP] Make doctests deterministic
zhengruifeng Oct 12, 2023
69cf80d
[SPARK-45402][SQL][PYTHON] Add UDTF API for 'eval' and 'terminate' me…
dtenedor Oct 12, 2023
9565390
[SPARK-42881][SQL][FOLLOWUP] Update the results of JsonBenchmark-jdk2…
panbingkun Oct 12, 2023
5a00631
[SPARK-45442][PYTHON][DOCS] Refine docstring of DataFrame.show
allisonwang-db Oct 12, 2023

Changes from all commits

@@ -647,7 +647,7 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM
 
   test("Dataset result collection") {
     def checkResult(rows: IterableOnce[java.lang.Long], expectedValues: Long*): Unit = {
-      rows.toIterator.zipAll(expectedValues.iterator, null, null).foreach {
+      rows.iterator.zipAll(expectedValues.iterator, null, null).foreach {
        case (actual, expected) => assert(actual === expected)
      }
    }
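
The only functional change in this hunk is the Scala 2.13 collection migration: IterableOnce.toIterator is deprecated and iterator is its direct replacement. A minimal standalone sketch of the same pattern (the object name and values are illustrative, not taken from the test suite):

object ToIteratorMigrationSketch {
  def checkResult(rows: IterableOnce[java.lang.Long], expectedValues: Long*): Unit = {
    // `iterator` replaces the deprecated `toIterator`; zipAll pads the shorter side with null
    // so a length mismatch shows up as a failed comparison against the padding value.
    rows.iterator.zipAll(expectedValues.iterator, null, null).foreach { case (actual, expected) =>
      assert(actual == expected, s"expected $expected but got $actual")
    }
  }

  def main(args: Array[String]): Unit = {
    checkResult(Seq[java.lang.Long](1L, 2L, 3L), 1L, 2L, 3L)
  }
}
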
@@ -63,15 +63,15 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
   /**
    * Interrupt this sender and make it exit.
    */
-  def interrupt(): Unit = executionObserver.synchronized {
+  def interrupt(): Unit = {
     interrupted = true
-    executionObserver.notifyAll()
+    wakeUp()
   }
 
   // For testing
-  private[connect] def setDeadline(deadlineMs: Long) = executionObserver.synchronized {
+  private[connect] def setDeadline(deadlineMs: Long) = {
     deadlineTimeMillis = deadlineMs
-    executionObserver.notifyAll()
+    wakeUp()
   }
 
   def run(lastConsumedStreamIndex: Long): Unit = {
@@ -152,9 +152,6 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
         s"lastConsumedStreamIndex=$lastConsumedStreamIndex")
     val startTime = System.nanoTime()
 
-    // register to be notified about available responses.
-    executionObserver.attachConsumer(this)
-
     var nextIndex = lastConsumedStreamIndex + 1
     var finished = false
 
@@ -191,7 +188,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
         sentResponsesSize > maximumResponseSize || deadlineTimeMillis < System.currentTimeMillis()
 
       logTrace(s"Trying to get next response with index=$nextIndex.")
-      executionObserver.synchronized {
+      executionObserver.responseLock.synchronized {
         logTrace(s"Acquired executionObserver lock.")
         val sleepStart = System.nanoTime()
         var sleepEnd = 0L
@@ -208,7 +205,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
         if (response.isEmpty) {
           val timeout = Math.max(1, deadlineTimeMillis - System.currentTimeMillis())
           logTrace(s"Wait for response to become available with timeout=$timeout ms.")
-          executionObserver.wait(timeout)
+          executionObserver.responseLock.wait(timeout)
           logTrace(s"Reacquired executionObserver lock after waiting.")
           sleepEnd = System.nanoTime()
         }
@@ -339,4 +336,15 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message](
       }
     }
   }
+
+  private def wakeUp(): Unit = {
+    // Can be sleeping on either of these two locks, wake them up.
+    // (Neither of these locks is ever taken for extended period of time, so this won't block)
+    executionObserver.responseLock.synchronized {
+      executionObserver.responseLock.notifyAll()
+    }
+    grpcCallObserverReadySignal.synchronized {
+      grpcCallObserverReadySignal.notifyAll()
+    }
+  }
 }
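
The hunks above stop sleeping on the observer's own monitor: interrupt() and setDeadline() now delegate to a wakeUp() helper that notifies the two locks a sender may be parked on (executionObserver.responseLock and grpcCallObserverReadySignal), and the sender itself waits on responseLock with a timeout. A simplified standalone sketch of that wait-with-deadline / wake-up pattern; the object and member names are illustrative stand-ins, not the actual Spark Connect classes:

object SenderWakeUpSketch {
  private val responseLock = new Object()
  @volatile private var interrupted = false
  @volatile private var deadlineTimeMillis = Long.MaxValue
  @volatile private var response: Option[String] = None

  // Producer side: publish a response and notify any waiting consumer.
  def produce(r: String): Unit = responseLock.synchronized {
    response = Some(r)
    responseLock.notifyAll()
  }

  // Mirrors interrupt()/setDeadline() in the diff: mutate state first, then wake the sleeper.
  def interrupt(): Unit = { interrupted = true; wakeUp() }
  def setDeadline(deadlineMs: Long): Unit = { deadlineTimeMillis = deadlineMs; wakeUp() }

  private def wakeUp(): Unit = responseLock.synchronized {
    // The lock is only held briefly, so notifying here never blocks for long.
    responseLock.notifyAll()
  }

  // Consumer side: wait on responseLock until a response arrives, the sender is
  // interrupted, or the deadline passes. The while loop also absorbs spurious wake-ups.
  def await(): Option[String] = responseLock.synchronized {
    while (response.isEmpty && !interrupted && System.currentTimeMillis() < deadlineTimeMillis) {
      val timeout = math.max(1L, deadlineTimeMillis - System.currentTimeMillis())
      responseLock.wait(timeout)
    }
    response
  }
}

Waiting on a dedicated lock object rather than on the observer instance makes it explicit which notifyAll() pairs with which wait(), and leaves the observer's own monitor out of the protocol entirely.
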
@@ -33,8 +33,7 @@ import org.apache.spark.sql.connect.service.ExecuteHolder
 /**
  * This StreamObserver is running on the execution thread. Execution pushes responses to it, it
  * caches them. ExecuteResponseGRPCSender is the consumer of the responses ExecuteResponseObserver
- * "produces". It waits on the monitor of ExecuteResponseObserver. New produced responses notify
- * the monitor.
+ * "produces". It waits on the responseLock. New produced responses notify the responseLock.
  * @see
  *   getResponse.
  *
@@ -85,10 +84,12 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder:
   private[connect] var highestConsumedIndex: Long = 0
 
   /**
-   * Consumer that waits for available responses. There can be only one at a time, @see
-   * attachConsumer.
+   * Lock used for synchronization between responseObserver and grpcResponseSenders.
+   *  - grpcResponseSenders wait on it for a new response to be available.
+   *  - grpcResponseSenders also notify it to wake up when interrupted.
+   *  - responseObserver notifies it when new responses are available.
    */
-  private var responseSender: Option[ExecuteGrpcResponseSender[T]] = None
+  private[connect] val responseLock = new Object()
 
   // Statistics about cached responses.
   private val cachedSizeUntilHighestConsumed = CachedSize()
@@ -106,7 +107,7 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder:
       0
     }
 
-  def onNext(r: T): Unit = synchronized {
+  def onNext(r: T): Unit = responseLock.synchronized {
     if (finalProducedIndex.nonEmpty) {
       throw new IllegalStateException("Stream onNext can't be called after stream completed")
     }
@@ … @@
     logDebug(
       s"Execution opId=${executeHolder.operationId} produced response " +
         s"responseId=${responseId} idx=$lastProducedIndex")
-    notifyAll()
+    responseLock.notifyAll()
   }
 
-  def onError(t: Throwable): Unit = synchronized {
+  def onError(t: Throwable): Unit = responseLock.synchronized {
     if (finalProducedIndex.nonEmpty) {
       throw new IllegalStateException("Stream onError can't be called after stream completed")
     }
@@ … @@
     logDebug(
       s"Execution opId=${executeHolder.operationId} produced error. " +
         s"Last stream index is $lastProducedIndex.")
-    notifyAll()
+    responseLock.notifyAll()
   }
 
-  def onCompleted(): Unit = synchronized {
+  def onCompleted(): Unit = responseLock.synchronized {
     if (finalProducedIndex.nonEmpty) {
       throw new IllegalStateException("Stream onCompleted can't be called after stream completed")
     }
     finalProducedIndex = Some(lastProducedIndex)
     logDebug(
       s"Execution opId=${executeHolder.operationId} completed stream. " +
         s"Last stream index is $lastProducedIndex.")
-    notifyAll()
-  }
-
-  /** Attach a new consumer (ExecuteResponseGRPCSender). */
-  def attachConsumer(newSender: ExecuteGrpcResponseSender[T]): Unit = synchronized {
-    // interrupt the current sender before attaching new one
-    responseSender.foreach(_.interrupt())
-    responseSender = Some(newSender)
+    responseLock.notifyAll()
   }
 
   /**
    * Get response with a given index in the stream, if set. Note: Upon returning the response,
    * this response observer assumes that the response is consumed, and the response and previous
    * response can be uncached, keeping retryBufferSize of responses for the case of retries.
    */
-  def consumeResponse(index: Long): Option[CachedStreamResponse[T]] = synchronized {
+  def consumeResponse(index: Long): Option[CachedStreamResponse[T]] = responseLock.synchronized {
     // we index stream responses from 1, getting a lower index would be invalid.
     assert(index >= 1)
     // it would be invalid if consumer would skip a response
@@ -198,17 +192,17 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder:
   }
 
   /** Get the stream error if there is one, otherwise None. */
-  def getError(): Option[Throwable] = synchronized {
+  def getError(): Option[Throwable] = responseLock.synchronized {
     error
   }
 
   /** If the stream is finished, the index of the last response, otherwise None. */
-  def getLastResponseIndex(): Option[Long] = synchronized {
+  def getLastResponseIndex(): Option[Long] = responseLock.synchronized {
     finalProducedIndex
   }
 
   /** Get the index in the stream for given response id. */
-  def getResponseIndexById(responseId: String): Long = synchronized {
+  def getResponseIndexById(responseId: String): Long = responseLock.synchronized {
     responseIdToIndex.getOrElse(
       responseId,
       throw new SparkSQLException(
@@ … @@
   }
 
   /** Remove cached responses up to and including response with given id. */
-  def removeResponsesUntilId(responseId: String): Unit = synchronized {
+  def removeResponsesUntilId(responseId: String): Unit = responseLock.synchronized {
     val index = getResponseIndexById(responseId)
     removeResponsesUntilIndex(index)
     logDebug(
@@ … @@
   }
 
   /** Remove all cached responses */
-  def removeAll(): Unit = synchronized {
+  def removeAll(): Unit = responseLock.synchronized {
     removeResponsesUntilIndex(lastProducedIndex)
     logInfo(
s"Release all for opId=${executeHolder.operationId}. Execution stats: " +
Expand All @@ -242,7 +236,7 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder:
}

/** Returns if the stream is finished. */
def completed(): Boolean = synchronized {
def completed(): Boolean = responseLock.synchronized {
finalProducedIndex.isDefined
}

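
On the observer side, every producer callback (onNext, onError, onCompleted) and every consumer-facing accessor now synchronizes on the same responseLock, and each state change that a waiting sender cares about ends with responseLock.notifyAll(). A condensed sketch of that shape, using hypothetical simplified types rather than the real CachedStreamResponse bookkeeping:

import scala.collection.mutable

final class ResponseBufferSketch[T] {
  private[this] val responseLock = new Object()
  private[this] val responses = mutable.Map.empty[Long, T]
  private[this] var lastProducedIndex: Long = 0L
  private[this] var finalProducedIndex: Option[Long] = None

  def onNext(r: T): Unit = responseLock.synchronized {
    require(finalProducedIndex.isEmpty, "stream already completed")
    lastProducedIndex += 1
    responses(lastProducedIndex) = r
    responseLock.notifyAll() // wake senders parked in responseLock.wait(...)
  }

  def onCompleted(): Unit = responseLock.synchronized {
    finalProducedIndex = Some(lastProducedIndex)
    responseLock.notifyAll()
  }

  // Non-blocking, like consumeResponse in the diff: the sender does the waiting on
  // responseLock and calls this again after every notify.
  def consumeResponse(index: Long): Option[T] = responseLock.synchronized {
    responses.get(index)
  }

  def completed(): Boolean = responseLock.synchronized { finalProducedIndex.isDefined }
}

Dropping attachConsumer also removes the observer's reference to a single active sender; coordination happens through the shared lock and the cached responses instead.
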
@@ -195,11 +195,8 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends
     val responseObserver = executeHolder.responseObserver
 
     val command = request.getPlan.getCommand
-    val planner = new SparkConnectPlanner(executeHolder.sessionHolder)
-    planner.process(
-      command = command,
-      responseObserver = responseObserver,
-      executeHolder = executeHolder)
+    val planner = new SparkConnectPlanner(executeHolder)
+    planner.process(command = command, responseObserver = responseObserver)
   }
 
   private def requestString(request: Message) = {
@@ -56,7 +56,7 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder)
       throw new IllegalStateException(
         s"Illegal operation type ${request.getPlan.getOpTypeCase} to be handled here.")
     }
-    val planner = new SparkConnectPlanner(sessionHolder)
+    val planner = new SparkConnectPlanner(executeHolder)
     val tracker = executeHolder.eventsManager.createQueryPlanningTracker
     val dataframe =
       Dataset.ofRows(
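
The last two hunks are the same refactor at two call sites: SparkConnectPlanner is now constructed from the ExecuteHolder instead of from the SessionHolder, so process() no longer needs executeHolder threaded through as an argument. A simplified, hypothetical sketch of the before/after shape (placeholder types, not the real planner API):

case class SessionHolder(sessionId: String)
case class ExecuteHolder(operationId: String, sessionHolder: SessionHolder)

// Before: built from the session; the execution context is passed on every call.
class PlannerBefore(sessionHolder: SessionHolder) {
  def process(command: String, executeHolder: ExecuteHolder): Unit =
    println(s"[$command] session=${sessionHolder.sessionId} op=${executeHolder.operationId}")
}

// After: built from the ExecuteHolder, which already carries its session.
class PlannerAfter(executeHolder: ExecuteHolder) {
  private def sessionHolder: SessionHolder = executeHolder.sessionHolder
  def process(command: String): Unit =
    println(s"[$command] session=${sessionHolder.sessionId} op=${executeHolder.operationId}")
}

object PlannerRefactorSketch {
  def main(args: Array[String]): Unit = {
    val exec = ExecuteHolder("op-1", SessionHolder("session-1"))
    new PlannerBefore(exec.sessionHolder).process("collect", exec)
    new PlannerAfter(exec).process("collect")
  }
}

Capturing the holder at construction keeps the planner's entry points smaller and gives everything inside the planner access to the same execution state.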