
Commit bbf585d

merge the range reader and split the mappers based on data size
1 parent 5f56f89 commit bbf585d

File tree

10 files changed: +137 −175 lines


core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 29 additions & 74 deletions
@@ -343,30 +343,17 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
   /**
    * Called from executors to get the server URIs and output sizes for each shuffle block that
    * needs to be read from a given range of map output partitions (startPartition is included but
-   * endPartition is excluded from the range) and is produced by a specific mapper.
+   * endPartition is excluded from the range) and is produced by
+   * a range mapper (startMapId, endMapId, startMapId is included and the endMapId is excluded).
    *
    * @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId,
    *         and the second item is a sequence of (shuffle block id, shuffle block size, map index)
    *         tuples describing the shuffle blocks that are stored at that block manager.
    */
-  def getMapSizesByMapIndex(
+  def getMapSizesByRange(
       shuffleId: Int,
-      mapIndex: Int,
       startPartition: Int,
-      endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])]
-
-  /**
-   * Called from executors to get the server URIs and output sizes for each shuffle block that
-   * needs to be read from a specific map output partitions (partitionIndex) and is
-   * produced by a range mapper (startMapId, endMapId)
-   *
-   * @return A sequence of 2-item tuples, where the first item in the tuple is a BlockManagerId,
-   *         and the second item is a sequence of (shuffle block id, shuffle block size, map index)
-   *         tuples describing the shuffle blocks that are stored at that block manager.
-   */
-  def getMapSizesByRangeMapIndex(
-      shuffleId: Int,
-      partitionIndex: Int,
+      endPartition: Int,
       startMapId: Int,
       endMapId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])]

@@ -767,44 +754,25 @@ private[spark] class MapOutputTrackerMaster(
     }
   }
 
-  override def getMapSizesByMapIndex(
-      shuffleId: Int,
-      mapIndex: Int,
-      startPartition: Int,
-      endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
-    logDebug(s"Fetching outputs for shuffle $shuffleId, mapIndex $mapIndex" +
-      s"partitions $startPartition-$endPartition")
-    shuffleStatuses.get(shuffleId) match {
-      case Some (shuffleStatus) =>
-        shuffleStatus.withMapStatuses { statuses =>
-          MapOutputTracker.convertMapStatuses(
-            shuffleId,
-            startPartition,
-            endPartition,
-            statuses,
-            Some(mapIndex))
-        }
-      case None =>
-        Iterator.empty
-    }
-  }
-
-  override def getMapSizesByRangeMapIndex(
-      shuffleId: Int,
-      partitionIndex: Int,
-      startMapId: Int,
-      endMapId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
+  override def getMapSizesByRange(
+      shuffleId: Int,
+      startPartition: Int,
+      endPartition: Int,
+      startMapId: Int,
+      endMapId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
     shuffleStatuses.get(shuffleId) match {
       case Some(shuffleStatus) =>
         shuffleStatus.withMapStatuses { statuses =>
           MapOutputTracker.convertMapStatuses(
-            shuffleId, partitionIndex, statuses, startMapId, endMapId)
+            shuffleId, startPartition, endPartition, statuses, startMapId, endMapId)
         }
       case None =>
         Iterator.empty
     }
   }
 
+
+
   override def stop(): Unit = {
     mapOutputRequests.offer(PoisonPill)
     threadpool.shutdown()
@@ -850,33 +818,16 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr
     }
   }
 
-  override def getMapSizesByMapIndex(
+  override def getMapSizesByRange(
       shuffleId: Int,
-      mapIndex: Int,
       startPartition: Int,
-      endPartition: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
-    logDebug(s"Fetching outputs for shuffle $shuffleId, mapIndex $mapIndex" +
-      s"partitions $startPartition-$endPartition")
-    val statuses = getStatuses(shuffleId, conf)
-    try {
-      MapOutputTracker.convertMapStatuses(shuffleId, startPartition, endPartition,
-        statuses, Some(mapIndex))
-    } catch {
-      case e: MetadataFetchFailedException =>
-        // We experienced a fetch failure so our mapStatuses cache is outdated; clear it:
-        mapStatuses.clear()
-        throw e
-    }
-  }
-
-  override def getMapSizesByRangeMapIndex(
-      shuffleId: Int,
-      partitionIndex: Int,
+      endPartition: Int,
       startMapId: Int,
       endMapId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
     val statuses = getStatuses(shuffleId, conf)
     try {
-      MapOutputTracker.convertMapStatuses(shuffleId, partitionIndex, statuses, startMapId, endMapId)
+      MapOutputTracker.convertMapStatuses(
+        shuffleId, startPartition, endPartition, statuses, startMapId, endMapId)
     } catch {
       case e: MetadataFetchFailedException =>
         // We experienced a fetch failure so our mapStatuses cache is outdated; clear it:
@@ -1069,7 +1020,7 @@ private[spark] object MapOutputTracker extends Logging {
   }
 
   /**
-   * Given an array of map statuses, a specific map output partitions and a range
+   * Given an array of map statuses, a range map output partitions and a range
    * mappers (startMapId, endMapId), returns a sequence that, for each block manager ID,
    * lists the shuffle block IDs and corresponding shuffle
    * block sizes stored at that block manager.
@@ -1079,7 +1030,8 @@ private[spark] object MapOutputTracker extends Logging {
    * throws a FetchFailedException.
    *
    * @param shuffleId Identifier for the shuffle
-   * @param partitionIndex Specific of map output partition ID
+   * @param startPartition Start map output partition ID
+   * @param endPartition End map output partition ID
    * @param statuses List of map statuses, indexed by map partition index.
    * @param startMapId Start Map ID
    * @param endMapId End map ID
@@ -1089,7 +1041,8 @@ private[spark] object MapOutputTracker extends Logging {
    */
  def convertMapStatuses(
      shuffleId: Int,
-     partitionIndex: Int,
+     startPartition: Int,
+     endPartition: Int,
      statuses: Array[MapStatus],
      startMapId: Int,
      endMapId: Int): Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])] = {
@@ -1100,12 +1053,14 @@ private[spark] object MapOutputTracker extends Logging {
       if (status == null) {
         val errorMessage = s"Missing an output location for shuffle $shuffleId"
         logError(errorMessage)
-        throw new MetadataFetchFailedException(shuffleId, partitionIndex, errorMessage)
+        throw new MetadataFetchFailedException(shuffleId, startPartition, errorMessage)
       } else {
-        val size = status.getSizeForBlock(partitionIndex)
-        if (size != 0) {
-          splitsByAddress.getOrElseUpdate(status.location, ListBuffer()) +=
-            ((ShuffleBlockId(shuffleId, status.mapId, partitionIndex), size, mapIndex))
+        for (part <- startPartition until endPartition) {
+          val size = status.getSizeForBlock(part)
+          if (size != 0) {
+            splitsByAddress.getOrElseUpdate(status.location, ListBuffer()) +=
+              ((ShuffleBlockId(shuffleId, status.mapId, part), size, mapIndex))
+          }
         }
       }
     }
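
For reference, a minimal, self-contained sketch of what the merged lookup now computes: walk the mapper range [startMapId, endMapId) and, for each mapper, the reduce-partition range [startPartition, endPartition), keeping only non-empty blocks and grouping them by the location that stores them. The Status and Block case classes are simplified stand-ins, not Spark's MapStatus/BlockManagerId/ShuffleBlockId types, and the outer loop over the mapper range is assumed from the new signature (only the inner per-partition loop appears in the hunk above).

import scala.collection.mutable.{HashMap, ListBuffer}

// Simplified stand-ins for MapStatus, BlockManagerId and ShuffleBlockId (not Spark types).
final case class Status(location: String, mapId: Long, blockSizes: Array[Long])
final case class Block(shuffleId: Int, mapId: Long, reduceId: Int)

// Walk mappers [startMapId, endMapId) and partitions [startPartition, endPartition),
// collecting non-empty blocks grouped by the address that hosts them.
def convertMapStatusesSketch(
    shuffleId: Int,
    startPartition: Int,
    endPartition: Int,
    statuses: Array[Status],
    startMapId: Int,
    endMapId: Int): Iterator[(String, Seq[(Block, Long, Int)])] = {
  val splitsByAddress = new HashMap[String, ListBuffer[(Block, Long, Int)]]
  for (mapIndex <- startMapId until endMapId) {
    val status = statuses(mapIndex)
    require(status != null, s"Missing an output location for shuffle $shuffleId")
    for (part <- startPartition until endPartition) {
      val size = status.blockSizes(part)
      if (size != 0) {
        splitsByAddress.getOrElseUpdate(status.location, ListBuffer()) +=
          ((Block(shuffleId, status.mapId, part), size, mapIndex))
      }
    }
  }
  splitsByAddress.iterator.map { case (loc, blocks) => (loc, blocks.toSeq) }
}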

core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala

Lines changed: 6 additions & 16 deletions
@@ -55,26 +55,16 @@ private[spark] trait ShuffleManager {
       metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C]
 
   /**
-   * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive)
-   * that are produced by one specific mapper. Called on executors by reduce tasks.
-   */
-  def getReaderForOneMapper[K, C](
-      handle: ShuffleHandle,
-      mapIndex: Int,
-      startPartition: Int,
-      endPartition: Int,
-      context: TaskContext,
-      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C]
-
-  /**
-   * Get a reader for the specific partitionIndex in map output statistics that are
-   * produced by range mappers. Called on executors by reduce tasks.
+   * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive) to
+   * read from map output (startMapId to endMapId - 1, inclusive).
+   * Called on executors by reduce tasks.
    */
-  def getReaderForRangeMapper[K, C](
+  def getReaderForRange[K, C](
       handle: ShuffleHandle,
-      partitionIndex: Int,
       startMapId: Int,
       endMapId: Int,
+      startPartition: Int,
+      endPartition: Int,
       context: TaskContext,
       metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C]
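
As a rough guide to the migration, the two removed reader methods collapse into the single range form: a one-mapper read becomes a mapper range of length one, and a one-partition read becomes a partition range of length one. The trait below is a hypothetical stub used only to illustrate that argument mapping; it is not the Spark ShuffleManager API, and the helper names are invented for the example.

// Hypothetical stub, not Spark's ShuffleManager trait: it only shows how calls to the
// removed methods map onto the merged getReaderForRange signature.
trait RangeReaderSketch {
  def getReaderForRange(
      startMapId: Int,
      endMapId: Int,
      startPartition: Int,
      endPartition: Int): String

  // Former getReaderForOneMapper(mapIndex, startPartition, endPartition):
  // one mapper is the half-open map range [mapIndex, mapIndex + 1).
  def readerForOneMapper(mapIndex: Int, startPartition: Int, endPartition: Int): String =
    getReaderForRange(mapIndex, mapIndex + 1, startPartition, endPartition)

  // Former getReaderForRangeMapper(partitionIndex, startMapId, endMapId):
  // one reduce partition is the half-open partition range [partitionIndex, partitionIndex + 1).
  def readerForOnePartition(partitionIndex: Int, startMapId: Int, endMapId: Int): String =
    getReaderForRange(startMapId, endMapId, partitionIndex, partitionIndex + 1)
}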

core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala

Lines changed: 5 additions & 17 deletions
@@ -131,32 +131,20 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
       shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context))
   }
 
-  override def getReaderForOneMapper[K, C](
+  override def getReaderForRange[K, C](
       handle: ShuffleHandle,
-      mapIndex: Int,
+      startMapId: Int,
+      endMapId: Int,
       startPartition: Int,
       endPartition: Int,
       context: TaskContext,
       metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = {
-    val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByMapIndex(
-      handle.shuffleId, mapIndex, startPartition, endPartition)
+    val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByRange(
+      handle.shuffleId, startPartition, endPartition, startMapId, endMapId)
     new BlockStoreShuffleReader(
       handle.asInstanceOf[BaseShuffleHandle[K, _, C]], blocksByAddress, context, metrics,
       shouldBatchFetch = canUseBatchFetch(startPartition, endPartition, context))
-  }
 
-  override def getReaderForRangeMapper[K, C](
-      handle: ShuffleHandle,
-      partitionIndex: Int,
-      startMapId: Int,
-      endMapId: Int,
-      context: TaskContext,
-      metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = {
-    val blocksByAddress = SparkEnv.get.mapOutputTracker.getMapSizesByRangeMapIndex(
-      handle.shuffleId, partitionIndex, startMapId, endMapId)
-    new BlockStoreShuffleReader(
-      handle.asInstanceOf[BaseShuffleHandle[K, _, C]], blocksByAddress, context, metrics,
-      shouldBatchFetch = canUseBatchFetch(partitionIndex, partitionIndex + 1, context))
   }
 
   /** Get a writer for a given partition. Called on executors by map tasks. */

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 0 additions & 9 deletions
@@ -423,13 +423,6 @@ object SQLConf {
     .longConf
     .createWithDefault(64 * 1024 * 1024L)
 
-  val ADAPTIVE_EXECUTION_SKEWED_PARTITION_MAX_SPLITS =
-    buildConf("spark.sql.adaptive.skewedPartitionMaxSplits")
-      .doc("Configures the maximum number of task to handle a skewed partition in adaptive skewed" +
-        "join.")
-      .intConf
-      .createWithDefault(5)
-
   val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
     buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
       .doc("The relation with a non-empty partition ratio lower than this config will not be " +

@@ -2213,8 +2206,6 @@ class SQLConf extends Serializable with Logging {
   def adaptiveSkewedSizeThreshold: Long =
     getConf(ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD)
 
-  def adaptiveSkewedMaxSplits: Int = getConf(ADAPTIVE_EXECUTION_SKEWED_PARTITION_MAX_SPLITS)
-
   def minBatchesToRetain: Int = getConf(MIN_BATCHES_TO_RETAIN)
 
   def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY)
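
Dropping the spark.sql.adaptive.skewedPartitionMaxSplits cap matches the commit message: rather than capping the number of splits, the skewed side is split into mapper ranges driven by data size. The helper below is a hypothetical sketch of that idea, not code from this commit; the byte threshold is assumed to come from a size config such as the remaining ADAPTIVE_EXECUTION_SKEWED_PARTITION_SIZE_THRESHOLD.

// Hypothetical sketch: split a skewed partition's mappers into half-open ranges whose
// accumulated size reaches a byte threshold, instead of using a fixed number of splits.
def splitMappersBySize(mapSizes: Seq[Long], targetSize: Long): Seq[(Int, Int)] = {
  val ranges = scala.collection.mutable.ArrayBuffer.empty[(Int, Int)]
  var start = 0
  var acc = 0L
  for (i <- mapSizes.indices) {
    acc += mapSizes(i)
    if (acc >= targetSize) {
      ranges += ((start, i + 1)) // half-open map range [start, i + 1)
      start = i + 1
      acc = 0L
    }
  }
  if (start < mapSizes.length) ranges += ((start, mapSizes.length))
  ranges.toSeq
}

// Example: splitMappersBySize(Seq(10, 30, 25, 5, 40), 40) == Seq((0, 2), (2, 5))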

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LocalShuffledRowRDD.scala

Lines changed: 2 additions & 1 deletion
@@ -80,9 +80,10 @@ class LocalShuffledRowRDD(
     // as well as the `tempMetrics` for basic shuffle metrics.
     val sqlMetricsReporter = new SQLShuffleReadMetricsReporter(tempMetrics, metrics)
 
-    val reader = SparkEnv.get.shuffleManager.getReaderForOneMapper(
+    val reader = SparkEnv.get.shuffleManager.getReaderForRange(
       dependency.shuffleHandle,
       mapIndex,
+      mapIndex + 1,
       0,
       numReducers,
       context,
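
The local shuffle read above keeps its old meaning under the new API: each task still reads everything one mapper produced, expressed as the map range [mapIndex, mapIndex + 1) crossed with the full reduce range [0, numReducers). A small illustration in plain Scala (not Spark code):

// Enumerate the (mapIndex, reducePartition) block coordinates covered by the range
// form used above: one mapper, all reduce partitions.
def localReadBlocks(mapIndex: Int, numReducers: Int): Seq[(Int, Int)] =
  (0 until numReducers).map(part => (mapIndex, part))

// Example: localReadBlocks(3, 4) == Seq((3, 0), (3, 1), (3, 2), (3, 3))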
