apache · Jing9 · Nov 9, 2020 · Aug 14, 2020 · Aug 31, 2020 · Aug 31, 2020
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -1503,6 +1503,16 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final boolean DFS_PROTECTED_SUBDIRECTORIES_ENABLE_DEFAULT =
       false;
 
+  /**
+   * Boolean key (default false). FsVolumeImpl and FsVolumeList read it to
+   * switch on their same-disk tiering handling of DISK and ARCHIVE volumes.
+   */
+  public static final String DFS_DATANODE_ALLOW_SAME_DISK_TIERING =
+      "dfs.datanode.same-disk-tiering.enabled";
+  public static final boolean DFS_DATANODE_ALLOW_SAME_DISK_TIERING_DEFAULT =
+      false;
+
+  /**
+   * Despite the "percentage" in its name, the value is used as a fraction:
+   * FsVolumeImpl multiplies a volume's capacity by it for ARCHIVE volumes
+   * and by (1 - value) for volumes of other storage types.
+   */
+  public static final String DFS_DATANODE_RESERVE_FOR_ARCHIVE_PERCENTAGE =
+      "dfs.datanode.reserve-for-archive.percentage";
+  public static final double
+      DFS_DATANODE_RESERVE_FOR_ARCHIVE_PERCENTAGE_DEFAULT = 0.0;
+
   // dfs.client.retry confs are moved to HdfsClientConfigKeys.Retry
   @Deprecated
   public static final String  DFS_CLIENT_RETRY_POLICY_ENABLED_KEY

diff --git a/...fs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/...fs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java
@@ -190,6 +190,12 @@ public FsVolumeImpl getVolume(final ExtendedBlock b) {
     }
   }
 
+  // Get volume by device and storage type.
+  // Only used when turning on same disk tiering feature.
+  FsVolumeReference getVolumeRef(String device, StorageType storageType) {
+    return volumes.getVolumeRefByDeviceAndStorageType(device, storageType);
+  }
+
   @Override // FsDatasetSpi
   public Block getStoredBlock(String bpid, long blkid)
       throws IOException {
@@ -365,7 +371,7 @@ public LengthInputStream getMetaDataInputStream(ExtendedBlock b)
             RoundRobinVolumeChoosingPolicy.class,
             VolumeChoosingPolicy.class), conf);
     volumes = new FsVolumeList(volumeFailureInfos, datanode.getBlockScanner(),
-        blockChooserImpl);
+        blockChooserImpl, conf);
     asyncDiskService = new FsDatasetAsyncDiskService(datanode, this);
     asyncLazyPersistService = new RamDiskAsyncLazyPersistService(datanode, conf);
     deletingBlock = new HashMap<String, Set<Long>>();

diff --git a/...dfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/...dfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
@@ -134,6 +134,9 @@ public class FsVolumeImpl implements FsVolumeSpi {
   private final FileIoProvider fileIoProvider;
   private final DataNodeVolumeMetrics metrics;
   private URI baseURI;
+  // True when dfs.datanode.same-disk-tiering.enabled is set in conf.
+  private boolean enableSameDiskArchival;
+  // Mount reported by usage.getMount() when same-disk tiering is enabled,
+  // otherwise the empty string.
+  private final String device;
+  // Value of dfs.datanode.reserve-for-archive.percentage, read only when
+  // same-disk tiering is enabled; the constructor lowers it to 0.99 when
+  // the configured value is >= 1.
+  private double reservedForArchive;
 
   /**
    * Per-volume worker pool that processes new blocks to cache.
@@ -190,6 +193,26 @@ public class FsVolumeImpl implements FsVolumeSpi {
     }
     this.conf = conf;
     this.fileIoProvider = fileIoProvider;
+    this.enableSameDiskArchival =
+        conf.getBoolean(DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING,
+            DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING_DEFAULT);
+    if (enableSameDiskArchival) {
+      this.device = usage.getMount();
+      reservedForArchive = conf.getDouble(
+          DFSConfigKeys.DFS_DATANODE_RESERVE_FOR_ARCHIVE_PERCENTAGE,
+          DFSConfigKeys.DFS_DATANODE_RESERVE_FOR_ARCHIVE_PERCENTAGE_DEFAULT);
+      if (reservedForArchive >= 1) {
+        FsDatasetImpl.LOG.warn("Value of reserve-for-archival is >= 100% for "
+            + currentDir + ". Setting it to 99%.");
+        reservedForArchive = 0.99;
+      }
+    } else {
+      device = "";
+    }
+  }
+
+  /**
+   * @return the mount reported by this volume's usage when same-disk
+   *         tiering is enabled, or the empty string when it is disabled.
+   */
+  String getDevice() {
+    return device;
   }
 
   protected ThreadPoolExecutor initializeCacheExecutor(File parent) {
@@ -412,16 +435,31 @@ long getBlockPoolUsed(String bpid) throws IOException {
    */
   @VisibleForTesting
   public long getCapacity() {
+    long capacity;
     if (configuredCapacity < 0L) {
       long remaining;
       if (cachedCapacity > 0L) {
         remaining = cachedCapacity - getReserved();
       } else {
         remaining = usage.getCapacity() - getReserved();
       }
-      return Math.max(remaining, 0L);
+      capacity = Math.max(remaining, 0L);
+    } else {
+      capacity = configuredCapacity;
+    }
+
+    if (enableSameDiskArchival) {
+      // Split the capacity between the ARCHIVE volume and its counterpart
+      // on the same device. Use the reservedForArchive field, which the
+      // constructor has already capped at 0.99; re-reading the raw config
+      // value here would bypass that cap and could produce a negative
+      // capacity for non-ARCHIVE volumes.
+      if (storageType == StorageType.ARCHIVE) {
+        capacity = (long) (capacity * reservedForArchive);
+      } else {
+        capacity = (long) (capacity * (1 - reservedForArchive));
+      }
     }
-    return configuredCapacity;
+
+    return capacity;
   }
 
   /**
@@ -452,7 +490,33 @@ public long getAvailable() throws IOException {
   }
 
   long getActualNonDfsUsed() throws IOException {
-    return usage.getUsed() - getDfsUsed();
+    // DISK and ARCHIVE volumes on the same device share the same amount
+    // of reserved capacity, so the DFS usage of the counterpart volume
+    // must also be excluded when computing this volume's non-DFS usage.
+    if (enableSameDiskArchival &&
+        (storageType == StorageType.DISK
+            || storageType == StorageType.ARCHIVE)) {
+      StorageType counterpartStorageType = storageType == StorageType.DISK
+          ? StorageType.ARCHIVE : StorageType.DISK;
+      // try-with-resources releases the volume reference even when
+      // getDfsUsed() throws; a null resource (no counterpart volume
+      // registered) is simply skipped on close.
+      try (FsVolumeReference counterpartRef =
+          dataset.getVolumeRef(device, counterpartStorageType)) {
+        if (counterpartRef != null) {
+          FsVolumeImpl counterpartVol =
+              (FsVolumeImpl) counterpartRef.getVolume();
+          return getDfUsed() - getDfsUsed() - counterpartVol.getDfsUsed();
+        }
+      }
+    }
+    // Feature disabled, other storage type, or no counterpart volume.
+    return getDfUsed() - getDfsUsed();
+  }
+
+  /**
+   * Reads the used space reported by this volume's {@code usage}.
+   * Kept as a separate public method so that tests can mock it.
+   *
+   * @return the value of {@code usage.getUsed()}.
+   */
+  @VisibleForTesting
+  public long getDfUsed() {
+    final long dfUsed = usage.getUsed();
+    return dfUsed;
   }
 
   private long getRemainingReserved() throws IOException {

diff --git a/...dfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java b/...dfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeList.java
@@ -29,12 +29,14 @@
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.Set;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.Condition;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.StorageType;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference;
 import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
@@ -62,9 +64,14 @@ class FsVolumeList {
   private final VolumeChoosingPolicy<FsVolumeImpl> blockChooser;
   private final BlockScanner blockScanner;
 
+  // Read from dfs.datanode.same-disk-tiering.enabled in the constructor.
+  private boolean enableSameDiskTiering;
+  // device -> (storage type -> volume). Only created when
+  // enableSameDiskTiering is true; otherwise it stays null.
+  private ConcurrentMap<String, Map<StorageType, FsVolumeImpl>>
+      deviceVolumeMapping;
+
   FsVolumeList(List<VolumeFailureInfo> initialVolumeFailureInfos,
       BlockScanner blockScanner,
-      VolumeChoosingPolicy<FsVolumeImpl> blockChooser) {
+      VolumeChoosingPolicy<FsVolumeImpl> blockChooser,
+      Configuration config) {
     this.blockChooser = blockChooser;
     this.blockScanner = blockScanner;
     this.checkDirsLock = new AutoCloseableLock();
@@ -73,6 +80,12 @@ class FsVolumeList {
       volumeFailureInfos.put(volumeFailureInfo.getFailedStorageLocation(),
           volumeFailureInfo);
     }
+    enableSameDiskTiering = config.getBoolean(
+        DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING,
+        DFSConfigKeys.DFS_DATANODE_ALLOW_SAME_DISK_TIERING_DEFAULT);
+    if (enableSameDiskTiering) {
+      deviceVolumeMapping = new ConcurrentHashMap<>();
+    }
   }
 
   /**
@@ -82,6 +95,29 @@ List<FsVolumeImpl> getVolumes() {
     return Collections.unmodifiableList(volumes);
   }
 
+  /**
+   * Get a reference to the volume registered for the given device and
+   * storage type.
+   * This is used when same-disk-tiering is enabled.
+   *
+   * @param device device the volume was registered under.
+   * @param storageType storage type of the wanted volume.
+   * @return a reference obtained from the matching volume, or null when the
+   *         mapping is not in use, no such volume is registered, or the
+   *         volume is already closed.
+   */
+  FsVolumeReference getVolumeRefByDeviceAndStorageType(String device,
+      StorageType storageType) {
+    if (deviceVolumeMapping == null) {
+      return null;
+    }
+    // Single lookup: a containsKey() check followed by get() could see the
+    // entry removed in between by removeVolume() and dereference null.
+    Map<StorageType, FsVolumeImpl> storageTypeMap =
+        deviceVolumeMapping.get(device);
+    if (storageTypeMap == null) {
+      return null;
+    }
+    FsVolumeImpl volume = storageTypeMap.get(storageType);
+    if (volume == null) {
+      return null;
+    }
+    try {
+      return volume.obtainReference();
+    } catch (ClosedChannelException e) {
+      FsDatasetImpl.LOG.warn("Volume closed when getting volume" +
+          " by device and storage type: "
+          + device + ", " + storageType);
+      return null;
+    }
+  }
+
   private FsVolumeReference chooseVolume(List<FsVolumeImpl> list,
       long blockSize, String storageId) throws IOException {
     while (true) {
@@ -291,6 +327,23 @@ public String toString() {
   void addVolume(FsVolumeReference ref) {
     FsVolumeImpl volume = (FsVolumeImpl) ref.getVolume();
     volumes.add(volume);
+    if (enableSameDiskTiering &&
+        (volume.getStorageType() == StorageType.DISK
+        || volume.getStorageType() == StorageType.ARCHIVE)) {
+      String device = volume.getDevice();
+      if (!device.isEmpty()) {
+        // Register atomically: a separate getOrDefault() + put() could
+        // overwrite a per-device map that another thread registered in
+        // between, dropping that thread's volume from the mapping.
+        Map<StorageType, FsVolumeImpl> storageTypeMap = deviceVolumeMapping
+            .computeIfAbsent(device, k -> new ConcurrentHashMap<>());
+        // Keep the first volume seen for a (device, storage type) pair.
+        if (storageTypeMap.putIfAbsent(volume.getStorageType(), volume)
+            != null) {
+          FsDatasetImpl.LOG.error("Found storage type already exist." +
+              " Skipping for now. Please check disk configuration");
+        }
+      }
+    }
     if (blockScanner != null) {
       blockScanner.addVolumeScanner(ref);
     } else {
@@ -311,6 +364,18 @@ void addVolume(FsVolumeReference ref) {
    */
   private void removeVolume(FsVolumeImpl target) {
     if (volumes.remove(target)) {
+      if (enableSameDiskTiering &&
+          (target.getStorageType() == StorageType.DISK
+              || target.getStorageType() == StorageType.ARCHIVE)) {
+        String device = target.getDevice();
+        if (!device.isEmpty()) {
+          Map<StorageType, FsVolumeImpl> storageTypeMap =
+              deviceVolumeMapping.get(device);
+          // The device may have no mapping, e.g. when this volume was
+          // skipped as a duplicate in addVolume() and the registered
+          // volume has already been removed.
+          if (storageTypeMap != null) {
+            // Conditional remove: only drop the entry if it points at this
+            // very volume, so removing a skipped duplicate cannot
+            // unregister the volume that is still present.
+            storageTypeMap.remove(target.getStorageType(), target);
+            if (storageTypeMap.isEmpty()) {
+              deviceVolumeMapping.remove(device);
+            }
+          }
+        }
+      }
       if (blockScanner != null) {
         blockScanner.removeVolumeScanner(target);
       }

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -5975,4 +5975,13 @@
     </description>
   </property>
 
+  <property>
+    <name>dfs.datanode.same-disk-tiering.enabled</name>
+    <value>false</value>
+    <description>
+      Whether the datanode's same-disk tiering handling is enabled. When
+      true, DISK and ARCHIVE volumes that report the same mount are tracked
+      together and their capacity is split according to
+      dfs.datanode.reserve-for-archive.percentage.
+    </description>
+  </property>
+
+  <property>
+    <name>dfs.datanode.reserve-for-archive.percentage</name>
+    <value>0.0</value>
+    <description>
+      Fraction (not a 0-100 percentage) of a volume's capacity that is
+      reported for ARCHIVE storage when
+      dfs.datanode.same-disk-tiering.enabled is true; volumes of other
+      storage types report the remaining (1 - value). The value should be
+      at least 0.0 and below 1.0.
+    </description>
+  </property>
 </configuration>