Commit c09dc25

Author: Zhe Zhang (committed)

HDFS-8882. Erasure Coding: Use datablocks, parityblocks and cell size from ErasureCodingPolicy. Contributed by Vinayakumar B.

Change-Id: Ic56da0b426f47c63dac440aef6f5fc8554f6cf13

1 parent 7bff8ca, commit c09dc25

38 files changed, +177 −156 lines
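
Note: in short, this commit replaces the hard-coded defaults in HdfsConstants (NUM_DATA_BLOCKS = 6, NUM_PARITY_BLOCKS = 3, BLOCK_STRIPED_CELL_SIZE = 64 KB) with values read from each file's own ErasureCodingPolicy. A minimal sketch of the pattern applied across the files below, using only the accessors the diff itself relies on; the class and method names here are hypothetical, not part of the commit:

// Hypothetical illustration: deriving striping parameters from the file's
// ErasureCodingPolicy instead of the fixed HdfsConstants values.
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.io.erasurecode.ECSchema;

class EcParamsSketch {
  // Total pipeline width (data + parity) for the file's policy,
  // e.g. 6 + 3 = 9 under the former hard-coded defaults.
  static int blockGroupWidth(HdfsFileStatus stat) {
    ErasureCodingPolicy ecPolicy = stat.getErasureCodingPolicy();
    ECSchema schema = ecPolicy.getSchema();
    int cellSize = ecPolicy.getCellSize();        // was BLOCK_STRIPED_CELL_SIZE
    int dataUnits = schema.getNumDataUnits();     // was NUM_DATA_BLOCKS
    int parityUnits = schema.getNumParityUnits(); // was NUM_PARITY_BLOCKS
    return dataUnits + parityUnits;
  }
}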

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java

Lines changed: 0 additions & 11 deletions
@@ -86,17 +86,6 @@ public final class HdfsConstants {
   public static final String CLIENT_NAMENODE_PROTOCOL_NAME =
       "org.apache.hadoop.hdfs.protocol.ClientProtocol";
 
-  /*
-   * These values correspond to the values used by the system default erasure
-   * coding policy.
-   * TODO: get these values from ec policy of the associated INodeFile
-   */
-
-  public static final byte NUM_DATA_BLOCKS = 6;
-  public static final byte NUM_PARITY_BLOCKS = 3;
-  // The chunk size for striped block which is used by erasure coding
-  public static final int BLOCK_STRIPED_CELL_SIZE = 64 * 1024;
-
   // Timeouts for communicating with DataNode for streaming writes/reads
   public static final int READ_TIMEOUT = 60 * 1000;
   public static final int READ_TIMEOUT_EXTENSION = 5 * 1000;

hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto

Lines changed: 1 addition & 0 deletions
@@ -536,6 +536,7 @@ message BlockWithLocationsProto {
 
   optional bytes indices = 5;
   optional uint32 dataBlockNum = 6;
+  optional uint32 cellSize = 7;
 }
 
 /**

hadoop-hdfs-project/hadoop-hdfs/CHANGES-HDFS-EC-7285.txt

Lines changed: 4 additions & 1 deletion
@@ -446,4 +446,7 @@
     getAllErasureCodingPolicies. (Rakesh R via zhz)
 
     HDFS-8920. Erasure Coding: when recovering lost blocks, logs can be too
-    verbose and hurt performance. (Rui Li via Kai Zheng)
+    verbose and hurt performance. (Rui Li via Kai Zheng)
+
+    HDFS-8882. Erasure Coding: Use datablocks, parityblocks and cell size from
+    ErasureCodingPolicy (Vinayakumar B via zhz)

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/StripedDataStreamer.java

Lines changed: 17 additions & 17 deletions
@@ -18,25 +18,20 @@
 
 package org.apache.hadoop.hdfs;
 
-import static org.apache.hadoop.hdfs.protocol.HdfsConstants.BLOCK_STRIPED_CELL_SIZE;
-import static org.apache.hadoop.hdfs.protocol.HdfsConstants.NUM_DATA_BLOCKS;
-import static org.apache.hadoop.hdfs.protocol.HdfsConstants.NUM_PARITY_BLOCKS;
-
 import java.io.IOException;
 import java.io.InterruptedIOException;
 import java.util.concurrent.atomic.AtomicReference;
-
 import org.apache.hadoop.hdfs.DFSStripedOutputStream.Coordinator;
 import org.apache.hadoop.hdfs.DFSStripedOutputStream.MultipleBlockingQueue;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedStripedBlock;
 import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
 import org.apache.hadoop.hdfs.util.ByteArrayManager;
 import org.apache.hadoop.hdfs.util.StripedBlockUtil;
+import org.apache.hadoop.io.erasurecode.ECSchema;
 import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.Progressable;
 
@@ -109,6 +104,8 @@ private static void sleep(long ms, String op) throws InterruptedIOException {
   private final Coordinator coordinator;
   private final int index;
   private volatile boolean failed;
+  private final ECSchema schema;
+  private final int cellSize;
 
   StripedDataStreamer(HdfsFileStatus stat,
                       DFSClient dfsClient, String src,
@@ -120,6 +117,8 @@ private static void sleep(long ms, String op) throws InterruptedIOException {
         byteArrayManage, favoredNodes);
     this.index = index;
     this.coordinator = coordinator;
+    this.schema = stat.getErasureCodingPolicy().getSchema();
+    this.cellSize = stat.getErasureCodingPolicy().getCellSize();
   }
 
   int getIndex() {
@@ -135,7 +134,7 @@ boolean isFailed() {
   }
 
   private boolean isParityStreamer() {
-    return index >= NUM_DATA_BLOCKS;
+    return index >= schema.getNumDataUnits();
   }
 
   @Override
@@ -168,7 +167,7 @@ void populate() throws IOException {
       if (block != null) {
         // set numByte for the previous block group
         long bytes = 0;
-        for (int i = 0; i < NUM_DATA_BLOCKS; i++) {
+        for (int i = 0; i < schema.getNumDataUnits(); i++) {
           final ExtendedBlock b = coordinator.takeEndBlock(i);
           StripedBlockUtil.checkBlocks(index, block, i, b);
           bytes += b.getNumBytes();
@@ -183,15 +182,15 @@ void populate() throws IOException {
 
       final LocatedBlock lb = StripedDataStreamer.super.locateFollowingBlock(
           excludedNodes);
-      if (lb.getLocations().length < HdfsConstants.NUM_DATA_BLOCKS) {
+      if (lb.getLocations().length < schema.getNumDataUnits()) {
         throw new IOException(
             "Failed to get datablocks number of nodes from namenode: blockGroupSize= "
-                + (HdfsConstants.NUM_DATA_BLOCKS + HdfsConstants.NUM_PARITY_BLOCKS)
+                + (schema.getNumDataUnits() + schema.getNumParityUnits())
                 + ", blocks.length= " + lb.getLocations().length);
       }
-      final LocatedBlock[] blocks = StripedBlockUtil.parseStripedBlockGroup(
-          (LocatedStripedBlock)lb,
-          BLOCK_STRIPED_CELL_SIZE, NUM_DATA_BLOCKS, NUM_PARITY_BLOCKS);
+      final LocatedBlock[] blocks =
+          StripedBlockUtil.parseStripedBlockGroup((LocatedStripedBlock) lb,
+              cellSize, schema.getNumDataUnits(), schema.getNumParityUnits());
 
       for (int i = 0; i < blocks.length; i++) {
         StripedDataStreamer si = coordinator.getStripedDataStreamer(i);
@@ -233,9 +232,10 @@ void populate() throws IOException {
       final LocatedBlock updated = callUpdateBlockForPipeline(bg);
       final long newGS = updated.getBlock().getGenerationStamp();
       final LocatedBlock[] updatedBlks = StripedBlockUtil
-          .parseStripedBlockGroup((LocatedStripedBlock) updated,
-              BLOCK_STRIPED_CELL_SIZE, NUM_DATA_BLOCKS, NUM_PARITY_BLOCKS);
-      for (int i = 0; i < NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS; i++) {
+          .parseStripedBlockGroup((LocatedStripedBlock) updated, cellSize,
+              schema.getNumDataUnits(), schema.getNumParityUnits());
+      for (int i = 0; i < schema.getNumDataUnits()
+          + schema.getNumParityUnits(); i++) {
         StripedDataStreamer si = coordinator.getStripedDataStreamer(i);
         if (si.isFailed()) {
           continue; // skipping failed data streamer
@@ -280,7 +280,7 @@ void populate() throws IOException {
       final ExtendedBlock bg = coordinator.getBlockGroup();
       final ExtendedBlock newBG = newBlock(bg, newGS);
 
-      final int n = NUM_DATA_BLOCKS + NUM_PARITY_BLOCKS;
+      final int n = schema.getNumDataUnits() + schema.getNumParityUnits();
       final DatanodeInfo[] newNodes = new DatanodeInfo[n];
       final String[] newStorageIDs = new String[n];
       for (int i = 0; i < n; i++) {

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

Lines changed: 2 additions & 1 deletion
@@ -192,6 +192,7 @@ public static BlockWithLocationsProto convert(BlockWithLocations blk) {
       StripedBlockWithLocations sblk = (StripedBlockWithLocations) blk;
       builder.setIndices(PBHelperClient.getByteString(sblk.getIndices()));
       builder.setDataBlockNum(sblk.getDataBlockNum());
+      builder.setCellSize(sblk.getCellSize());
     }
     return builder.build();
   }
@@ -207,7 +208,7 @@ public static BlockWithLocations convert(BlockWithLocationsProto b) {
         PBHelperClient.convertStorageTypes(storageTypes, storageUuids.size()));
     if (b.hasIndices()) {
       blk = new StripedBlockWithLocations(blk, b.getIndices().toByteArray(),
-          (short) b.getDataBlockNum());
+          (short) b.getDataBlockNum(), b.getCellSize());
     }
     return blk;
   }

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java

Lines changed: 8 additions & 5 deletions
@@ -415,11 +415,14 @@ public static class DBlockStriped extends DBlock {
 
     final byte[] indices;
     final short dataBlockNum;
+    final int cellSize;
 
-    public DBlockStriped(Block block, byte[] indices, short dataBlockNum) {
+    public DBlockStriped(Block block, byte[] indices, short dataBlockNum,
+        int cellSize) {
       super(block);
       this.indices = indices;
       this.dataBlockNum = dataBlockNum;
+      this.cellSize = cellSize;
     }
 
     public DBlock getInternalBlock(StorageGroup storage) {
@@ -429,8 +432,8 @@ public DBlock getInternalBlock(StorageGroup storage) {
       }
       byte idxInGroup = indices[idxInLocs];
       long blkId = getBlock().getBlockId() + idxInGroup;
-      long numBytes = getInternalBlockLength(getNumBytes(),
-          HdfsConstants.BLOCK_STRIPED_CELL_SIZE, dataBlockNum, idxInGroup);
+      long numBytes = getInternalBlockLength(getNumBytes(), cellSize,
+          dataBlockNum, idxInGroup);
       Block blk = new Block(getBlock());
       blk.setBlockId(blkId);
       blk.setNumBytes(numBytes);
@@ -717,8 +720,8 @@ private long getBlockList() throws IOException {
         bytesReceived += sblkLocs.getBlock().getNumBytes() /
             sblkLocs.getDataBlockNum();
         block = new DBlockStriped(sblkLocs.getBlock(), sblkLocs.getIndices(),
-            sblkLocs.getDataBlockNum());
-      } else{
+            sblkLocs.getDataBlockNum(), sblkLocs.getCellSize());
+      } else {
         bytesReceived += blkLocs.getBlock().getNumBytes();
         block = new DBlock(blkLocs.getBlock());
       }
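
Note: getInternalBlock sizes an internal block via StripedBlockUtil.getInternalBlockLength, which this commit now feeds the policy's cell size instead of the fixed 64 KB constant. Below is a hypothetical, standalone re-derivation of that arithmetic for data blocks, assuming plain round-robin cell striping; it is an illustration, not the actual StripedBlockUtil source:

// Hypothetical sketch -- not the Hadoop implementation.
public class InternalBlockLengthSketch {
  /** Length of data block `idx` when `numBytes` bytes are striped
   *  round-robin in `cellSize` cells across `dataBlockNum` data blocks.
   *  Assumes 0 <= idx < dataBlockNum. */
  static long dataBlockLength(long numBytes, int cellSize, int dataBlockNum,
      int idx) {
    final long stripeSize = (long) cellSize * dataBlockNum;
    final long fullStripes = numBytes / stripeSize; // stripes all blocks share
    final long remaining = numBytes % stripeSize;   // bytes in the last stripe
    long len = fullStripes * cellSize;
    // In the last, partial stripe, cells fill in index order: block 0 first.
    final long cellStart = (long) idx * cellSize;
    if (remaining > cellStart) {
      len += Math.min(cellSize, remaining - cellStart);
    }
    return len;
  }

  public static void main(String[] args) {
    // 1 MB in an RS-6-3 group with 64 KB cells: blocks 0-3 hold 192 KB,
    // blocks 4-5 hold 128 KB.
    for (int i = 0; i < 6; i++) {
      System.out.println(i + ": " + dataBlockLength(1 << 20, 64 * 1024, 6, i));
    }
  }
}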

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoStriped.java

Lines changed: 6 additions & 4 deletions
@@ -22,8 +22,6 @@
 import org.apache.hadoop.hdfs.util.StripedBlockUtil;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 
-import static org.apache.hadoop.hdfs.protocol.HdfsConstants.BLOCK_STRIPED_CELL_SIZE;
-
 /**
  * Subclass of {@link BlockInfo}, presenting a block group in erasure coding.
  *
@@ -65,6 +63,10 @@ public short getParityBlockNum() {
     return (short) ecPolicy.getNumParityUnits();
   }
 
+  public int getCellSize() {
+    return ecPolicy.getCellSize();
+  }
+
   /**
    * If the block is committed/completed and its length is less than a full
    * stripe, it returns the the number of actual data blocks.
@@ -73,7 +75,7 @@ public short getParityBlockNum() {
   public short getRealDataBlockNum() {
     if (isComplete() || getBlockUCState() == BlockUCState.COMMITTED) {
       return (short) Math.min(getDataBlockNum(),
-          (getNumBytes() - 1) / BLOCK_STRIPED_CELL_SIZE + 1);
+          (getNumBytes() - 1) / ecPolicy.getCellSize() + 1);
     } else {
       return getDataBlockNum();
     }
@@ -200,7 +202,7 @@ public long spaceConsumed() {
     // `getNumBytes` is the total of actual data block size.
     return StripedBlockUtil.spaceConsumedByStripedBlock(getNumBytes(),
         ecPolicy.getNumDataUnits(), ecPolicy.getNumParityUnits(),
-        BLOCK_STRIPED_CELL_SIZE);
+        ecPolicy.getCellSize());
   }
 
   @Override
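
Note: the getRealDataBlockNum change keeps the same ceiling-division logic but uses the policy's own cell size: a committed group holding numBytes of data occupies ceil(numBytes / cellSize) cells, capped at the policy's data-block count. A small, hypothetical worked example (6 data units and 64 KB cells assumed):

// Hypothetical worked example of the getRealDataBlockNum arithmetic.
public class RealDataBlockNumDemo {
  public static void main(String[] args) {
    final int dataBlockNum = 6;           // assumed: policy's data units
    final int cellSize = 64 * 1024;       // assumed: policy's cell size
    final long numBytes = 100L * 1024;    // 100 KB stored in the block group
    // (numBytes - 1) / cellSize + 1 is ceiling division by cellSize.
    final long cellsUsed = (numBytes - 1) / cellSize + 1;  // = 2
    System.out.println(Math.min(dataBlockNum, cellsUsed)); // prints 2
  }
}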

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java

Lines changed: 4 additions & 6 deletions
@@ -92,7 +92,6 @@
 import org.apache.hadoop.hdfs.util.LightWeightLinkedSet;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
 
-import static org.apache.hadoop.hdfs.protocol.HdfsConstants.BLOCK_STRIPED_CELL_SIZE;
 import static org.apache.hadoop.hdfs.util.StripedBlockUtil.getInternalBlockLength;
 
 import org.apache.hadoop.metrics2.util.MBeans;
@@ -2554,10 +2553,9 @@ private BlockToMarkCorrupt checkReplicaCorrupt(
           BlockIdManager.convertToStripedID(reported.getBlockId());
       BlockInfoStriped stripedBlock = (BlockInfoStriped) storedBlock;
       int reportedBlkIdx = BlockIdManager.getBlockIndex(reported);
-      wrongSize = reported.getNumBytes() !=
-          getInternalBlockLength(stripedBlock.getNumBytes(),
-              BLOCK_STRIPED_CELL_SIZE,
-              stripedBlock.getDataBlockNum(), reportedBlkIdx);
+      wrongSize = reported.getNumBytes() != getInternalBlockLength(
+          stripedBlock.getNumBytes(), stripedBlock.getCellSize(),
+          stripedBlock.getDataBlockNum(), reportedBlkIdx);
     } else {
       wrongSize = storedBlock.getNumBytes() != reported.getNumBytes();
     }
@@ -3413,7 +3411,7 @@ private long addBlock(BlockInfo block, List<BlockWithLocations> results) {
           (byte) blockStriped.getStorageBlockIndex(locations.get(i));
       }
       results.add(new StripedBlockWithLocations(blkWithLocs, indices,
-          blockStriped.getDataBlockNum()));
+          blockStriped.getDataBlockNum(), blockStriped.getCellSize()));
       // approximate size
       return block.getNumBytes() / blockStriped.getDataBlockNum();
     }else{

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java

Lines changed: 2 additions & 1 deletion
@@ -185,7 +185,8 @@ DBlock newDBlock(LocatedBlock lb, List<MLocation> locations,
       for (int i = 0; i < indices.length; i++) {
         indices[i] = (byte) lsb.getBlockIndices()[i];
       }
-      db = new DBlockStriped(blk, indices, (short) ecPolicy.getNumDataUnits());
+      db = new DBlockStriped(blk, indices, (short) ecPolicy.getNumDataUnits(),
+          ecPolicy.getCellSize());
     } else {
       db = new DBlock(blk);
     }

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java

Lines changed: 8 additions & 3 deletions
@@ -204,9 +204,14 @@ static ValidateAddBlockResult validateAddBlock(
       clientMachine = pendingFile.getFileUnderConstructionFeature()
           .getClientMachine();
       isStriped = pendingFile.isStriped();
-      numTargets = isStriped ?
-          HdfsConstants.NUM_DATA_BLOCKS + HdfsConstants.NUM_PARITY_BLOCKS :
-          pendingFile.getFileReplication();
+      ErasureCodingPolicy ecPolicy = null;
+      if (isStriped) {
+        ecPolicy = FSDirErasureCodingOp.getErasureCodingPolicy(fsn, src);
+        numTargets = (short) (ecPolicy.getSchema().getNumDataUnits()
+            + ecPolicy.getSchema().getNumParityUnits());
+      } else {
+        numTargets = pendingFile.getFileReplication();
+      }
       storagePolicyID = pendingFile.getStoragePolicyID();
       return new ValidateAddBlockResult(blockSize, numTargets, storagePolicyID,
           clientMachine, isStriped);
