HDFS-15643. TestFileChecksumCompositeCrc fails intermittently. #2408
Changes from all commits
```diff
@@ -55,6 +55,7 @@ public class TestFileChecksum {
       .getLogger(TestFileChecksum.class);
   private final ErasureCodingPolicy ecPolicy =
       StripedFileTestUtil.getDefaultECPolicy();
+  private final static long STALE_INTERVAL = 2000;
   private int dataBlocks = ecPolicy.getNumDataUnits();
   private int parityBlocks = ecPolicy.getNumParityUnits();
```
```diff
@@ -88,9 +89,20 @@ public void setup() throws IOException {
     conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_CONSIDERLOAD_KEY,
         false);
     conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 0);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
+        STALE_INTERVAL);
     conf.setBoolean(DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY,
+        (int) (STALE_INTERVAL / 2));
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY,
+        (int) (STALE_INTERVAL / 4));
+    conf.setInt(
+        DFSConfigKeys.DFS_NAMENODE_RECONSTRUCTION_PENDING_TIMEOUT_SEC_KEY, 4);
     customizeConf(conf);
     cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDNs).build();
+    cluster.waitClusterUp();
     Path ecPath = new Path(ecDir);
     cluster.getFileSystem().mkdir(ecPath, FsPermission.getDirDefault());
     cluster.getFileSystem().getClient().setErasureCodingPolicy(ecDir,
```
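The new intervals are related rather than arbitrary: the heartbeat recheck fires at half the stale interval and block reports arrive at a quarter of it, so a dead DataNode should be noticed within roughly one `STALE_INTERVAL`. A minimal standalone sketch of that arithmetic follows; the constants mirror the diff above, but the class and assertions are illustrative only, not part of the patch:

```java
// Illustrative only: the timing relationships implied by setup() above.
public class StaleIntervalSanityCheck {
  // Values mirror the patch; only STALE_INTERVAL is an actual constant there.
  static final long STALE_INTERVAL = 2000;                            // ms
  static final int HEARTBEAT_RECHECK_MS = (int) (STALE_INTERVAL / 2); // 1000
  static final int BLOCK_REPORT_MS = (int) (STALE_INTERVAL / 4);      // 500

  public static void main(String[] args) {
    // The recheck must run at least once inside the stale window, or a dead
    // DataNode could go undetected for an extra cycle.
    assert HEARTBEAT_RECHECK_MS < STALE_INTERVAL;
    // Block reports refresh replica locations even faster than rechecks do.
    assert BLOCK_REPORT_MS < HEARTBEAT_RECHECK_MS;
    System.out.println("interval relationships hold");
  }
}
```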
```diff
@@ -107,6 +119,22 @@ public void setup() throws IOException {
   @After
   public void tearDown() {
+    // delete the directory
+    Path ecPath = new Path(ecDir);
+    try {
+      fs.delete(ecPath, true);
+    } catch (Exception ex) {
+      LOG.error("Could not delete ecDir", ex);
+    }
+
+    if (client != null) {
+      try {
+        client.close();
+      } catch (IOException e) {
+        LOG.error("Error closing the fsClient", e);
+      }
+    }
+
     if (cluster != null) {
       cluster.shutdown();
       cluster = null;
```
```diff
@@ -475,7 +503,6 @@ public void testStripedFileChecksumWithMissedDataBlocksRangeQuery16()
       throws Exception {
     int fileLength = 100;
     String stripedFile3 = ecDir + "/stripedFileChecksum3";
-    prepareTestFiles(fileLength, new String[] {stripedFile3});
     testStripedFileChecksumWithMissedDataBlocksRangeQuery(stripedFile3,
         fileLength - 1);
   }
```

```diff
@@ -487,9 +514,7 @@ public void testStripedFileChecksumWithMissedDataBlocksRangeQuery16()
   @Test(timeout = 90000)
   public void testStripedFileChecksumWithMissedDataBlocksRangeQuery17()
       throws Exception {
-    int fileLength = 100;
     String stripedFile3 = ecDir + "/stripedFileChecksum3";
-    prepareTestFiles(fileLength, new String[] {stripedFile3});
     testStripedFileChecksumWithMissedDataBlocksRangeQuery(stripedFile3, 1);
   }
```

```diff
@@ -502,7 +527,6 @@ public void testStripedFileChecksumWithMissedDataBlocksRangeQuery18()
       throws Exception {
     int fileLength = 100;
     String stripedFile3 = ecDir + "/stripedFileChecksum3";
-    prepareTestFiles(fileLength, new String[] {stripedFile3});
     testStripedFileChecksumWithMissedDataBlocksRangeQuery(stripedFile3, 10);
   }
```

```diff
@@ -515,7 +539,6 @@ public void testStripedFileChecksumWithMissedDataBlocksRangeQuery19()
       throws Exception {
     int fileLength = 100;
     String stripedFile3 = ecDir + "/stripedFileChecksum3";
-    prepareTestFiles(fileLength, new String[] {stripedFile3});
     testStripedFileChecksumWithMissedDataBlocksRangeQuery(stripedFile3,
         fileLength * 2);
   }
```

```diff
@@ -527,9 +550,7 @@ public void testStripedFileChecksumWithMissedDataBlocksRangeQuery19()
   @Test(timeout = 90000)
   public void testStripedFileChecksumWithMissedDataBlocksRangeQuery20()
       throws Exception {
-    int fileLength = bytesPerCRC;
     String stripedFile3 = ecDir + "/stripedFileChecksum3";
-    prepareTestFiles(fileLength, new String[] {stripedFile3});
     testStripedFileChecksumWithMissedDataBlocksRangeQuery(stripedFile3,
         bytesPerCRC - 1);
   }
```
```diff
@@ -575,6 +596,8 @@ private FileChecksum getFileChecksum(String filePath, int range,
       dnIdxToDie = getDataNodeToKill(filePath);
       DataNode dnToDie = cluster.getDataNodes().get(dnIdxToDie);
       shutdownDataNode(dnToDie);
+      // wait enough time for the locations to be updated.
+      Thread.sleep(STALE_INTERVAL);
     }

     Path testPath = new Path(filePath);
```
Inline review thread on the `Thread.sleep(STALE_INTERVAL)` line:

**Member:** Should we `waitFor` instead?

**Contributor (Author):** Yes, I agree with you @goiri.

**Member:** I am not very close to this part of the code, but there must be ways to force the statistics to update.

**Contributor (Author):** I see. The problem is that I cannot reproduce it on my local machine. However, it seems to fail in a consistent way on Yetus.

**Member:** I am not sure exactly which test you are interested in, but here you can see the full logs of one of the failed tests: Here are all the tests:

**Member:** I see there is truncation there too, though.

**Contributor (Author):** Thanks @goiri.

**Member:** I could reproduce the failure locally. Attached the stdout in the JIRA: https://issues.apache.org/jira/secure/attachment/13014321/org.apache.hadoop.hdfs.TestFileChecksum-output.txt

**Member:** I could reproduce even without …
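For reference, here is a minimal sketch of the `waitFor` alternative discussed above, assuming `GenericTestUtils.waitFor` from hadoop-common's test utilities and using `FSNamesystem#getNumStaleDataNodes()` as the polling condition; the helper name and the condition are illustrative assumptions, not part of this patch:

```java
import java.util.concurrent.TimeoutException;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.test.GenericTestUtils;

public final class WaitForStaleExample {
  private WaitForStaleExample() {
  }

  /**
   * Block until the NameNode has marked at least one DataNode stale,
   * rather than sleeping a fixed STALE_INTERVAL.
   */
  public static void waitForStaleDataNode(MiniDFSCluster cluster,
      long staleIntervalMs) throws TimeoutException, InterruptedException {
    GenericTestUtils.waitFor(
        () -> cluster.getNamesystem().getNumStaleDataNodes() > 0,
        100,                            // re-check every 100 ms
        (int) (10 * staleIntervalMs));  // give up well past the stale window
  }
}
```

In `getFileChecksum()` such a helper could replace the `Thread.sleep(STALE_INTERVAL)` call right after `shutdownDataNode(dnToDie)`, turning the fixed wait into a bounded poll.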
```diff
@@ -588,6 +611,7 @@ private FileChecksum getFileChecksum(String filePath, int range,
     if (dnIdxToDie != -1) {
       cluster.restartDataNode(dnIdxToDie);
+      cluster.waitActive();
     }

     return fc;
```