
Commit 4c4849f

steveloughran authored and KeeProMise committed
HADOOP-18508. S3A: Support parallel integration test runs on same bucket (apache#5081)
It is now possible to provide a job ID in the maven `job.id` property for hadoop-aws test runs, to isolate the paths under the test bucket under which all tests will be executed. This allows independent builds *in different source trees* to test against the same bucket in parallel, and is designed for CI testing.

Example:

    mvn verify -Dparallel-tests -Droot.tests.enabled=false -Djob.id=1
    mvn verify -Droot.tests.enabled=false -Djob.id=2

- Root tests must be disabled to stop them cleaning up the test paths of other test runs.
- Do still run the root tests regularly, to force cleanup of the output of any interrupted test suites.

Contributed by Steve Loughran
1 parent 2d149e5 commit 4c4849f

File tree

18 files changed: +197 −170 lines

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FSMainOperationsBaseTest.java

Lines changed: 4 additions & 2 deletions
@@ -102,7 +102,9 @@ public void setUp() throws Exception {
 
   @After
   public void tearDown() throws Exception {
-    fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true);
+    if (fSys != null) {
+      fSys.delete(new Path(getAbsoluteTestRootPath(fSys), new Path("test")), true);
+    }
   }
 
 
@@ -192,7 +194,7 @@ public void testWorkingDirectory() throws Exception {
 
   @Test
   public void testWDAbsolute() throws IOException {
-    Path absoluteDir = new Path(fSys.getUri() + "/test/existingDir");
+    Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
     fSys.mkdirs(absoluteDir);
     fSys.setWorkingDirectory(absoluteDir);
     Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java

Lines changed: 7 additions & 1 deletion
@@ -81,6 +81,12 @@ public abstract class FileContextMainOperationsBaseTest {
   protected final FileContextTestHelper fileContextTestHelper =
       createFileContextHelper();
 
+  /**
+   * Create the test helper.
+   * Important: this is invoked during the construction of the base class,
+   * so is very brittle.
+   * @return a test helper.
+   */
   protected FileContextTestHelper createFileContextHelper() {
     return new FileContextTestHelper();
   }
@@ -107,7 +113,7 @@ public boolean accept(Path file) {
 
   private static final byte[] data = getFileData(numBlocks,
       getDefaultBlockSize());
-
+
   @Before
   public void setUp() throws Exception {
     File testBuildData = GenericTestUtils.getRandomizedTestDir();

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFSMainOperationsLocalFileSystem.java

Lines changed: 1 addition & 25 deletions
@@ -21,23 +21,13 @@
 import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
 
 public class TestFSMainOperationsLocalFileSystem extends FSMainOperationsBaseTest {
 
   @Override
   protected FileSystem createFileSystem() throws IOException {
     return FileSystem.getLocal(new Configuration());
   }
-
-  @Override
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-  }
 
   static Path wd = null;
   @Override
@@ -46,19 +36,5 @@ protected Path getDefaultWorkingDirectory() throws IOException {
     wd = FileSystem.getLocal(new Configuration()).getWorkingDirectory();
     return wd;
   }
-
-  @Override
-  @After
-  public void tearDown() throws Exception {
-    super.tearDown();
-  }
-
-  @Test
-  @Override
-  public void testWDAbsolute() throws IOException {
-    Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
-    fSys.mkdirs(absoluteDir);
-    fSys.setWorkingDirectory(absoluteDir);
-    Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());
-  }
+
 }

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/viewfs/TestFSMainOperationsLocalFileSystem.java

Lines changed: 0 additions & 9 deletions
@@ -53,14 +53,5 @@ public void tearDown() throws Exception {
     super.tearDown();
     ViewFileSystemTestSetup.tearDown(this, fcTarget);
   }
-
-  @Test
-  @Override
-  public void testWDAbsolute() throws IOException {
-    Path absoluteDir = getTestRootPath(fSys, "test/existingDir");
-    fSys.mkdirs(absoluteDir);
-    fSys.setWorkingDirectory(absoluteDir);
-    Assert.assertEquals(absoluteDir, fSys.getWorkingDirectory());
 
-  }
 }

hadoop-tools/hadoop-aws/pom.xml

Lines changed: 15 additions & 12 deletions
@@ -56,6 +56,11 @@
 
     <!-- Is prefetch enabled? -->
     <fs.s3a.prefetch.enabled>unset</fs.s3a.prefetch.enabled>
+    <!-- Job ID; allows for parallel jobs on same bucket -->
+    <!-- job.id is used to build the path for tests; default is 00. -->
+    <job.id>00</job.id>
+    <!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
+    <root.tests.enabled>unset</root.tests.enabled>
   </properties>
 
   <profiles>
@@ -115,14 +120,8 @@
                 <test.build.data>${test.build.data}/${surefire.forkNumber}</test.build.data>
                 <test.build.dir>${test.build.dir}/${surefire.forkNumber}</test.build.dir>
                 <hadoop.tmp.dir>${hadoop.tmp.dir}/${surefire.forkNumber}</hadoop.tmp.dir>
+                <test.unique.fork.id>job-${job.id}-fork-000${surefire.forkNumber}</test.unique.fork.id>
 
-                <!-- Due to a Maven quirk, setting this to just -->
-                <!-- surefire.forkNumber won't do the parameter -->
-                <!-- substitution. Putting a prefix in front of it like -->
-                <!-- "fork-" makes it work. -->
-                <!-- Important: Those leading 0s are needed to guarantee that -->
-                <!-- trailing three chars are always numeric and unique -->
-                <test.unique.fork.id>fork-000${surefire.forkNumber}</test.unique.fork.id>
                 <!-- Propagate scale parameters -->
                 <fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
                 <fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
@@ -163,7 +162,7 @@
                 <!-- surefire.forkNumber won't do the parameter -->
                 <!-- substitution. Putting a prefix in front of it like -->
                 <!-- "fork-" makes it work. -->
-                <test.unique.fork.id>fork-000${surefire.forkNumber}</test.unique.fork.id>
+                <test.unique.fork.id>job-${job.id}-fork-000${surefire.forkNumber}</test.unique.fork.id>
                 <!-- Propagate scale parameters -->
                 <fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
                 <fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
@@ -174,14 +173,14 @@
                 <test.default.timeout>${test.integration.timeout}</test.default.timeout>
                 <!-- Prefetch -->
                 <fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
+                <!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
+                <fs.s3a.root.tests.enabled>${root.tests.enabled}</fs.s3a.root.tests.enabled>
+
               </systemPropertyVariables>
               <!-- Some tests cannot run in parallel. Tests that cover -->
               <!-- access to the root directory must run in isolation -->
               <!-- from anything else that could modify the bucket. -->
-              <!-- S3A tests that cover multi-part upload must run in -->
-              <!-- isolation, because the file system is configured to -->
-              <!-- purge existing multi-part upload data on -->
-              <!-- initialization. MiniYARNCluster has not yet been -->
+              <!-- MiniYARNCluster has not yet been -->
              <!-- changed to handle parallel test execution gracefully. -->
              <!-- Exclude all of these tests from parallel execution, -->
              <!-- and instead run them sequentially in a separate -->
@@ -228,6 +227,9 @@
                 <fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
                 <!-- Prefetch -->
                 <fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
+                <!-- are root tests enabled. Set to false when running parallel jobs on same bucket -->
+                <fs.s3a.root.tests.enabled>${root.tests.enabled}</fs.s3a.root.tests.enabled>
+                <test.unique.fork.id>job-${job.id}</test.unique.fork.id>
               </systemPropertyVariables>
               <!-- Do a sequential run for tests that cannot handle -->
               <!-- parallel execution. -->
@@ -289,6 +291,7 @@
                 <fs.s3a.directory.marker.audit>${fs.s3a.directory.marker.audit}</fs.s3a.directory.marker.audit>
                 <!-- Prefetch -->
                 <fs.s3a.prefetch.enabled>${fs.s3a.prefetch.enabled}</fs.s3a.prefetch.enabled>
+                <test.unique.fork.id>job-${job.id}</test.unique.fork.id>
               </systemPropertyVariables>
               <forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
             </configuration>
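
Taken together, these pom changes compose each fork's unique path element from the job ID and the surefire fork number. A sketch of the resulting values for one run (the bucket name below is illustrative, not from the commit):

```
mvn verify -Dparallel-tests -Droot.tests.enabled=false -Djob.id=001
# parallel phase, fork 3:  test.unique.fork.id = job-001-fork-0003
# sequential phase:        test.unique.fork.id = job-001
# each run's test data therefore lands under its own path element, e.g.
#   s3a://example-test-bucket/job-001-fork-0003/...
```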

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md

Lines changed: 46 additions & 3 deletions
@@ -43,7 +43,7 @@ is a more specific lie and harder to make. And, if you get caught out: you
 lose all credibility with the project.
 
 You don't need to test from a VM within the AWS infrastructure; with the
-`-Dparallel=tests` option the non-scale tests complete in under ten minutes.
+`-Dparallel-tests` option the non-scale tests complete in under twenty minutes.
 Because the tests clean up after themselves, they are also designed to be low
 cost. It's neither hard nor expensive to run the tests; if you can't,
 there's no guarantee your patch works. The reviewers have enough to do, and
@@ -539,12 +539,51 @@ Otherwise, set a large timeout in `fs.s3a.scale.test.timeout`
 The tests are executed in an order to only clean up created files after
 the end of all the tests. If the tests are interrupted, the test data will remain.
 
+## <a name="CI"/> Testing through continuous integration
+
+### Parallel CI builds
+
+For CI testing of the module, including the integration tests,
+it is generally necessary to support testing multiple PRs simultaneously.
+
+To do this:
+1. A job ID must be supplied in the `job.id` property, so each job works on an isolated directory
+   tree. This should be a number or unique string, which will be used within a path element, so
+   must only contain characters valid in an S3/hadoop path element.
+2. Root directory tests need to be disabled by setting `fs.s3a.root.tests.enabled` to
+   `false`, either on the maven command line or in the XML configurations.
+
+```
+mvn verify -T 1C -Dparallel-tests -DtestsThreadCount=14 -Dscale -Dfs.s3a.root.tests.enabled=false -Djob.id=001
+```
+
+This parallel execution feature is only for isolated builds sharing a single S3 bucket; it does
+not support parallel builds and tests from the same local source tree.
+
+Since the root tests are not being executed, set up a scheduled job to purge the test bucket of all
+data on a regular basis, to keep costs down.
+The easiest way to do this is a bucket lifecycle rule which deletes all files more than a few days old,
+alongside one which aborts all pending uploads more than 24h old.
+
+### Securing CI builds
+
+It's clearly unsafe to have CI infrastructure test PRs submitted to the apache github account
+with AWS credentials -which is why it isn't done by the Yetus-initiated builds.
+
+Anyone doing this privately should:
+* Review incoming patches before triggering the tests.
+* Have a dedicated IAM role with restricted access to the test bucket, any KMS keys used, and the
+  external bucket containing the CSV test file.
+* Have a build process which generates short-lived session credentials for this role.
+* Run the tests in an EC2 VM/container which collects the restricted IAM credentials
+  from the IAM instance/container credentials provider.
+
 ## <a name="load"></a> Load tests.
 
-Some are designed to overload AWS services with more
+Some tests are designed to overload AWS services with more
 requests per second than an AWS account is permitted.
 
-The operation of these test maybe observable to other users of the same
+The operation of these tests may be observable to other users of the same
 account -especially if they are working in the AWS region to which the
 tests are targeted.
@@ -556,6 +595,10 @@ They do not run automatically: they must be explicitly run from the command line
 
 Look in the source for these and read the Javadocs before executing.
 
+Note: one fear here was that asking for too many session/role credentials in a short period
+of time would actually lock an account out of a region. It doesn't: it simply triggers
+throttling of STS requests.
+
 ## <a name="alternate_s3"></a> Testing against non-AWS S3 Stores.
 
 The S3A filesystem is designed to work with S3 stores which implement
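
The lifecycle-rule advice in the new CI section can be implemented with a standard S3 lifecycle configuration. A minimal sketch using the AWS CLI; the bucket name and rule IDs are illustrative assumptions, and the expiry periods should match your own retention needs:

```
aws s3api put-bucket-lifecycle-configuration \
  --bucket example-test-bucket \
  --lifecycle-configuration '{
    "Rules": [
      {
        "ID": "expire-old-test-data",
        "Status": "Enabled",
        "Filter": {},
        "Expiration": { "Days": 7 }
      },
      {
        "ID": "abort-stale-multipart-uploads",
        "Status": "Enabled",
        "Filter": {},
        "AbortIncompleteMultipartUpload": { "DaysAfterInitiation": 1 }
      }
    ]
  }'
```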

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java

Lines changed: 8 additions & 0 deletions
@@ -27,6 +27,8 @@
 import org.apache.hadoop.fs.contract.AbstractFSContract;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeSkipRootTests;
+
 /**
  * root dir operations against an S3 bucket.
  */
@@ -36,6 +38,12 @@ public class ITestS3AContractRootDir extends
   private static final Logger LOG =
       LoggerFactory.getLogger(ITestS3AContractRootDir.class);
 
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    maybeSkipRootTests(getFileSystem().getConf());
+  }
+
   @Override
   protected AbstractFSContract createContract(Configuration conf) {
     return new S3AContract(conf);
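
The commit calls `maybeSkipRootTests` but its body is outside this diff. A plausible sketch of such a guard, based only on the call site and the `fs.s3a.root.tests.enabled` property wired up in the pom; the class name and default value here are assumptions:

```java
import org.apache.hadoop.conf.Configuration;
import org.junit.Assume;

public final class RootTestGuard {
  /** Property fed through from the maven root.tests.enabled flag (assumed name). */
  public static final String ROOT_TESTS_ENABLED = "fs.s3a.root.tests.enabled";

  private RootTestGuard() {
  }

  /**
   * Skip the current JUnit test when root tests are disabled.
   * Sketch only: the real helper lives in S3ATestUtils and may differ.
   * @param conf filesystem configuration to probe
   */
  public static void maybeSkipRootTests(Configuration conf) {
    // Default to true so a plain single-build developer run still covers the root dir.
    Assume.assumeTrue("Root tests disabled",
        conf.getBoolean(ROOT_TESTS_ENABLED, true));
  }
}
```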

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java

Lines changed: 4 additions & 2 deletions
@@ -386,8 +386,10 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty()
           s3Configuration.pathStyleAccessEnabled());
       byte[] file = ContractTestUtils.toAsciiByteArray("test file");
       ContractTestUtils.writeAndRead(fs,
-          new Path("/path/style/access/testFile"), file, file.length,
-          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+          createTestPath(new Path("/path/style/access/testFile")),
+          file, file.length,
+          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length),
+          false, true);
     } catch (final AWSRedirectException e) {
       LOG.error("Caught exception: ", e);
       // Catch/pass standard path style access behaviour when live bucket
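
The switch to `createTestPath` above is what relocates a formerly absolute test path under the per-job/per-fork prefix. A sketch of the idea, assuming the helper reads the `test.unique.fork.id` system property set by the pom; the real implementation lives in the hadoop-aws test utilities and may differ:

```java
import org.apache.hadoop.fs.Path;

public final class TestPathSupport {
  /** System property set by the surefire/failsafe configuration (assumed name). */
  public static final String TEST_UNIQUE_FORK_ID = "test.unique.fork.id";

  private TestPathSupport() {
  }

  /**
   * Sketch: move a test path under the unique job/fork prefix so that
   * parallel runs against the same bucket cannot collide.
   * @param defVal path to use when no prefix is set
   * @return the possibly relocated path
   */
  public static Path createTestPath(Path defVal) {
    String forkId = System.getProperty(TEST_UNIQUE_FORK_ID);
    if (forkId == null || forkId.isEmpty()) {
      return defVal;
    }
    // Strip the leading "/" so the original path becomes a child of the prefix.
    return new Path("/" + forkId, defVal.toUri().getPath().substring(1));
  }
}
```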
