Skip to content

Commit 92767f9

Browse files
committed
HADOOP-11601. Enhance FS spec & tests to mandate FileStatus.getBlocksize() >0 for non-empty files. Contributed by Steve Loughran
(cherry picked from commit ae8849f) This closes #50
1 parent d3fa53a commit 92767f9

File tree

3 files changed

+134
-10
lines changed

3 files changed

+134
-10
lines changed

hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Get the status of a path
7878
if isFile(FS, p) :
7979
stat.length = len(FS.Files[p])
8080
stat.isdir = False
81+
stat.blockSize > 0
8182
elif isDir(FS, p) :
8283
stat.length = 0
8384
stat.isdir = True
@@ -448,13 +449,13 @@ split calculations to divide work optimally across a set of worker processes.
448449

449450
#### Postconditions
450451

451-
result = integer >= 0
452+
result = integer > 0
452453

453454
Although there is no defined minimum value for this result, as it
454455
is used to partition work during job submission, a block size
455-
that is too small will result in either too many jobs being submitted
456-
for efficient work, or the `JobSubmissionClient` running out of memory.
457-
456+
that is too small will result in badly partitioned workload,
457+
or even the `JobSubmissionClient` and equivalent
458+
running out of memory as it calculates the partitions.
458459

459460
Any FileSystem that does not actually break files into blocks SHOULD
460461
return a number for this that results in efficient processing.
@@ -500,12 +501,12 @@ on the filesystem.
500501

501502
#### Postconditions
502503

503-
504+
if len(FS, P) > 0: getFileStatus(P).getBlockSize() > 0
504505
result == getFileStatus(P).getBlockSize()
505506

506-
The outcome of this operation MUST be identical to that contained in
507-
the `FileStatus` returned from `getFileStatus(P)`.
508-
507+
1. The outcome of this operation MUST be identical to the value of
508+
`getFileStatus(P).getBlockSize()`.
509+
1. By inference, it MUST be > 0 for any file of length > 0.
509510

510511
## State Changing Operations
511512

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCreateTest.java

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,31 @@
2121
import org.apache.hadoop.fs.FSDataOutputStream;
2222
import org.apache.hadoop.fs.FileAlreadyExistsException;
2323
import org.apache.hadoop.fs.FileStatus;
24+
import org.apache.hadoop.fs.FileSystem;
2425
import org.apache.hadoop.fs.Path;
25-
import org.apache.hadoop.io.IOUtils;
2626
import org.junit.Test;
2727
import org.junit.internal.AssumptionViolatedException;
2828

2929
import java.io.FileNotFoundException;
3030
import java.io.IOException;
3131

3232
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
33+
import static org.apache.hadoop.fs.contract.ContractTestUtils.getFileStatusEventually;
3334
import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
3435
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
3536
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
3637

3738
/**
38-
* Test creating files, overwrite options &c
39+
* Test creating files, overwrite options etc.
3940
*/
4041
public abstract class AbstractContractCreateTest extends
4142
AbstractFSContractTestBase {
4243

44+
/**
45+
* How long to wait for a path to become visible.
46+
*/
47+
public static final int CREATE_TIMEOUT = 15000;
48+
4349
@Test
4450
public void testCreateNewFile() throws Throwable {
4551
describe("Foundational 'create a file' test");
@@ -180,4 +186,90 @@ public void testCreatedFileIsImmediatelyVisible() throws Throwable {
180186
}
181187
}
182188
}
189+
190+
@Test
191+
public void testCreatedFileIsVisibleOnFlush() throws Throwable {
192+
describe("verify that a newly created file exists once a flush has taken "
193+
+ "place");
194+
Path path = path("testCreatedFileIsVisibleOnFlush");
195+
FileSystem fs = getFileSystem();
196+
try(FSDataOutputStream out = fs.create(path,
197+
false,
198+
4096,
199+
(short) 1,
200+
1024)) {
201+
out.write('a');
202+
out.flush();
203+
if (!fs.exists(path)) {
204+
205+
if (isSupported(IS_BLOBSTORE)) {
206+
// object store: downgrade to a skip so that the failure is visible
207+
// in test results
208+
skip("Filesystem is an object store and newly created files are not "
209+
+ "immediately visible");
210+
}
211+
assertPathExists("expected path to be visible before file closed",
212+
path);
213+
}
214+
}
215+
}
216+
217+
@Test
218+
public void testCreatedFileIsEventuallyVisible() throws Throwable {
219+
describe("verify a written to file is visible after the stream is closed");
220+
Path path = path("testCreatedFileIsEventuallyVisible");
221+
FileSystem fs = getFileSystem();
222+
try(
223+
FSDataOutputStream out = fs.create(path,
224+
false,
225+
4096,
226+
(short) 1,
227+
1024)
228+
) {
229+
out.write(0x01);
230+
out.close();
231+
getFileStatusEventually(fs, path, CREATE_TIMEOUT);
232+
}
233+
}
234+
235+
@Test
236+
public void testFileStatusBlocksizeNonEmptyFile() throws Throwable {
237+
describe("validate the block size of a filesystem and files within it");
238+
FileSystem fs = getFileSystem();
239+
240+
long rootPath = fs.getDefaultBlockSize(path("/"));
241+
assertTrue("Root block size is invalid " + rootPath,
242+
rootPath > 0);
243+
244+
Path path = path("testFileStatusBlocksizeNonEmptyFile");
245+
byte[] data = dataset(256, 'a', 'z');
246+
247+
writeDataset(fs, path, data, data.length, 1024 * 1024, false);
248+
249+
validateBlockSize(fs, path, 1);
250+
}
251+
252+
@Test
253+
public void testFileStatusBlocksizeEmptyFile() throws Throwable {
254+
describe("check that an empty file may return a 0-byte blocksize");
255+
FileSystem fs = getFileSystem();
256+
Path path = path("testFileStatusBlocksizeEmptyFile");
257+
ContractTestUtils.touch(fs, path);
258+
validateBlockSize(fs, path, 0);
259+
}
260+
261+
private void validateBlockSize(FileSystem fs, Path path, int minValue)
262+
throws IOException, InterruptedException {
263+
FileStatus status =
264+
getFileStatusEventually(fs, path, CREATE_TIMEOUT);
265+
String statusDetails = status.toString();
266+
assertTrue("File status block size too low: " + statusDetails
267+
+ " min value: " + minValue,
268+
status.getBlockSize() >= minValue);
269+
long defaultBlockSize = fs.getDefaultBlockSize(path);
270+
assertTrue("fs.getDefaultBlockSize(" + path + ") size " +
271+
defaultBlockSize + " is below the minimum of " + minValue,
272+
defaultBlockSize >= minValue);
273+
}
274+
183275
}

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,36 @@ public static boolean containsDuplicates(Collection<Path> paths) {
10971097
return new HashSet<>(paths).size() != paths.size();
10981098
}
10991099

1100+
/**
1101+
* Get the status of a path eventually, even if the FS doesn't have create
1102+
* consistency. If the path is not there by the time the timeout completes,
1103+
* an assertion is raised.
1104+
* @param fs FileSystem
1105+
* @param path path to look for
1106+
* @param timeout timeout in milliseconds
1107+
* @return the status
1108+
* @throws IOException if an I/O error occurs while writing or reading the
1109+
* test file <i>other than file not found</i>
1110+
*/
1111+
public static FileStatus getFileStatusEventually(FileSystem fs, Path path,
1112+
int timeout) throws IOException, InterruptedException {
1113+
long endTime = System.currentTimeMillis() + timeout;
1114+
FileStatus stat = null;
1115+
do {
1116+
try {
1117+
stat = fs.getFileStatus(path);
1118+
} catch (FileNotFoundException e) {
1119+
if (System.currentTimeMillis() > endTime) {
1120+
// timeout, raise an assert with more diagnostics
1121+
assertPathExists(fs, "Path not found after " + timeout + " mS", path);
1122+
} else {
1123+
Thread.sleep(50);
1124+
}
1125+
}
1126+
} while (stat == null);
1127+
return stat;
1128+
}
1129+
11001130
/**
11011131
* Recursively list all entries, with a depth first traversal of the
11021132
* directory tree.
@@ -1471,4 +1501,5 @@ public long getEndTime() {
14711501
return endTime;
14721502
}
14731503
}
1504+
14741505
}

0 commit comments

Comments
 (0)