Skip to content

Commit fd83274

Browse files
committed
HADOOP-17451. IOStatistics test failures in S3A code. (#2594)
Caused by HADOOP-16830 and HADOOP-17271. Fixes tests which fail intermittently based on configs and in the case of the HugeFile tests, bulk runs with existing FS instances meant statistic probes sometimes ended up probing those of a previous FS. Contributed by Steve Loughran. Change-Id: I65ba3f44444e59d298df25ac5c8dc5a8781dfb7d
1 parent 6a55232 commit fd83274

File tree

7 files changed

+80
-62
lines changed

7 files changed

+80
-62
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/impl/StorageStatisticsFromIOStatistics.java

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,46 @@ public Iterator<LongStatistic> iterator() {
6767
public Iterator<LongStatistic> getLongStatistics() {
6868
final Set<Map.Entry<String, Long>> counters = counters()
6969
.entrySet();
70-
return counters.stream().map(e ->
71-
new StorageStatistics.LongStatistic(e.getKey(), e.getValue()))
72-
.collect(Collectors.toSet()).iterator();
70+
final Set<LongStatistic> statisticSet = counters.stream().map(
71+
this::toLongStatistic)
72+
.collect(Collectors.toSet());
73+
74+
// add the gauges
75+
gauges().entrySet().forEach(entry ->
76+
statisticSet.add(toLongStatistic(entry)));
77+
return statisticSet.iterator();
78+
}
79+
80+
/**
81+
* Convert a counter/gauge entry to a long statistic.
82+
* @param e entry
83+
* @return statistic
84+
*/
85+
private LongStatistic toLongStatistic(final Map.Entry<String, Long> e) {
86+
return new LongStatistic(e.getKey(), e.getValue());
7387
}
7488

7589
private Map<String, Long> counters() {
7690
return ioStatistics.counters();
7791
}
7892

93+
private Map<String, Long> gauges() {
94+
return ioStatistics.gauges();
95+
}
96+
7997
@Override
8098
public Long getLong(final String key) {
81-
return counters().get(key);
99+
Long l = counters().get(key);
100+
if (l == null) {
101+
l = gauges().get(key);
102+
}
103+
return l;
82104
}
83105

84106
@Override
85107
public boolean isTracked(final String key) {
86-
return counters().containsKey(key);
108+
return counters().containsKey(key)
109+
|| gauges().containsKey(key);
87110
}
88111

89112
@Override

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java

Lines changed: 12 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,8 @@
6464
import java.io.Closeable;
6565
import java.net.URI;
6666
import java.time.Duration;
67-
import java.util.Arrays;
6867
import java.util.EnumSet;
6968
import java.util.HashMap;
70-
import java.util.List;
7169
import java.util.Map;
7270
import java.util.UUID;
7371
import java.util.concurrent.atomic.AtomicInteger;
@@ -182,20 +180,6 @@ public class S3AInstrumentation implements Closeable, MetricsSource,
182180
*/
183181
private final IOStatisticsStore instanceIOStatistics;
184182

185-
/**
186-
* Gauges to create.
187-
* <p></p>
188-
* All statistics which are not gauges or quantiles
189-
* are registered as counters.
190-
*/
191-
private static final Statistic[] GAUGES_TO_CREATE = {
192-
OBJECT_PUT_REQUESTS_ACTIVE,
193-
OBJECT_PUT_BYTES_PENDING,
194-
STREAM_WRITE_BLOCK_UPLOADS_ACTIVE,
195-
STREAM_WRITE_BLOCK_UPLOADS_PENDING,
196-
STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING,
197-
};
198-
199183
/**
200184
* Construct the instrumentation for a filesystem.
201185
* @param name URI of filesystem.
@@ -211,10 +195,6 @@ public S3AInstrumentation(URI name) {
211195
// create the builder
212196
IOStatisticsStoreBuilder storeBuilder = iostatisticsStore();
213197

214-
// add the gauges
215-
List<Statistic> gauges = Arrays.asList(GAUGES_TO_CREATE);
216-
gauges.forEach(this::gauge);
217-
218198
// declare all counter statistics
219199
EnumSet.allOf(Statistic.class).stream()
220200
.filter(statistic ->
@@ -223,6 +203,14 @@ public S3AInstrumentation(URI name) {
223203
counter(stat);
224204
storeBuilder.withCounters(stat.getSymbol());
225205
});
206+
// declare all gauge statistics
207+
EnumSet.allOf(Statistic.class).stream()
208+
.filter(statistic ->
209+
statistic.getType() == StatisticTypeEnum.TYPE_GAUGE)
210+
.forEach(stat -> {
211+
gauge(stat);
212+
storeBuilder.withGauges(stat.getSymbol());
213+
});
226214

227215
// and durations
228216
EnumSet.allOf(Statistic.class).stream()
@@ -1352,15 +1340,13 @@ private OutputStreamStatistics(
13521340
this.filesystemStatistics = filesystemStatistics;
13531341
IOStatisticsStore st = iostatisticsStore()
13541342
.withCounters(
1355-
StreamStatisticNames.STREAM_WRITE_BLOCK_UPLOADS,
1343+
STREAM_WRITE_BLOCK_UPLOADS.getSymbol(),
13561344
STREAM_WRITE_BYTES.getSymbol(),
13571345
STREAM_WRITE_EXCEPTIONS.getSymbol(),
1358-
StreamStatisticNames.STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING,
1359-
STREAM_WRITE_TOTAL_TIME.getSymbol(),
1346+
STREAM_WRITE_EXCEPTIONS_COMPLETING_UPLOADS.getSymbol(),
13601347
STREAM_WRITE_QUEUE_DURATION.getSymbol(),
13611348
STREAM_WRITE_TOTAL_DATA.getSymbol(),
1362-
STREAM_WRITE_EXCEPTIONS.getSymbol(),
1363-
STREAM_WRITE_EXCEPTIONS_COMPLETING_UPLOADS.getSymbol())
1349+
STREAM_WRITE_TOTAL_TIME.getSymbol())
13641350
.withGauges(
13651351
STREAM_WRITE_BLOCK_UPLOADS_PENDING.getSymbol(),
13661352
STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING.getSymbol())
@@ -1470,7 +1456,7 @@ public void blockUploadFailed(
14701456
@Override
14711457
public void bytesTransferred(long byteCount) {
14721458
bytesUploaded.addAndGet(byteCount);
1473-
incrementGauge(STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING, -byteCount);
1459+
incAllGauges(STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING, -byteCount);
14741460
}
14751461

14761462
@Override

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestPartialRenamesDeletes.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
import static org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteSupport.toPathList;
7878
import static org.apache.hadoop.fs.s3a.test.ExtraAssertions.assertFileCount;
7979
import static org.apache.hadoop.fs.s3a.test.ExtraAssertions.extractCause;
80+
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsSourceToString;
8081
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
8182
import static org.apache.hadoop.test.LambdaTestUtils.eval;
8283

@@ -685,7 +686,8 @@ public void testPartialDirDelete() throws Throwable {
685686
readOnlyFiles.size());
686687
rejectionCount.assertDiffEquals("Wrong rejection count",
687688
readOnlyFiles.size());
688-
reset(rejectionCount, deleteVerbCount, deleteObjectCount);
689+
reset(rejectionCount, deleteVerbCount, deleteObjectCount,
690+
bulkDeleteVerbCount);
689691
}
690692
// all the files are still there? (avoid in scale test due to cost)
691693
if (!scaleTest) {
@@ -694,9 +696,13 @@ public void testPartialDirDelete() throws Throwable {
694696

695697
describe("Trying to delete upper-level directory");
696698
ex = expectDeleteForbidden(basePath);
699+
String iostats = ioStatisticsSourceToString(roleFS);
700+
697701
if (multiDelete) {
698702
// multi-delete status checks
699-
deleteVerbCount.assertDiffEquals("Wrong delete count", 1);
703+
deleteVerbCount.assertDiffEquals("Wrong delete request count", 0);
704+
bulkDeleteVerbCount.assertDiffEquals(
705+
"Wrong count of delete operations in " + iostats, 1);
700706
MultiObjectDeleteException mde = extractCause(
701707
MultiObjectDeleteException.class, ex);
702708
List<MultiObjectDeleteSupport.KeyPath> undeletedKeyPaths =

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,7 +475,7 @@ protected OperationCostValidator.ExpectedProbe whenDeleting(
475475

476476
/**
477477
* Execute a closure expecting a specific number of HEAD/LIST calls
478-
* on <i>raw</i> S3 stores only.
478+
* on <i>raw</i> S3 stores only. The operation is always evaluated.
479479
* @param cost expected cost
480480
* @param eval closure to evaluate
481481
* @param <T> return type of closure
@@ -484,7 +484,8 @@ protected OperationCostValidator.ExpectedProbe whenDeleting(
484484
protected <T> T verifyRaw(
485485
OperationCost cost,
486486
Callable<T> eval) throws Exception {
487-
return verifyMetrics(eval, whenRaw(cost));
487+
return verifyMetrics(eval,
488+
whenRaw(cost), OperationCostValidator.always());
488489
}
489490

490491
/**

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,21 @@ public void testDeleteSingleFileInDir() throws Throwable {
121121
with(DIRECTORIES_DELETED, 0),
122122
with(FILES_DELETED, 1),
123123

124+
// a single DELETE call is made to delete the object
125+
with(OBJECT_DELETE_REQUEST, DELETE_OBJECT_REQUEST),
126+
124127
// keeping: create no parent dirs or delete parents
125128
withWhenKeeping(DIRECTORIES_CREATED, 0),
126-
withWhenKeeping(OBJECT_DELETE_OBJECTS, DELETE_OBJECT_REQUEST),
129+
withWhenKeeping(OBJECT_BULK_DELETE_REQUEST, 0),
127130

128131
// deleting: create a parent and delete any of its parents
129132
withWhenDeleting(DIRECTORIES_CREATED, 1),
130-
// two objects will be deleted
131-
withWhenDeleting(OBJECT_DELETE_OBJECTS,
132-
DELETE_OBJECT_REQUEST
133-
+ DELETE_MARKER_REQUEST)
133+
// a bulk delete for all parents is issued.
134+
// the number of objects in it depends on the depth of the tree;
135+
// don't worry about that
136+
withWhenDeleting(OBJECT_BULK_DELETE_REQUEST, DELETE_MARKER_REQUEST)
134137
);
138+
135139
// there is an empty dir for a parent
136140
S3AFileStatus status = verifyRawInnerGetFileStatus(dir, true,
137141
StatusProbeEnum.ALL, GET_FILE_STATUS_ON_DIR);

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.apache.hadoop.fs.FSDataOutputStream;
3838
import org.apache.hadoop.fs.FileStatus;
3939
import org.apache.hadoop.fs.Path;
40-
import org.apache.hadoop.fs.StorageStatistics;
4140
import org.apache.hadoop.fs.contract.ContractTestUtils;
4241
import org.apache.hadoop.fs.s3a.S3AFileSystem;
4342
import org.apache.hadoop.fs.s3a.S3ATestUtils;
@@ -49,10 +48,11 @@
4948
import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
5049
import static org.apache.hadoop.fs.s3a.Constants.*;
5150
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
51+
import static org.apache.hadoop.fs.s3a.Statistic.STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING;
52+
import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic;
53+
import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticGaugeValue;
5254
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatistics;
5355
import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsSourceToString;
54-
import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics;
55-
import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.snapshotIOStatistics;
5656

5757
/**
5858
* Scale test which creates a huge file.
@@ -169,7 +169,8 @@ public void test_010_CreateHugeFile() throws IOException {
169169
// there's lots of logging here, so that a tail -f on the output log
170170
// can give a view of what is happening.
171171
S3AFileSystem fs = getFileSystem();
172-
StorageStatistics storageStatistics = fs.getStorageStatistics();
172+
IOStatistics iostats = fs.getIOStatistics();
173+
173174
String putRequests = Statistic.OBJECT_PUT_REQUESTS.getSymbol();
174175
String putBytes = Statistic.OBJECT_PUT_BYTES.getSymbol();
175176
Statistic putRequestsActive = Statistic.OBJECT_PUT_REQUESTS_ACTIVE;
@@ -205,9 +206,9 @@ public void test_010_CreateHugeFile() throws IOException {
205206
percentage,
206207
writtenMB,
207208
filesizeMB,
208-
storageStatistics.getLong(putBytes),
209+
iostats.counters().get(putBytes),
209210
gaugeValue(putBytesPending),
210-
storageStatistics.getLong(putRequests),
211+
iostats.counters().get(putRequests),
211212
gaugeValue(putRequestsActive),
212213
elapsedTime,
213214
writtenMB / elapsedTime));
@@ -227,27 +228,27 @@ public void test_010_CreateHugeFile() throws IOException {
227228
logFSState();
228229
bandwidth(timer, filesize);
229230
LOG.info("Statistics after stream closed: {}", streamStatistics);
230-
IOStatistics iostats = snapshotIOStatistics(
231-
retrieveIOStatistics(getFileSystem()));
231+
232232
LOG.info("IOStatistics after upload: {}",
233233
demandStringifyIOStatistics(iostats));
234-
long putRequestCount = storageStatistics.getLong(putRequests);
235-
Long putByteCount = storageStatistics.getLong(putBytes);
234+
long putRequestCount = lookupCounterStatistic(iostats, putRequests);
235+
long putByteCount = lookupCounterStatistic(iostats, putBytes);
236236
Assertions.assertThat(putRequestCount)
237237
.describedAs("Put request count from filesystem stats %s",
238238
iostats)
239239
.isGreaterThan(0);
240240
Assertions.assertThat(putByteCount)
241-
.describedAs("putByteCount count from filesystem stats %s",
242-
iostats)
241+
.describedAs("%s count from filesystem stats %s",
242+
putBytes, iostats)
243243
.isGreaterThan(0);
244244
LOG.info("PUT {} bytes in {} operations; {} MB/operation",
245245
putByteCount, putRequestCount,
246246
putByteCount / (putRequestCount * _1MB));
247247
LOG.info("Time per PUT {} nS",
248248
toHuman(timer.nanosPerOperation(putRequestCount)));
249-
assertEquals("active put requests in \n" + fs,
250-
0, gaugeValue(putRequestsActive));
249+
verifyStatisticGaugeValue(iostats, putRequestsActive.getSymbol(), 0);
250+
verifyStatisticGaugeValue(iostats,
251+
STREAM_WRITE_BLOCK_UPLOADS_BYTES_PENDING.getSymbol(), 0);
251252
progress.verifyNoFailures(
252253
"Put file " + fileToCreate + " of size " + filesize);
253254
if (streamStatistics != null) {

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,17 @@
2323
import org.apache.hadoop.fs.Path;
2424
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
2525
import org.apache.hadoop.fs.s3a.S3AInputStream;
26-
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
2726
import org.apache.hadoop.fs.s3a.S3ATestConstants;
2827
import org.apache.hadoop.fs.s3a.Statistic;
2928
import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics;
30-
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
3129

3230
import org.slf4j.Logger;
3331
import org.slf4j.LoggerFactory;
3432

3533
import java.io.InputStream;
3634

3735
import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
36+
import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupGaugeStatistic;
3837

3938
/**
4039
* Base class for scale tests; here is where the common scale configuration
@@ -184,17 +183,15 @@ protected S3AInputStream getS3AInputStream(
184183
}
185184

186185
/**
187-
* Get the gauge value of a statistic. Raises an assertion if
186+
* Get the gauge value of a statistic from the
187+
* IOStatistics of the filesystem. Raises an assertion if
188188
* there is no such gauge.
189189
* @param statistic statistic to look up
190190
* @return the value.
191191
*/
192192
public long gaugeValue(Statistic statistic) {
193-
S3AInstrumentation instrumentation = getFileSystem().getInstrumentation();
194-
MutableGaugeLong gauge = instrumentation.lookupGauge(statistic.getSymbol());
195-
assertNotNull("No gauge " + statistic
196-
+ " in " + instrumentation.dump("", " = ", "\n", true), gauge);
197-
return gauge.value();
193+
return lookupGaugeStatistic(getFileSystem().getIOStatistics(),
194+
statistic.getSymbol());
198195
}
199196

200197
/**

0 commit comments

Comments
 (0)