Skip to content
This repository was archived by the owner on Oct 29, 2023. It is now read-only.

Commit 1712279

Browse files
committed
Bump utils-java version.
Also add more metrics for shard timings.
1 parent b5c25c4 commit 1712279

File tree

3 files changed

+35
-5
lines changed

3 files changed

+35
-5
lines changed

pom.xml

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@
127127
<dependency>
128128
<groupId>com.google.cloud.genomics</groupId>
129129
<artifactId>google-genomics-utils</artifactId>
130-
<version>v1beta2-0.32</version>
130+
<version>v1beta2-0.33</version>
131131
<exclusions>
132132
<!-- Exclude an old version of guava which is being pulled
133133
in by a transitive dependency google-api-client 1.19.0 -->
@@ -148,6 +148,11 @@
148148
<artifactId>gov.nist.math.jama</artifactId>
149149
<version>1.1.1</version>
150150
</dependency>
151+
<dependency>
152+
<groupId>org.apache.commons</groupId>
153+
<artifactId>commons-math3</artifactId>
154+
<version>3.2</version>
155+
</dependency>
151156
<dependency>
152157
<groupId>org.reflections</groupId>
153158
<artifactId>reflections</artifactId>
@@ -203,7 +208,7 @@
203208
<groupId>com.google.protobuf</groupId>
204209
<artifactId>protobuf-java</artifactId>
205210
<version>3.0.0-alpha-3</version>
206-
</dependency>
211+
</dependency>
207212
</dependencies>
208213

209214
<profiles>

src/main/java/com/google/cloud/genomics/dataflow/readers/ReadStreamer.java

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,16 +17,19 @@
1717
import java.security.GeneralSecurityException;
1818
import java.util.Iterator;
1919
import java.util.List;
20+
import java.util.concurrent.TimeUnit;
2021

2122
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
2223
import com.google.cloud.dataflow.sdk.transforms.DoFn;
24+
import com.google.cloud.dataflow.sdk.transforms.Max;
2325
import com.google.cloud.dataflow.sdk.transforms.PTransform;
2426
import com.google.cloud.dataflow.sdk.transforms.ParDo;
2527
import com.google.cloud.dataflow.sdk.transforms.Sum;
2628
import com.google.cloud.dataflow.sdk.values.PCollection;
2729
import com.google.cloud.genomics.utils.GenomicsFactory;
2830
import com.google.cloud.genomics.utils.ShardBoundary;
2931
import com.google.cloud.genomics.utils.grpc.ReadStreamIterator;
32+
import com.google.common.base.Stopwatch;
3033
import com.google.genomics.v1.Read;
3134
import com.google.genomics.v1.StreamReadsRequest;
3235
import com.google.genomics.v1.StreamReadsResponse;
@@ -65,20 +68,26 @@ private class RetrieveReads extends DoFn<StreamReadsRequest, List<Read>> {
6568

6669
protected Aggregator<Integer, Integer> initializedShardCount;
6770
protected Aggregator<Integer, Integer> finishedShardCount;
71+
protected Aggregator<Long, Long> shardTimeMaxSec;
6872

6973
public RetrieveReads() {
7074
initializedShardCount = createAggregator("Initialized Shard Count", new Sum.SumIntegerFn());
7175
finishedShardCount = createAggregator("Finished Shard Count", new Sum.SumIntegerFn());
76+
shardTimeMaxSec = createAggregator("Maximum Shard Processing Time (sec)", new Max.MaxLongFn());
7277
}
7378

7479
@Override
7580
public void processElement(ProcessContext c) throws IOException, GeneralSecurityException {
7681
initializedShardCount.addValue(1);
82+
shardTimeMaxSec.addValue(0L);
83+
Stopwatch stopWatch = Stopwatch.createStarted();
7784
Iterator<StreamReadsResponse> iter = new ReadStreamIterator(c.element(), auth, shardBoundary, fields);
7885
while (iter.hasNext()) {
7986
StreamReadsResponse readResponse = iter.next();
8087
c.output(readResponse.getAlignmentsList());
8188
}
89+
stopWatch.stop();
90+
shardTimeMaxSec.addValue(stopWatch.elapsed(TimeUnit.SECONDS));
8291
finishedShardCount.addValue(1);
8392
}
8493
}

src/main/java/com/google/cloud/genomics/dataflow/readers/VariantStreamer.java

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,16 +17,23 @@
1717
import java.security.GeneralSecurityException;
1818
import java.util.Iterator;
1919
import java.util.List;
20+
import java.util.concurrent.TimeUnit;
21+
22+
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
2025

2126
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
2227
import com.google.cloud.dataflow.sdk.transforms.DoFn;
28+
import com.google.cloud.dataflow.sdk.transforms.Max;
2329
import com.google.cloud.dataflow.sdk.transforms.PTransform;
2430
import com.google.cloud.dataflow.sdk.transforms.ParDo;
2531
import com.google.cloud.dataflow.sdk.transforms.Sum;
2632
import com.google.cloud.dataflow.sdk.values.PCollection;
2733
import com.google.cloud.genomics.utils.GenomicsFactory;
2834
import com.google.cloud.genomics.utils.ShardBoundary;
2935
import com.google.cloud.genomics.utils.grpc.VariantStreamIterator;
36+
import com.google.common.base.Stopwatch;
3037
import com.google.genomics.v1.StreamVariantsRequest;
3138
import com.google.genomics.v1.StreamVariantsResponse;
3239
import com.google.genomics.v1.Variant;
@@ -37,6 +44,7 @@
3744
public class VariantStreamer extends
3845
PTransform<PCollection<StreamVariantsRequest>, PCollection<Variant>> {
3946

47+
private static final Logger LOG = LoggerFactory.getLogger(VariantStreamer.class);
4048
protected final GenomicsFactory.OfflineAuth auth;
4149
protected final ShardBoundary.Requirement shardBoundary;
4250
protected final String fields;
@@ -63,25 +71,33 @@ public PCollection<Variant> apply(PCollection<StreamVariantsRequest> input) {
6371
private class RetrieveVariants extends DoFn<StreamVariantsRequest, List<Variant>> {
6472

6573
protected Aggregator<Integer, Integer> initializedShardCount;
66-
protected Aggregator<Long, Long> itemCount;
6774
protected Aggregator<Integer, Integer> finishedShardCount;
75+
protected Aggregator<Long, Long> shardTimeMaxSec;
76+
DescriptiveStatistics stats;
6877

6978
public RetrieveVariants() {
7079
initializedShardCount = createAggregator("Initialized Shard Count", new Sum.SumIntegerFn());
71-
itemCount = createAggregator("Number of variant lists", new Sum.SumLongFn());
7280
finishedShardCount = createAggregator("Finished Shard Count", new Sum.SumIntegerFn());
81+
shardTimeMaxSec = createAggregator("Maximum Shard Processing Time (sec)", new Max.MaxLongFn());
82+
stats = new DescriptiveStatistics(500);
7383
}
7484

7585
@Override
7686
public void processElement(ProcessContext c) throws IOException, GeneralSecurityException, InterruptedException {
7787
initializedShardCount.addValue(1);
88+
shardTimeMaxSec.addValue(0L);
89+
Stopwatch stopWatch = Stopwatch.createStarted();
7890
Iterator<StreamVariantsResponse> iter = new VariantStreamIterator(c.element(), auth, shardBoundary, fields);
7991
while (iter.hasNext()) {
8092
StreamVariantsResponse variantResponse = iter.next();
8193
c.output(variantResponse.getVariantsList());
82-
itemCount.addValue(1L);
8394
}
95+
stopWatch.stop();
96+
shardTimeMaxSec.addValue(stopWatch.elapsed(TimeUnit.SECONDS));
97+
stats.addValue(stopWatch.elapsed(TimeUnit.SECONDS));
8498
finishedShardCount.addValue(1);
99+
LOG.info("Shard Duration in Seconds - Min: " + stats.getMin() + " Max: " + stats.getMax() +
100+
" Avg: " + stats.getMean() + " StdDev: " + stats.getStandardDeviation());
85101
}
86102
}
87103

0 commit comments

Comments
 (0)