Skip to content

Commit ccaa99c

Browse files
committed
HADOOP-16381. The JSON License is included in binary tarball via azure-documentdb:1.16.2. Contributed by Sushil Ks.
1 parent 555dabf commit ccaa99c

File tree

9 files changed

+459
-149
lines changed

9 files changed

+459
-149
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/pom.xml

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
<properties>
2929
<!-- Needed for generating FindBugs warnings using parent pom -->
3030
<yarn.basedir>${project.parent.parent.basedir}</yarn.basedir>
31-
<azure.documentdb.version>1.16.2</azure.documentdb.version>
31+
<azure.documentdb.version>2.4.5</azure.documentdb.version>
32+
<rxjava.version>1.3.8</rxjava.version>
3233
</properties>
3334

3435
<dependencies>
@@ -44,11 +45,6 @@
4445
<type>test-jar</type>
4546
<scope>test</scope>
4647
</dependency>
47-
<dependency>
48-
<groupId>com.microsoft.azure</groupId>
49-
<artifactId>azure-documentdb</artifactId>
50-
<version>${azure.documentdb.version}</version>
51-
</dependency>
5248
<dependency>
5349
<groupId>junit</groupId>
5450
<artifactId>junit</artifactId>
@@ -84,6 +80,38 @@
8480
</exclusion>
8581
</exclusions>
8682
</dependency>
83+
<dependency>
84+
<groupId>io.reactivex</groupId>
85+
<artifactId>rxjava</artifactId>
86+
<version>${rxjava.version}</version>
87+
</dependency>
88+
<dependency>
89+
<groupId>com.microsoft.azure</groupId>
90+
<artifactId>azure-cosmosdb</artifactId>
91+
<version>${azure.documentdb.version}</version>
92+
<exclusions>
93+
<exclusion>
94+
<groupId>io.netty</groupId>
95+
<artifactId>netty-handler</artifactId>
96+
</exclusion>
97+
<exclusion>
98+
<groupId>io.netty</groupId>
99+
<artifactId>netty-codec-http</artifactId>
100+
</exclusion>
101+
<exclusion>
102+
<groupId>io.reactivex</groupId>
103+
<artifactId>rxjava</artifactId>
104+
</exclusion>
105+
<exclusion>
106+
<groupId>io.netty</groupId>
107+
<artifactId>netty-transport</artifactId>
108+
</exclusion>
109+
<exclusion>
110+
<groupId>io.netty</groupId>
111+
<artifactId>netty-handler-proxy</artifactId>
112+
</exclusion>
113+
</exclusions>
114+
</dependency>
87115
</dependencies>
88116

89117
<build>

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/src/main/java/org/apache/hadoop/yarn/server/timelineservice/documentstore/DocumentStoreUtils.java

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818

1919
package org.apache.hadoop.yarn.server.timelineservice.documentstore;
2020

21-
import com.microsoft.azure.documentdb.ConnectionPolicy;
22-
import com.microsoft.azure.documentdb.ConsistencyLevel;
23-
import com.microsoft.azure.documentdb.DocumentClient;
21+
import com.microsoft.azure.cosmosdb.ConnectionPolicy;
22+
import com.microsoft.azure.cosmosdb.ConsistencyLevel;
23+
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient;
2424
import org.apache.hadoop.conf.Configuration;
2525
import org.apache.hadoop.yarn.api.records.timelineservice.ApplicationEntity;
2626
import org.apache.hadoop.yarn.api.records.timelineservice.FlowActivityEntity;
@@ -134,6 +134,10 @@ public static TimelineEvent fetchEvent(TimelineEntity timelineEntity,
134134
* @return true if no values are given or any of the strings is null or empty, else false
135135
*/
136136
public static boolean isNullOrEmpty(String...values) {
137+
if (values == null || values.length == 0) {
138+
return true;
139+
}
140+
137141
for (String value : values) {
138142
if (value == null || value.isEmpty()) {
139143
return true;
@@ -143,15 +147,20 @@ public static boolean isNullOrEmpty(String...values) {
143147
}
144148

145149
/**
146-
* Creates CosmosDB Document Client.
150+
* Creates CosmosDB Async Document Client.
147151
* @param conf
148152
* to retrieve cosmos db endpoint and key
149153
* @return async document client for CosmosDB
150154
*/
151-
public static DocumentClient createCosmosDBClient(Configuration conf){
152-
return new DocumentClient(DocumentStoreUtils.getCosmosDBEndpoint(conf),
153-
DocumentStoreUtils.getCosmosDBMasterKey(conf),
154-
ConnectionPolicy.GetDefault(), ConsistencyLevel.Session);
155+
public static AsyncDocumentClient createCosmosDBAsyncClient(
156+
Configuration conf){
157+
return new AsyncDocumentClient.Builder()
158+
.withServiceEndpoint(DocumentStoreUtils.getCosmosDBEndpoint(conf))
159+
.withMasterKeyOrResourceToken(
160+
DocumentStoreUtils.getCosmosDBMasterKey(conf))
161+
.withConnectionPolicy(ConnectionPolicy.GetDefault())
162+
.withConsistencyLevel(ConsistencyLevel.Session)
163+
.build();
155164
}
156165

157166
/**
@@ -486,4 +495,4 @@ private static boolean hasDataToBeRetrieve(
486495
return false;
487496
}
488497
}
489-
}
498+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/src/main/java/org/apache/hadoop/yarn/server/timelineservice/documentstore/collection/document/entity/TimelineEventSubDoc.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
public class TimelineEventSubDoc {
3131

3232
private final TimelineEvent timelineEvent;
33-
private boolean valid;
3433

3534
public TimelineEventSubDoc() {
3635
timelineEvent = new TimelineEvent();
@@ -51,11 +50,7 @@ public void setId(String eventId) {
5150
public boolean isValid() {
5251
return timelineEvent.isValid();
5352
}
54-
55-
public void setValid(boolean valid) {
56-
this.valid = valid;
57-
}
58-
53+
5954
public long getTimestamp() {
6055
return timelineEvent.getTimestamp();
6156
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/src/main/java/org/apache/hadoop/yarn/server/timelineservice/documentstore/collection/document/entity/TimelineMetricSubDoc.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
public class TimelineMetricSubDoc {
3232

3333
private final TimelineMetric timelineMetric;
34-
private boolean valid;
3534
private long singleDataTimestamp;
3635
private Number singleDataValue = 0;
3736

@@ -41,7 +40,6 @@ public TimelineMetricSubDoc() {
4140

4241
public TimelineMetricSubDoc(TimelineMetric timelineMetric) {
4342
this.timelineMetric = timelineMetric;
44-
this.valid = timelineMetric.isValid();
4543
if (timelineMetric.getType() == TimelineMetric.Type.SINGLE_VALUE &&
4644
timelineMetric.getValues().size() > 0) {
4745
this.singleDataTimestamp = timelineMetric.getSingleDataTimestamp();
@@ -130,10 +128,6 @@ public void setType(TimelineMetric.Type metricType) {
130128
timelineMetric.setType(metricType);
131129
}
132130

133-
public void setValid(boolean valid) {
134-
this.valid = valid;
135-
}
136-
137131
public boolean isValid() {
138132
return (timelineMetric.getId() != null);
139133
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/src/main/java/org/apache/hadoop/yarn/server/timelineservice/documentstore/reader/cosmosdb/CosmosDBDocumentStoreReader.java

Lines changed: 68 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818

1919
package org.apache.hadoop.yarn.server.timelineservice.documentstore.reader.cosmosdb;
2020

21-
import com.microsoft.azure.documentdb.Document;
22-
import com.microsoft.azure.documentdb.DocumentClient;
23-
import com.microsoft.azure.documentdb.FeedOptions;
21+
import com.google.common.annotations.VisibleForTesting;
22+
import com.google.common.collect.Sets;
23+
import com.microsoft.azure.cosmosdb.FeedOptions;
24+
import com.microsoft.azure.cosmosdb.FeedResponse;
25+
import com.microsoft.azure.cosmosdb.rx.AsyncDocumentClient;
2426
import org.apache.hadoop.conf.Configuration;
2527
import org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext;
2628
import org.apache.hadoop.yarn.server.timelineservice.documentstore.DocumentStoreUtils;
@@ -30,12 +32,14 @@
3032
import org.apache.hadoop.yarn.server.timelineservice.documentstore.reader.DocumentStoreReader;
3133
import org.slf4j.Logger;
3234
import org.slf4j.LoggerFactory;
35+
import rx.Observable;
36+
import rx.Scheduler;
37+
import rx.schedulers.Schedulers;
3338

34-
import java.util.ArrayList;
35-
import java.util.HashSet;
36-
import java.util.Iterator;
3739
import java.util.List;
3840
import java.util.Set;
41+
import java.util.concurrent.ExecutorService;
42+
import java.util.concurrent.Executors;
3943

4044

4145
/**
@@ -49,7 +53,7 @@ public class CosmosDBDocumentStoreReader<TimelineDoc extends TimelineDocument>
4953
.getLogger(CosmosDBDocumentStoreReader.class);
5054
private static final int DEFAULT_DOCUMENTS_SIZE = 1;
5155

52-
private static volatile DocumentClient client;
56+
private static AsyncDocumentClient client;
5357
private final String databaseName;
5458
private final static String COLLECTION_LINK = "/dbs/%s/colls/%s";
5559
private final static String SELECT_TOP_FROM_COLLECTION = "SELECT TOP %d * " +
@@ -66,17 +70,24 @@ public class CosmosDBDocumentStoreReader<TimelineDoc extends TimelineDocument>
6670
"\"%s\") ";
6771
private final static String ORDER_BY_CLAUSE = " ORDER BY c.createdTime";
6872

73+
// creating thread pool of size, half of the total available threads from JVM
74+
private static ExecutorService executorService = Executors.newFixedThreadPool(
75+
Runtime.getRuntime().availableProcessors() / 2);
76+
private static Scheduler schedulerForBlockingWork =
77+
Schedulers.from(executorService);
78+
6979
public CosmosDBDocumentStoreReader(Configuration conf) {
7080
LOG.info("Initializing Cosmos DB DocumentStoreReader...");
7181
databaseName = DocumentStoreUtils.getCosmosDBDatabaseName(conf);
72-
// making CosmosDB Client Singleton
82+
initCosmosDBClient(conf);
83+
}
84+
85+
private synchronized void initCosmosDBClient(Configuration conf) {
86+
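// Lazily create the single CosmosDB async client shared by all readers.
// NOTE(review): this method locks on the instance while `client` is static; confirm readers are never constructed concurrently.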
7387
if (client == null) {
74-
synchronized (this) {
75-
if (client == null) {
76-
LOG.info("Creating Cosmos DB Client...");
77-
client = DocumentStoreUtils.createCosmosDBClient(conf);
78-
}
79-
}
88+
LOG.info("Creating Cosmos DB Reader Async Client...");
89+
client = DocumentStoreUtils.createCosmosDBAsyncClient(conf);
90+
addShutdownHook();
8091
}
8192
}
8293

@@ -104,15 +115,16 @@ public Set<String> fetchEntityTypes(String collectionName,
104115
LOG.debug("Querying Collection : {} , with query {}", collectionName,
105116
sqlQuery);
106117

107-
Set<String> entityTypes = new HashSet<>();
108-
Iterator<Document> documentIterator = client.queryDocuments(
118+
return Sets.newHashSet(client.queryDocuments(
109119
String.format(COLLECTION_LINK, databaseName, collectionName),
110-
sqlQuery, null).getQueryIterator();
111-
while (documentIterator.hasNext()) {
112-
Document document = documentIterator.next();
113-
entityTypes.add(document.getString(ENTITY_TYPE_COLUMN));
114-
}
115-
return entityTypes;
120+
sqlQuery, new FeedOptions())
121+
.map(FeedResponse::getResults) // Map the page to the list of documents
122+
.concatMap(Observable::from)
123+
.map(document -> String.valueOf(document.get(ENTITY_TYPE_COLUMN)))
124+
.toList()
125+
.subscribeOn(schedulerForBlockingWork)
126+
.toBlocking()
127+
.single());
116128
}
117129

118130
@Override
@@ -133,25 +145,25 @@ private List<TimelineDoc> queryDocuments(String collectionName,
133145
final long maxDocumentsSize) {
134146
final String sqlQuery = buildQueryWithPredicates(context, collectionName,
135147
maxDocumentsSize);
136-
List<TimelineDoc> timelineDocs = new ArrayList<>();
137148
LOG.debug("Querying Collection : {} , with query {}", collectionName,
138149
sqlQuery);
139150

140-
FeedOptions feedOptions = new FeedOptions();
141-
feedOptions.setPageSize((int) maxDocumentsSize);
142-
Iterator<Document> documentIterator = client.queryDocuments(
143-
String.format(COLLECTION_LINK, databaseName, collectionName),
144-
sqlQuery, feedOptions).getQueryIterator();
145-
while (documentIterator.hasNext()) {
146-
Document document = documentIterator.next();
147-
TimelineDoc resultDoc = document.toObject(docClass);
148-
if (resultDoc.getCreatedTime() == 0 &&
149-
document.getTimestamp() != null) {
150-
resultDoc.setCreatedTime(document.getTimestamp().getTime());
151-
}
152-
timelineDocs.add(resultDoc);
153-
}
154-
return timelineDocs;
151+
return client.queryDocuments(String.format(COLLECTION_LINK,
152+
databaseName, collectionName), sqlQuery, new FeedOptions())
153+
.map(FeedResponse::getResults) // Map the page to the list of documents
154+
.concatMap(Observable::from)
155+
.map(document -> {
156+
TimelineDoc resultDoc = document.toObject(docClass);
157+
if (resultDoc.getCreatedTime() == 0 &&
158+
document.getTimestamp() != null) {
159+
resultDoc.setCreatedTime(document.getTimestamp().getTime());
160+
}
161+
return resultDoc;
162+
})
163+
.toList()
164+
.subscribeOn(schedulerForBlockingWork)
165+
.toBlocking()
166+
.single();
155167
}
156168

157169
private String buildQueryWithPredicates(TimelineReaderContext context,
@@ -168,33 +180,34 @@ private String buildQueryWithPredicates(TimelineReaderContext context,
168180
return addPredicates(context, collectionName, queryStrBuilder);
169181
}
170182

171-
private String addPredicates(TimelineReaderContext context,
183+
@VisibleForTesting
184+
String addPredicates(TimelineReaderContext context,
172185
String collectionName, StringBuilder queryStrBuilder) {
173186
boolean hasPredicate = false;
174187

175188
queryStrBuilder.append(WHERE_CLAUSE);
176189

177-
if (context.getClusterId() != null) {
190+
if (!DocumentStoreUtils.isNullOrEmpty(context.getClusterId())) {
178191
hasPredicate = true;
179192
queryStrBuilder.append(String.format(CONTAINS_FUNC_FOR_ID,
180193
context.getClusterId()));
181194
}
182-
if (context.getUserId() != null) {
195+
if (!DocumentStoreUtils.isNullOrEmpty(context.getUserId())) {
183196
hasPredicate = true;
184197
queryStrBuilder.append(AND_OPERATOR)
185198
.append(String.format(CONTAINS_FUNC_FOR_ID, context.getUserId()));
186199
}
187-
if (context.getFlowName() != null) {
200+
if (!DocumentStoreUtils.isNullOrEmpty(context.getFlowName())) {
188201
hasPredicate = true;
189202
queryStrBuilder.append(AND_OPERATOR)
190203
.append(String.format(CONTAINS_FUNC_FOR_ID, context.getFlowName()));
191204
}
192-
if (context.getAppId() != null) {
205+
if (!DocumentStoreUtils.isNullOrEmpty(context.getAppId())) {
193206
hasPredicate = true;
194207
queryStrBuilder.append(AND_OPERATOR)
195208
.append(String.format(CONTAINS_FUNC_FOR_ID, context.getAppId()));
196209
}
197-
if (context.getEntityId() != null) {
210+
if (!DocumentStoreUtils.isNullOrEmpty(context.getEntityId())) {
198211
hasPredicate = true;
199212
queryStrBuilder.append(AND_OPERATOR)
200213
.append(String.format(CONTAINS_FUNC_FOR_ID, context.getEntityId()));
@@ -204,7 +217,7 @@ private String addPredicates(TimelineReaderContext context,
204217
queryStrBuilder.append(AND_OPERATOR)
205218
.append(String.format(CONTAINS_FUNC_FOR_ID, context.getFlowRunId()));
206219
}
207-
if (context.getEntityType() != null){
220+
if (!DocumentStoreUtils.isNullOrEmpty(context.getEntityType())){
208221
hasPredicate = true;
209222
queryStrBuilder.append(AND_OPERATOR)
210223
.append(String.format(CONTAINS_FUNC_FOR_TYPE,
@@ -224,9 +237,17 @@ private String addPredicates(TimelineReaderContext context,
224237
@Override
225238
public synchronized void close() {
226239
if (client != null) {
227-
LOG.info("Closing Cosmos DB Client...");
240+
LOG.info("Closing Cosmos DB Reader Async Client...");
228241
client.close();
229242
client = null;
230243
}
231244
}
232-
}
245+
246+
private void addShutdownHook() {
247+
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
248+
if (executorService != null) {
249+
executorService.shutdown();
250+
}
251+
}));
252+
}
253+
}

0 commit comments

Comments
 (0)