Skip to content

Commit 2959a9c

Browse files
committed
Merge branch 'trunk' into HADOOP-18547
2 parents b7b2633 + 4af4997 commit 2959a9c

File tree

8 files changed

+586
-93
lines changed

8 files changed

+586
-93
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,8 @@
2929
import org.apache.hadoop.yarn.api.records.ApplicationId;
3030
import org.apache.hadoop.yarn.api.records.Priority;
3131
import org.apache.hadoop.yarn.api.records.QueueACL;
32-
import org.apache.hadoop.yarn.api.records.QueueConfigurations;
3332
import org.apache.hadoop.yarn.api.records.QueueInfo;
3433
import org.apache.hadoop.yarn.api.records.QueueState;
35-
import org.apache.hadoop.yarn.api.records.QueueStatistics;
3634
import org.apache.hadoop.yarn.api.records.Resource;
3735
import org.apache.hadoop.yarn.api.records.ResourceInformation;
3836
import org.apache.hadoop.yarn.exceptions.YarnException;
@@ -584,83 +582,7 @@ protected QueueInfo getQueueInfo() {
584582
// from schedulerApplicationAttempt, to avoid deadlock, sacrifice
585583
// consistency here.
586584
// TODO, improve this
587-
QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
588-
queueInfo.setQueueName(queuePath.getLeafName());
589-
queueInfo.setQueuePath(queuePath.getFullPath());
590-
queueInfo.setAccessibleNodeLabels(queueNodeLabelsSettings.getAccessibleNodeLabels());
591-
queueInfo.setCapacity(queueCapacities.getCapacity());
592-
queueInfo.setMaximumCapacity(queueCapacities.getMaximumCapacity());
593-
queueInfo.setQueueState(getState());
594-
queueInfo.setDefaultNodeLabelExpression(queueNodeLabelsSettings.getDefaultLabelExpression());
595-
queueInfo.setCurrentCapacity(getUsedCapacity());
596-
queueInfo.setQueueStatistics(getQueueStatistics());
597-
queueInfo.setPreemptionDisabled(preemptionSettings.isPreemptionDisabled());
598-
queueInfo.setIntraQueuePreemptionDisabled(
599-
getIntraQueuePreemptionDisabled());
600-
queueInfo.setQueueConfigurations(getQueueConfigurations());
601-
queueInfo.setWeight(queueCapacities.getWeight());
602-
queueInfo.setMaxParallelApps(queueAppLifetimeSettings.getMaxParallelApps());
603-
return queueInfo;
604-
}
605-
606-
public QueueStatistics getQueueStatistics() {
607-
// Deliberately doesn't use lock here, because this method will be invoked
608-
// from schedulerApplicationAttempt, to avoid deadlock, sacrifice
609-
// consistency here.
610-
// TODO, improve this
611-
QueueStatistics stats = recordFactory.newRecordInstance(
612-
QueueStatistics.class);
613-
stats.setNumAppsSubmitted(getMetrics().getAppsSubmitted());
614-
stats.setNumAppsRunning(getMetrics().getAppsRunning());
615-
stats.setNumAppsPending(getMetrics().getAppsPending());
616-
stats.setNumAppsCompleted(getMetrics().getAppsCompleted());
617-
stats.setNumAppsKilled(getMetrics().getAppsKilled());
618-
stats.setNumAppsFailed(getMetrics().getAppsFailed());
619-
stats.setNumActiveUsers(getMetrics().getActiveUsers());
620-
stats.setAvailableMemoryMB(getMetrics().getAvailableMB());
621-
stats.setAllocatedMemoryMB(getMetrics().getAllocatedMB());
622-
stats.setPendingMemoryMB(getMetrics().getPendingMB());
623-
stats.setReservedMemoryMB(getMetrics().getReservedMB());
624-
stats.setAvailableVCores(getMetrics().getAvailableVirtualCores());
625-
stats.setAllocatedVCores(getMetrics().getAllocatedVirtualCores());
626-
stats.setPendingVCores(getMetrics().getPendingVirtualCores());
627-
stats.setReservedVCores(getMetrics().getReservedVirtualCores());
628-
stats.setPendingContainers(getMetrics().getPendingContainers());
629-
stats.setAllocatedContainers(getMetrics().getAllocatedContainers());
630-
stats.setReservedContainers(getMetrics().getReservedContainers());
631-
return stats;
632-
}
633-
634-
public Map<String, QueueConfigurations> getQueueConfigurations() {
635-
Map<String, QueueConfigurations> queueConfigurations = new HashMap<>();
636-
Set<String> nodeLabels = getNodeLabelsForQueue();
637-
QueueResourceQuotas queueResourceQuotas = usageTracker.getQueueResourceQuotas();
638-
for (String nodeLabel : nodeLabels) {
639-
QueueConfigurations queueConfiguration =
640-
recordFactory.newRecordInstance(QueueConfigurations.class);
641-
float capacity = queueCapacities.getCapacity(nodeLabel);
642-
float absoluteCapacity = queueCapacities.getAbsoluteCapacity(nodeLabel);
643-
float maxCapacity = queueCapacities.getMaximumCapacity(nodeLabel);
644-
float absMaxCapacity =
645-
queueCapacities.getAbsoluteMaximumCapacity(nodeLabel);
646-
float maxAMPercentage =
647-
queueCapacities.getMaxAMResourcePercentage(nodeLabel);
648-
queueConfiguration.setCapacity(capacity);
649-
queueConfiguration.setAbsoluteCapacity(absoluteCapacity);
650-
queueConfiguration.setMaxCapacity(maxCapacity);
651-
queueConfiguration.setAbsoluteMaxCapacity(absMaxCapacity);
652-
queueConfiguration.setMaxAMPercentage(maxAMPercentage);
653-
queueConfiguration.setConfiguredMinCapacity(
654-
queueResourceQuotas.getConfiguredMinResource(nodeLabel));
655-
queueConfiguration.setConfiguredMaxCapacity(
656-
queueResourceQuotas.getConfiguredMaxResource(nodeLabel));
657-
queueConfiguration.setEffectiveMinCapacity(
658-
queueResourceQuotas.getEffectiveMinResource(nodeLabel));
659-
queueConfiguration.setEffectiveMaxCapacity(
660-
queueResourceQuotas.getEffectiveMaxResource(nodeLabel));
661-
queueConfigurations.put(nodeLabel, queueConfiguration);
662-
}
663-
return queueConfigurations;
585+
return CSQueueInfoProvider.getQueueInfo(this);
664586
}
665587

666588
@Private
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
20+
21+
import org.apache.hadoop.yarn.api.records.QueueConfigurations;
22+
import org.apache.hadoop.yarn.api.records.QueueInfo;
23+
import org.apache.hadoop.yarn.api.records.QueueStatistics;
24+
import org.apache.hadoop.yarn.factories.RecordFactory;
25+
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
26+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueResourceQuotas;
27+
28+
import java.util.HashMap;
29+
import java.util.Map;
30+
import java.util.Set;
31+
32+
public final class CSQueueInfoProvider {
33+
34+
private static final RecordFactory RECORD_FACTORY =
35+
RecordFactoryProvider.getRecordFactory(null);
36+
37+
private CSQueueInfoProvider() {
38+
}
39+
40+
public static QueueInfo getQueueInfo(AbstractCSQueue csQueue) {
41+
QueueInfo queueInfo = RECORD_FACTORY.newRecordInstance(QueueInfo.class);
42+
queueInfo.setQueueName(csQueue.getQueuePathObject().getLeafName());
43+
queueInfo.setQueuePath(csQueue.getQueuePathObject().getFullPath());
44+
queueInfo.setAccessibleNodeLabels(csQueue.getAccessibleNodeLabels());
45+
queueInfo.setCapacity(csQueue.getCapacity());
46+
queueInfo.setMaximumCapacity(csQueue.getMaximumCapacity());
47+
queueInfo.setQueueState(csQueue.getState());
48+
queueInfo.setDefaultNodeLabelExpression(csQueue.getDefaultNodeLabelExpression());
49+
queueInfo.setCurrentCapacity(csQueue.getUsedCapacity());
50+
queueInfo.setQueueStatistics(getQueueStatistics(csQueue));
51+
queueInfo.setPreemptionDisabled(csQueue.getPreemptionDisabled());
52+
queueInfo.setIntraQueuePreemptionDisabled(
53+
csQueue.getIntraQueuePreemptionDisabled());
54+
queueInfo.setQueueConfigurations(getQueueConfigurations(csQueue));
55+
queueInfo.setWeight(csQueue.getQueueCapacities().getWeight());
56+
queueInfo.setMaxParallelApps(csQueue.getMaxParallelApps());
57+
return queueInfo;
58+
}
59+
60+
private static QueueStatistics getQueueStatistics(AbstractCSQueue csQueue) {
61+
QueueStatistics stats = RECORD_FACTORY.newRecordInstance(
62+
QueueStatistics.class);
63+
CSQueueMetrics queueMetrics = csQueue.getMetrics();
64+
stats.setNumAppsSubmitted(queueMetrics.getAppsSubmitted());
65+
stats.setNumAppsRunning(queueMetrics.getAppsRunning());
66+
stats.setNumAppsPending(queueMetrics.getAppsPending());
67+
stats.setNumAppsCompleted(queueMetrics.getAppsCompleted());
68+
stats.setNumAppsKilled(queueMetrics.getAppsKilled());
69+
stats.setNumAppsFailed(queueMetrics.getAppsFailed());
70+
stats.setNumActiveUsers(queueMetrics.getActiveUsers());
71+
stats.setAvailableMemoryMB(queueMetrics.getAvailableMB());
72+
stats.setAllocatedMemoryMB(queueMetrics.getAllocatedMB());
73+
stats.setPendingMemoryMB(queueMetrics.getPendingMB());
74+
stats.setReservedMemoryMB(queueMetrics.getReservedMB());
75+
stats.setAvailableVCores(queueMetrics.getAvailableVirtualCores());
76+
stats.setAllocatedVCores(queueMetrics.getAllocatedVirtualCores());
77+
stats.setPendingVCores(queueMetrics.getPendingVirtualCores());
78+
stats.setReservedVCores(queueMetrics.getReservedVirtualCores());
79+
stats.setPendingContainers(queueMetrics.getPendingContainers());
80+
stats.setAllocatedContainers(queueMetrics.getAllocatedContainers());
81+
stats.setReservedContainers(queueMetrics.getReservedContainers());
82+
return stats;
83+
}
84+
85+
private static Map<String, QueueConfigurations> getQueueConfigurations(AbstractCSQueue csQueue) {
86+
Map<String, QueueConfigurations> queueConfigurations = new HashMap<>();
87+
Set<String> nodeLabels = csQueue.getNodeLabelsForQueue();
88+
QueueResourceQuotas queueResourceQuotas = csQueue.getQueueResourceQuotas();
89+
for (String nodeLabel : nodeLabels) {
90+
QueueConfigurations queueConfiguration =
91+
RECORD_FACTORY.newRecordInstance(QueueConfigurations.class);
92+
QueueCapacities queueCapacities = csQueue.getQueueCapacities();
93+
float capacity = queueCapacities.getCapacity(nodeLabel);
94+
float absoluteCapacity = queueCapacities.getAbsoluteCapacity(nodeLabel);
95+
float maxCapacity = queueCapacities.getMaximumCapacity(nodeLabel);
96+
float absMaxCapacity =
97+
queueCapacities.getAbsoluteMaximumCapacity(nodeLabel);
98+
float maxAMPercentage =
99+
queueCapacities.getMaxAMResourcePercentage(nodeLabel);
100+
queueConfiguration.setCapacity(capacity);
101+
queueConfiguration.setAbsoluteCapacity(absoluteCapacity);
102+
queueConfiguration.setMaxCapacity(maxCapacity);
103+
queueConfiguration.setAbsoluteMaxCapacity(absMaxCapacity);
104+
queueConfiguration.setMaxAMPercentage(maxAMPercentage);
105+
queueConfiguration.setConfiguredMinCapacity(
106+
queueResourceQuotas.getConfiguredMinResource(nodeLabel));
107+
queueConfiguration.setConfiguredMaxCapacity(
108+
queueResourceQuotas.getConfiguredMaxResource(nodeLabel));
109+
queueConfiguration.setEffectiveMinCapacity(
110+
queueResourceQuotas.getEffectiveMinResource(nodeLabel));
111+
queueConfiguration.setEffectiveMaxCapacity(
112+
queueResourceQuotas.getEffectiveMaxResource(nodeLabel));
113+
queueConfigurations.put(nodeLabel, queueConfiguration);
114+
}
115+
return queueConfigurations;
116+
}
117+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/RouterMetrics.java

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,12 @@ public final class RouterMetrics {
127127
private MutableGaugeInt numGetRMNodeLabelsFailedRetrieved;
128128
@Metric("# of checkUserAccessToQueue failed to be retrieved")
129129
private MutableGaugeInt numCheckUserAccessToQueueFailedRetrieved;
130+
@Metric("# of getDelegationToken failed to be retrieved")
131+
private MutableGaugeInt numGetDelegationTokenFailedRetrieved;
132+
@Metric("# of renewDelegationToken failed to be retrieved")
133+
private MutableGaugeInt numRenewDelegationTokenFailedRetrieved;
134+
@Metric("# of renewDelegationToken failed to be retrieved")
135+
private MutableGaugeInt numCancelDelegationTokenFailedRetrieved;
130136

131137
// Aggregate metrics are shared, and don't have to be looked up per call
132138
@Metric("Total number of successful Submitted apps and latency(ms)")
@@ -215,6 +221,12 @@ public final class RouterMetrics {
215221
private MutableRate totalSucceededGetRMNodeLabelsRetrieved;
216222
@Metric("Total number of successful Retrieved CheckUserAccessToQueue and latency(ms)")
217223
private MutableRate totalSucceededCheckUserAccessToQueueRetrieved;
224+
@Metric("Total number of successful Retrieved GetDelegationToken and latency(ms)")
225+
private MutableRate totalSucceededGetDelegationTokenRetrieved;
226+
@Metric("Total number of successful Retrieved RenewDelegationToken and latency(ms)")
227+
private MutableRate totalSucceededRenewDelegationTokenRetrieved;
228+
@Metric("Total number of successful Retrieved CancelDelegationToken and latency(ms)")
229+
private MutableRate totalSucceededCancelDelegationTokenRetrieved;
218230

219231
/**
220232
* Provide quantile counters for all latencies.
@@ -262,6 +274,9 @@ public final class RouterMetrics {
262274
private MutableQuantiles getRefreshQueuesLatency;
263275
private MutableQuantiles getRMNodeLabelsLatency;
264276
private MutableQuantiles checkUserAccessToQueueLatency;
277+
private MutableQuantiles getDelegationTokenLatency;
278+
private MutableQuantiles renewDelegationTokenLatency;
279+
private MutableQuantiles cancelDelegationTokenLatency;
265280

266281
private static volatile RouterMetrics instance = null;
267282
private static MetricsRegistry registry;
@@ -423,6 +438,15 @@ private RouterMetrics() {
423438

424439
checkUserAccessToQueueLatency = registry.newQuantiles("checkUserAccessToQueueLatency",
425440
"latency of get apptimeouts timeouts", "ops", "latency", 10);
441+
442+
getDelegationTokenLatency = registry.newQuantiles("getDelegationTokenLatency",
443+
"latency of get delegation token timeouts", "ops", "latency", 10);
444+
445+
renewDelegationTokenLatency = registry.newQuantiles("renewDelegationTokenLatency",
446+
"latency of renew delegation token timeouts", "ops", "latency", 10);
447+
448+
cancelDelegationTokenLatency = registry.newQuantiles("cancelDelegationTokenLatency",
449+
"latency of cancel delegation token timeouts", "ops", "latency", 10);
426450
}
427451

428452
public static RouterMetrics getMetrics() {
@@ -655,10 +679,25 @@ public long getNumSucceededGetRMNodeLabelsRetrieved() {
655679
}
656680

657681
@VisibleForTesting
658-
public long getNumSucceededCheckUserAccessToQueueRetrievedRetrieved() {
682+
public long getNumSucceededCheckUserAccessToQueueRetrieved() {
659683
return totalSucceededCheckUserAccessToQueueRetrieved.lastStat().numSamples();
660684
}
661685

686+
@VisibleForTesting
687+
public long getNumSucceededGetDelegationTokenRetrieved() {
688+
return totalSucceededGetDelegationTokenRetrieved.lastStat().numSamples();
689+
}
690+
691+
@VisibleForTesting
692+
public long getNumSucceededRenewDelegationTokenRetrieved() {
693+
return totalSucceededRenewDelegationTokenRetrieved.lastStat().numSamples();
694+
}
695+
696+
@VisibleForTesting
697+
public long getNumSucceededCancelDelegationTokenRetrieved() {
698+
return totalSucceededCancelDelegationTokenRetrieved.lastStat().numSamples();
699+
}
700+
662701
@VisibleForTesting
663702
public double getLatencySucceededAppsCreated() {
664703
return totalSucceededAppsCreated.lastStat().mean();
@@ -874,6 +913,21 @@ public double getLatencySucceededCheckUserAccessToQueueRetrieved() {
874913
return totalSucceededCheckUserAccessToQueueRetrieved.lastStat().mean();
875914
}
876915

916+
@VisibleForTesting
917+
public double getLatencySucceededGetDelegationTokenRetrieved() {
918+
return totalSucceededGetDelegationTokenRetrieved.lastStat().mean();
919+
}
920+
921+
@VisibleForTesting
922+
public double getLatencySucceededRenewDelegationTokenRetrieved() {
923+
return totalSucceededRenewDelegationTokenRetrieved.lastStat().mean();
924+
}
925+
926+
@VisibleForTesting
927+
public double getLatencySucceededCancelDelegationTokenRetrieved() {
928+
return totalSucceededCancelDelegationTokenRetrieved.lastStat().mean();
929+
}
930+
877931
@VisibleForTesting
878932
public int getAppsFailedCreated() {
879933
return numAppsFailedCreated.value();
@@ -1068,6 +1122,18 @@ public int getCheckUserAccessToQueueFailedRetrieved() {
10681122
return numCheckUserAccessToQueueFailedRetrieved.value();
10691123
}
10701124

1125+
public int getDelegationTokenFailedRetrieved() {
1126+
return numGetDelegationTokenFailedRetrieved.value();
1127+
}
1128+
1129+
public int getRenewDelegationTokenFailedRetrieved() {
1130+
return numRenewDelegationTokenFailedRetrieved.value();
1131+
}
1132+
1133+
public int getCancelDelegationTokenFailedRetrieved() {
1134+
return numCancelDelegationTokenFailedRetrieved.value();
1135+
}
1136+
10711137
public void succeededAppsCreated(long duration) {
10721138
totalSucceededAppsCreated.add(duration);
10731139
getNewApplicationLatency.add(duration);
@@ -1283,6 +1349,21 @@ public void succeededCheckUserAccessToQueueRetrieved(long duration) {
12831349
checkUserAccessToQueueLatency.add(duration);
12841350
}
12851351

1352+
public void succeededGetDelegationTokenRetrieved(long duration) {
1353+
totalSucceededGetDelegationTokenRetrieved.add(duration);
1354+
getDelegationTokenLatency.add(duration);
1355+
}
1356+
1357+
public void succeededRenewDelegationTokenRetrieved(long duration) {
1358+
totalSucceededRenewDelegationTokenRetrieved.add(duration);
1359+
renewDelegationTokenLatency.add(duration);
1360+
}
1361+
1362+
public void succeededCancelDelegationTokenRetrieved(long duration) {
1363+
totalSucceededCancelDelegationTokenRetrieved.add(duration);
1364+
cancelDelegationTokenLatency.add(duration);
1365+
}
1366+
12861367
public void incrAppsFailedCreated() {
12871368
numAppsFailedCreated.incr();
12881369
}
@@ -1454,4 +1535,16 @@ public void incrGetRMNodeLabelsFailedRetrieved() {
14541535
public void incrCheckUserAccessToQueueFailedRetrieved() {
14551536
numCheckUserAccessToQueueFailedRetrieved.incr();
14561537
}
1457-
}
1538+
1539+
public void incrGetDelegationTokenFailedRetrieved() {
1540+
numGetDelegationTokenFailedRetrieved.incr();
1541+
}
1542+
1543+
public void incrRenewDelegationTokenFailedRetrieved() {
1544+
numRenewDelegationTokenFailedRetrieved.incr();
1545+
}
1546+
1547+
public void incrCancelDelegationTokenFailedRetrieved() {
1548+
numCancelDelegationTokenFailedRetrieved.incr();
1549+
}
1550+
}

0 commit comments

Comments
 (0)