Skip to content

Commit 2fe122e

Browse files
committed
YARN-10154. CS Dynamic Queues cannot be configured with absolute resources. Contributed by Manikandan R.
1 parent 5635066 commit 2fe122e

File tree

11 files changed

+573
-26
lines changed

11 files changed

+573
-26
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractAutoCreatedLeafQueue.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
1919

20+
import org.apache.hadoop.yarn.api.records.Resource;
2021
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
2122
.SchedulerDynamicEditException;
2223
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common
@@ -67,6 +68,22 @@ public void setEntitlement(QueueEntitlement entitlement)
6768
setEntitlement(NO_LABEL, entitlement);
6869
}
6970

71+
@Override
72+
protected Resource getMinimumAbsoluteResource(String queuePath,
73+
String label) {
74+
return super.getMinimumAbsoluteResource(csContext.getConfiguration()
75+
.getAutoCreatedQueueTemplateConfPrefix(this.getParent().getQueuePath()),
76+
label);
77+
}
78+
79+
@Override
80+
protected Resource getMaximumAbsoluteResource(String queuePath,
81+
String label) {
82+
return super.getMaximumAbsoluteResource(csContext.getConfiguration()
83+
.getAutoCreatedQueueTemplateConfPrefix(this.getParent().getQueuePath()),
84+
label);
85+
}
86+
7087
/**
7188
* This method changes the capacity of a queue and adjusts its
7289
* absoluteCapacity

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -524,16 +524,26 @@ private void setupMaximumAllocation(CapacitySchedulerConfiguration csConf) {
524524
return unionInheritedWeights;
525525
}
526526

527+
protected Resource getMinimumAbsoluteResource(String queuePath, String label) {
528+
Resource minResource = csContext.getConfiguration()
529+
.getMinimumResourceRequirement(label, queuePath, resourceTypes);
530+
return minResource;
531+
}
532+
533+
protected Resource getMaximumAbsoluteResource(String queuePath, String label) {
534+
Resource maxResource = csContext.getConfiguration()
535+
.getMaximumResourceRequirement(label, queuePath, resourceTypes);
536+
return maxResource;
537+
}
538+
527539
protected void updateConfigurableResourceRequirement(String queuePath,
528540
Resource clusterResource) {
529541
CapacitySchedulerConfiguration conf = csContext.getConfiguration();
530542
Set<String> configuredNodelabels = conf.getConfiguredNodeLabels(queuePath);
531543

532544
for (String label : configuredNodelabels) {
533-
Resource minResource = conf.getMinimumResourceRequirement(label,
534-
queuePath, resourceTypes);
535-
Resource maxResource = conf.getMaximumResourceRequirement(label,
536-
queuePath, resourceTypes);
545+
Resource minResource = getMinimumAbsoluteResource(queuePath, label);
546+
Resource maxResource = getMaximumAbsoluteResource(queuePath, label);
537547

538548
LOG.debug("capacityConfigType is '{}' for queue {}",
539549
capacityConfigType, getQueuePath());

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueue.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
2020

2121
import org.apache.hadoop.yarn.api.records.Resource;
22+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
2223
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException;
2324
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
2425

@@ -74,6 +75,9 @@ public void reinitializeFromTemplate(AutoCreatedLeafQueueConfig
7475
writeLock.lock();
7576
try {
7677

78+
this.getParent().updateClusterResource(this.csContext.getClusterResource(),
79+
new ResourceLimits(this.csContext.getClusterResource()));
80+
7781
// TODO:
7882
// reinitialize only capacities for now since 0 capacity updates
7983
// can cause
@@ -100,7 +104,7 @@ public void reinitializeFromTemplate(AutoCreatedLeafQueueConfig
100104
}
101105
}
102106

103-
private void mergeCapacities(QueueCapacities capacities) {
107+
public void mergeCapacities(QueueCapacities capacities) {
104108
for ( String nodeLabel : capacities.getExistingNodeLabels()) {
105109
queueCapacities.setCapacity(nodeLabel,
106110
capacities.getCapacity(nodeLabel));

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,12 @@ public void setMaximumCapacityByLabel(String queue, String label,
557557
float capacity) {
558558
setFloat(getNodeLabelPrefix(queue, label) + MAXIMUM_CAPACITY, capacity);
559559
}
560+
561+
public void setMaximumCapacityByLabel(String queue, String label,
562+
String absoluteResourceCapacity) {
563+
set(getNodeLabelPrefix(queue, label) + MAXIMUM_CAPACITY,
564+
absoluteResourceCapacity);
565+
}
560566

561567
public int getUserLimit(String queue) {
562568
int userLimit = getInt(getQueuePrefix(queue) + USER_LIMIT,
@@ -1960,11 +1966,29 @@ public void setAutoCreatedLeafQueueConfigCapacity(String queuePath,
19601966
@Private
19611967
public void setAutoCreatedLeafQueueTemplateCapacityByLabel(String queuePath,
19621968
String label, float val) {
1963-
String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix(
1964-
queuePath);
1969+
String leafQueueConfPrefix =
1970+
getAutoCreatedQueueTemplateConfPrefix(queuePath);
19651971
setCapacityByLabel(leafQueueConfPrefix, label, val);
19661972
}
19671973

1974+
@VisibleForTesting
1975+
@Private
1976+
public void setAutoCreatedLeafQueueTemplateCapacityByLabel(String queuePath,
1977+
String label, Resource resource) {
1978+
1979+
String leafQueueConfPrefix =
1980+
getAutoCreatedQueueTemplateConfPrefix(queuePath);
1981+
1982+
StringBuilder resourceString = new StringBuilder();
1983+
resourceString
1984+
.append("[" + AbsoluteResourceType.MEMORY.toString().toLowerCase() + "="
1985+
+ resource.getMemorySize() + ","
1986+
+ AbsoluteResourceType.VCORES.toString().toLowerCase() + "="
1987+
+ resource.getVirtualCores() + "]");
1988+
1989+
setCapacityByLabel(leafQueueConfPrefix, label, resourceString.toString());
1990+
}
1991+
19681992
@Private
19691993
@VisibleForTesting
19701994
public void setAutoCreatedLeafQueueConfigMaxCapacity(String queuePath,
@@ -1983,6 +2007,23 @@ public void setAutoCreatedLeafQueueTemplateMaxCapacity(String queuePath,
19832007
setMaximumCapacityByLabel(leafQueueConfPrefix, label, val);
19842008
}
19852009

2010+
@Private
2011+
@VisibleForTesting
2012+
public void setAutoCreatedLeafQueueTemplateMaxCapacity(String queuePath,
2013+
String label, Resource resource) {
2014+
String leafQueueConfPrefix = getAutoCreatedQueueTemplateConfPrefix(
2015+
queuePath);
2016+
2017+
StringBuilder resourceString = new StringBuilder();
2018+
resourceString
2019+
.append("[" + AbsoluteResourceType.MEMORY.toString().toLowerCase() + "="
2020+
+ resource.getMemorySize() + ","
2021+
+ AbsoluteResourceType.VCORES.toString().toLowerCase() + "="
2022+
+ resource.getVirtualCores() + "]");
2023+
2024+
setMaximumCapacityByLabel(leafQueueConfPrefix, label, resourceString.toString());
2025+
}
2026+
19862027
@VisibleForTesting
19872028
@Private
19882029
public void setAutoCreatedLeafQueueConfigUserLimit(String queuePath,
@@ -2115,7 +2156,6 @@ private Resource internalGetLabeledResourceRequirementForQueue(String queue,
21152156
if (subGroup.trim().isEmpty()) {
21162157
return Resources.none();
21172158
}
2118-
21192159
subGroup = subGroup.substring(1, subGroup.length() - 1);
21202160
for (String kvPair : subGroup.trim().split(",")) {
21212161
String[] splits = kvPair.split("=");

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ManagedParentQueue.java

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@
2222
import org.apache.hadoop.yarn.exceptions.YarnException;
2323
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
2424
.SchedulerDynamicEditException;
25+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue.CapacityConfigType;
2526
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica
2627
.FiCaSchedulerApp;
27-
28+
import org.apache.hadoop.yarn.util.resource.Resources;
2829
import org.slf4j.Logger;
2930
import org.slf4j.LoggerFactory;
3031

@@ -150,24 +151,78 @@ private void reinitializeQueueManagementPolicy() throws IOException {
150151
}
151152
}
152153

153-
protected AutoCreatedLeafQueueConfig.Builder initializeLeafQueueConfigs() {
154+
protected AutoCreatedLeafQueueConfig.Builder initializeLeafQueueConfigs() throws IOException {
154155

155156
AutoCreatedLeafQueueConfig.Builder builder =
156157
new AutoCreatedLeafQueueConfig.Builder();
157158

158159
String leafQueueTemplateConfPrefix = getLeafQueueConfigPrefix(
159160
csContext.getConfiguration());
160161
//Load template configuration
161-
builder.configuration(
162-
super.initializeLeafQueueConfigs(leafQueueTemplateConfPrefix));
162+
CapacitySchedulerConfiguration conf =
163+
super.initializeLeafQueueConfigs(leafQueueTemplateConfPrefix);
164+
builder.configuration(conf);
165+
166+
for (String nodeLabel : conf
167+
.getConfiguredNodeLabels(csContext.getConfiguration()
168+
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()))) {
169+
Resource templateMinResource = conf.getMinimumResourceRequirement(
170+
nodeLabel, csContext.getConfiguration()
171+
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()),
172+
resourceTypes);
173+
174+
if (this.capacityConfigType.equals(CapacityConfigType.PERCENTAGE)
175+
&& !templateMinResource.equals(Resources.none())) {
176+
throw new IOException("Managed Parent Queue " + this.getQueuePath()
177+
+ " config type is different from leaf queue template config type");
178+
}
179+
}
163180

164181
//Load template capacities
165182
QueueCapacities queueCapacities = new QueueCapacities(false);
166183
CSQueueUtils.loadUpdateAndCheckCapacities(csContext.getConfiguration()
167184
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()),
168185
csContext.getConfiguration(), queueCapacities, getQueueCapacities());
169-
builder.capacities(queueCapacities);
170186

187+
188+
/**
189+
* Populate leaf queue template (of Parent resources configured in
190+
* ABSOLUTE_RESOURCE) capacities with actual values for which configured has
191+
* been defined in ABSOLUTE_RESOURCE format.
192+
*
193+
*/
194+
if (this.capacityConfigType.equals(CapacityConfigType.ABSOLUTE_RESOURCE)) {
195+
for (String label : queueCapacities.getExistingNodeLabels()) {
196+
queueCapacities.setCapacity(label,
197+
this.csContext.getResourceCalculator().divide(
198+
this.csContext.getClusterResource(),
199+
this.csContext.getConfiguration().getMinimumResourceRequirement(
200+
label,
201+
this.csContext.getConfiguration()
202+
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()),
203+
resourceTypes),
204+
getQueueResourceQuotas().getConfiguredMinResource(label)));
205+
206+
queueCapacities.setMaximumCapacity(label,
207+
this.csContext.getResourceCalculator().divide(
208+
this.csContext.getClusterResource(),
209+
this.csContext.getConfiguration().getMaximumResourceRequirement(
210+
label,
211+
this.csContext.getConfiguration()
212+
.getAutoCreatedQueueTemplateConfPrefix(getQueuePath()),
213+
resourceTypes),
214+
getQueueResourceQuotas().getConfiguredMaxResource(label)));
215+
216+
queueCapacities.setAbsoluteCapacity(label,
217+
queueCapacities.getCapacity(label)
218+
* getQueueCapacities().getAbsoluteCapacity(label));
219+
220+
queueCapacities.setAbsoluteMaximumCapacity(label,
221+
queueCapacities.getMaximumCapacity(label)
222+
* getQueueCapacities().getAbsoluteMaximumCapacity(label));
223+
}
224+
}
225+
builder.capacities(queueCapacities);
171226
return builder;
172227
}
173228

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,6 @@ public boolean hasChildQueues() {
933933

934934
private void calculateEffectiveResourcesAndCapacity(String label,
935935
Resource clusterResource) {
936-
937936
// For root queue, ensure that max/min resource is updated to latest
938937
// cluster resource.
939938
Resource resourceByLabel = labelManager.getResourceByLabel(label,
@@ -1134,7 +1133,9 @@ private void deriveCapacityFromAbsoluteConfigurations(String label,
11341133
LOG.info("LeafQueue:" + leafQueue.getQueuePath() + ", maxApplications="
11351134
+ maxApplications + ", maxApplicationsPerUser="
11361135
+ maxApplicationsPerUser + ", Abs Cap:"
1137-
+ childQueue.getQueueCapacities().getAbsoluteCapacity(label));
1136+
+ childQueue.getQueueCapacities().getAbsoluteCapacity(label) + ", Cap: "
1137+
+ childQueue.getQueueCapacities().getCapacity(label) + ", MaxCap : "
1138+
+ childQueue.getQueueCapacities().getMaximumCapacity(label));
11381139
}
11391140
}
11401141

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.slf4j.Logger;
2525
import org.slf4j.LoggerFactory;
2626
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
27+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
2728
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
2829
.SchedulerDynamicEditException;
2930
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
@@ -51,6 +52,7 @@
5152
.FiCaSchedulerApp;
5253
import org.apache.hadoop.yarn.util.Clock;
5354
import org.apache.hadoop.yarn.util.MonotonicClock;
55+
import org.apache.hadoop.yarn.util.resource.Resources;
5456

5557
import java.io.IOException;
5658
import java.util.ArrayList;
@@ -679,6 +681,19 @@ public void commitQueueManagementChanges(
679681
LOG.debug("Queue is already de-activated. Skipping "
680682
+ "de-activation : {}", leafQueue.getQueuePath());
681683
} else{
684+
/**
685+
* While deactivating queues of type ABSOLUTE_RESOURCE, configured
686+
* min resource has to be set based on updated capacity (which is
687+
* again based on updated queue entitlements). Otherwise,
688+
* ParentQueue#calculateEffectiveResourcesAndCapacity calculations
689+
* lead to incorrect results.
690+
*/
691+
leafQueue
692+
.mergeCapacities(updatedQueueTemplate.getQueueCapacities());
693+
leafQueue.getQueueResourceQuotas()
694+
.setConfiguredMinResource(Resources.multiply(
695+
this.scheduler.getClusterResource(), updatedQueueTemplate
696+
.getQueueCapacities().getCapacity(nodeLabel)));
682697
deactivate(leafQueue, nodeLabel);
683698
}
684699
}

0 commit comments

Comments
 (0)