Skip to content

Commit cff663f

Browse files
committed
YARN-11801: NPE in FifoCandidatesSelector.selectCandidates when preempting resources for an auto-created queue without child queues
1 parent 3d2f4d6 commit cff663f

File tree

3 files changed

+78
-34
lines changed

3 files changed

+78
-34
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ProportionalCapacityPreemptionPolicy.java

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet;
2323
import org.apache.commons.lang3.StringUtils;
2424
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractParentQueue;
25+
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
2526
import org.slf4j.Logger;
2627
import org.slf4j.LoggerFactory;
2728
import org.apache.hadoop.classification.InterfaceStability.Unstable;
@@ -430,15 +431,28 @@ private void cleanupStaledPreemptionCandidates(long currentTime) {
430431
}
431432

432433
private Set<String> getLeafQueueNames(TempQueuePerPartition q) {
433-
// Also exclude ParentQueues, which might be without children
434-
if (CollectionUtils.isEmpty(q.children)
435-
&& !(q.parentQueue instanceof ManagedParentQueue)
436-
&& (q.parentQueue == null
437-
|| !q.parentQueue.isEligibleForAutoQueueCreation())) {
438-
return ImmutableSet.of(q.queueName);
434+
Set<String> leafQueueNames = new HashSet<>();
435+
436+
boolean isAutoQueueEligible = q.parentQueue != null &&
437+
(q.parentQueue.isEligibleForAutoQueueCreation() ||
438+
q.parentQueue.isEligibleForLegacyAutoQueueCreation());
439+
boolean isManagedParent = q.parentQueue instanceof ManagedParentQueue;
440+
441+
if (isAutoQueueEligible || isManagedParent) {
442+
return leafQueueNames;
443+
}
444+
445+
// Only consider this a leaf queue if:
446+
// It has no children AND
447+
// It is a concrete leaf queue (not a childless parent)
448+
if (CollectionUtils.isEmpty(q.children)) {
449+
CSQueue queue = scheduler.getQueue(q.queueName);
450+
if (queue instanceof LeafQueue) {
451+
leafQueueNames.add(q.queueName);
452+
}
453+
return leafQueueNames;
439454
}
440455

441-
Set<String> leafQueueNames = new HashSet<>();
442456
for (TempQueuePerPartition child : q.children) {
443457
leafQueueNames.addAll(getLeafQueueNames(child));
444458
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractParentQueue.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,11 @@ public boolean isEligibleForAutoQueueCreation() {
553553
isAutoQueueCreationV2Enabled(getQueuePathObject());
554554
}
555555

556+
public boolean isEligibleForLegacyAutoQueueCreation() {
557+
return isDynamicQueue() || queueContext.getConfiguration().
558+
isAutoCreateChildQueueEnabled(getQueuePathObject());
559+
}
560+
556561
@Override
557562
public void reinitialize(CSQueue newlyParsedQueue,
558563
Resource clusterResource) throws IOException {

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/TestProportionalCapacityPreemptionPolicy.java

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,46 +1083,67 @@ public void testRefreshPreemptionProperties() throws Exception {
10831083
}
10841084

10851085
@Test
1086-
public void testLeafQueueNameExtraction() throws Exception {
1087-
ProportionalCapacityPreemptionPolicy policy =
1088-
buildPolicy(Q_DATA_FOR_IGNORE);
1086+
public void testLeafQueueNameExtractionWithFlexibleAQC() throws Exception {
1087+
runLeafQueueNameExtractionTest(true);
1088+
}
1089+
1090+
@Test
1091+
public void testLeafQueueNameExtractionWithLegacyAQC() throws Exception {
1092+
runLeafQueueNameExtractionTest(false);
1093+
}
1094+
1095+
private void runLeafQueueNameExtractionTest(boolean isFlexible) throws Exception {
1096+
ProportionalCapacityPreemptionPolicy policy = buildPolicy(Q_DATA_FOR_IGNORE);
10891097
ParentQueue root = (ParentQueue) mCS.getRootQueue();
1090-
root.addDynamicParentQueue("childlessFlexible");
1098+
1099+
String queueName = isFlexible ? "childlessFlexible" : "childlessLegacy";
1100+
String dynamicQueuePath = isFlexible ? "root.dynamicParent" : "root.dynamicLegacyParent";
1101+
1102+
root.addDynamicParentQueue(queueName);
10911103
List<CSQueue> queues = root.getChildQueues();
10921104
ArrayList<CSQueue> extendedQueues = new ArrayList<>();
10931105
LinkedList<ParentQueue> pqs = new LinkedList<>();
1094-
ParentQueue dynamicParent = mockParentQueue(
1095-
null, 0, pqs);
1096-
when(dynamicParent.getQueuePath()).thenReturn("root.dynamicParent");
1097-
when(dynamicParent.getQueueCapacities()).thenReturn(
1098-
new QueueCapacities(false));
1099-
QueueResourceQuotas dynamicParentQr = new QueueResourceQuotas();
1100-
dynamicParentQr.setEffectiveMaxResource(Resource.newInstance(1, 1));
1101-
dynamicParentQr.setEffectiveMinResource(Resources.createResource(1));
1102-
dynamicParentQr.setEffectiveMaxResource(RMNodeLabelsManager.NO_LABEL,
1103-
Resource.newInstance(1, 1));
1104-
dynamicParentQr.setEffectiveMinResource(RMNodeLabelsManager.NO_LABEL,
1105-
Resources.createResource(1));
1106-
when(dynamicParent.getQueueResourceQuotas()).thenReturn(dynamicParentQr);
1107-
when(dynamicParent.getEffectiveCapacity(RMNodeLabelsManager.NO_LABEL))
1108-
.thenReturn(Resources.createResource(1));
1109-
when(dynamicParent.getEffectiveMaxCapacity(RMNodeLabelsManager.NO_LABEL))
1110-
.thenReturn(Resource.newInstance(1, 1));
1111-
ResourceUsage resUsage = new ResourceUsage();
1112-
resUsage.setUsed(Resources.createResource(1024));
1113-
resUsage.setReserved(Resources.createResource(1024));
1114-
when(dynamicParent.getQueueResourceUsage()).thenReturn(resUsage);
1115-
when(dynamicParent.isEligibleForAutoQueueCreation()).thenReturn(true);
1106+
1107+
ParentQueue dynamicParent = mockParentQueue(null, 0, pqs);
1108+
mockQueueFields(dynamicParent, dynamicQueuePath);
1109+
1110+
if (isFlexible) {
1111+
when(dynamicParent.isEligibleForAutoQueueCreation()).thenReturn(true);
1112+
} else {
1113+
when(dynamicParent.isEligibleForLegacyAutoQueueCreation()).thenReturn(true);
1114+
}
1115+
11161116
extendedQueues.add(dynamicParent);
11171117
extendedQueues.addAll(queues);
11181118
when(root.getChildQueues()).thenReturn(extendedQueues);
11191119

11201120
policy.editSchedule();
11211121

1122-
assertFalse(policy.getLeafQueueNames().contains("root.dynamicParent"),
1122+
assertFalse(policy.getLeafQueueNames().contains(dynamicQueuePath),
11231123
"dynamicParent should not be a LeafQueue candidate");
11241124
}
11251125

1126+
private void mockQueueFields(ParentQueue queue, String queuePath) {
1127+
when(queue.getQueuePath()).thenReturn(queuePath);
1128+
when(queue.getQueueCapacities()).thenReturn(new QueueCapacities(false));
1129+
1130+
QueueResourceQuotas qrq = new QueueResourceQuotas();
1131+
qrq.setEffectiveMaxResource(Resource.newInstance(1, 1));
1132+
qrq.setEffectiveMinResource(Resources.createResource(1));
1133+
qrq.setEffectiveMaxResource(RMNodeLabelsManager.NO_LABEL, Resource.newInstance(1, 1));
1134+
qrq.setEffectiveMinResource(RMNodeLabelsManager.NO_LABEL, Resources.createResource(1));
1135+
1136+
when(queue.getQueueResourceQuotas()).thenReturn(qrq);
1137+
when(queue.getEffectiveCapacity(RMNodeLabelsManager.NO_LABEL))
1138+
.thenReturn(Resources.createResource(1));
1139+
when(queue.getEffectiveMaxCapacity(RMNodeLabelsManager.NO_LABEL))
1140+
.thenReturn(Resource.newInstance(1, 1));
1141+
1142+
ResourceUsage usage = new ResourceUsage();
1143+
usage.setUsed(Resources.createResource(1024));
1144+
usage.setReserved(Resources.createResource(1024));
1145+
when(queue.getQueueResourceUsage()).thenReturn(usage);
1146+
}
11261147
static class IsPreemptionRequestFor
11271148
implements ArgumentMatcher<ContainerPreemptEvent> {
11281149
private final ApplicationAttemptId appAttId;
@@ -1369,6 +1390,10 @@ LeafQueue mockLeafQueue(ParentQueue p, Resource tot, int i, Resource[] abs,
13691390
Resource[] used, Resource[] pending, Resource[] reserved, int[] apps,
13701391
Resource[] gran) {
13711392
LeafQueue lq = mock(LeafQueue.class);
1393+
1394+
String queuePath = p.getQueuePath() + ".queue" + (char)('A' + i - 1);
1395+
when(mCS.getQueue(queuePath)).thenReturn(lq);
1396+
13721397
ResourceCalculator rc = mCS.getResourceCalculator();
13731398
List<ApplicationAttemptId> appAttemptIdList =
13741399
new ArrayList<ApplicationAttemptId>();

0 commit comments

Comments
 (0)