Skip to content

Commit 0976392

Browse files
committed
YARN-9590. Correct incompatible, incomplete and redundant activities. Contributed by Tao Yang.
1 parent 9fded67 commit 0976392

File tree

9 files changed: 36 additions (+36) and 18 deletions (-18).

9 files changed: 36 additions (+36) and 18 deletions (-18).

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesLogger.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -102,20 +102,20 @@ public static void recordAppActivityWithoutAllocation(
102102
// Add application-container activity into specific node allocation.
103103
activitiesManager.addSchedulingActivityForNode(nodeId,
104104
requestName, null,
105-
priorityStr, ActivityState.SKIPPED, diagnostic, type,
105+
priorityStr, appState, diagnostic, type,
106106
null);
107107
type = "request";
108108
// Add application-container activity into specific node allocation.
109109
activitiesManager.addSchedulingActivityForNode(nodeId,
110110
application.getApplicationId().toString(), requestName,
111-
priorityStr, ActivityState.SKIPPED,
111+
priorityStr, appState,
112112
ActivityDiagnosticConstant.EMPTY, type, allocationRequestId);
113113
}
114114
// Add queue-application activity into specific node allocation.
115115
activitiesManager.addSchedulingActivityForNode(nodeId,
116116
application.getQueueName(),
117117
application.getApplicationId().toString(),
118-
application.getPriority().toString(), ActivityState.SKIPPED,
118+
application.getPriority().toString(), appState,
119119
schedulerKey != null ? ActivityDiagnosticConstant.EMPTY :
120120
diagnostic, "app", null);
121121
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/activities/ActivitiesManager.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -339,8 +339,10 @@ void finishAppAllocationRecording(ApplicationId applicationId,
339339
appAllocations = curAppAllocations;
340340
}
341341
}
342-
if (appAllocations.size() == appActivitiesMaxQueueLength) {
342+
int curQueueLength = appAllocations.size();
343+
while (curQueueLength >= appActivitiesMaxQueueLength) {
343344
appAllocations.poll();
345+
--curQueueLength;
344346
}
345347
appAllocations.add(appAllocation);
346348
Long stopTime =

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1597,7 +1597,7 @@ private void allocateFromReservedContainer(FiCaSchedulerNode node,
15971597
} else{
15981598
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
15991599
queue.getParent().getQueueName(), queue.getQueueName(),
1600-
ActivityState.ACCEPTED, ActivityDiagnosticConstant.EMPTY);
1600+
ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);
16011601
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
16021602
node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
16031603
}
@@ -1687,6 +1687,10 @@ private CSAssignment allocateContainersOnMultiNodes(
16871687
}
16881688
LOG.debug("This node or this node partition doesn't have available or "
16891689
+ "killable resource");
1690+
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, null,
1691+
"", getRootQueue().getQueueName(), ActivityState.REJECTED,
1692+
ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " "
1693+
+ candidates.getPartition());
16901694
return null;
16911695
}
16921696

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1188,6 +1188,9 @@ public CSAssignment assignContainers(Resource clusterResource,
11881188
application.updateNodeInfoForAMDiagnostics(node);
11891189
} else if (assignment.getSkippedType()
11901190
== CSAssignment.SkippedType.QUEUE_LIMIT) {
1191+
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
1192+
getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED,
1193+
ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM);
11911194
return assignment;
11921195
} else{
11931196
// If we don't allocate anything, and it is not skipped by application,

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -109,16 +109,10 @@ protected CSAssignment getCSAssignmentFromAllocateResult(
109109
allocatedResource);
110110

111111
if (rmContainer != null) {
112-
ActivitiesLogger.APP.recordAppActivityWithAllocation(
113-
activitiesManager, node, application, updatedContainer,
114-
ActivityState.RE_RESERVED);
115112
ActivitiesLogger.APP.finishSkippedAppAllocationRecording(
116113
activitiesManager, application.getApplicationId(),
117114
ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);
118115
} else {
119-
ActivitiesLogger.APP.recordAppActivityWithAllocation(
120-
activitiesManager, node, application, updatedContainer,
121-
ActivityState.RESERVED);
122116
ActivitiesLogger.APP.finishAllocatedAppAllocationRecording(
123117
activitiesManager, application.getApplicationId(),
124118
updatedContainer.getContainerId(), ActivityState.RESERVED,
@@ -149,7 +143,7 @@ protected CSAssignment getCSAssignmentFromAllocateResult(
149143
node, application, updatedContainer, ActivityState.ALLOCATED);
150144
ActivitiesLogger.APP.finishAllocatedAppAllocationRecording(
151145
activitiesManager, application.getApplicationId(),
152-
updatedContainer.getContainerId(), ActivityState.ACCEPTED,
146+
updatedContainer.getContainerId(), ActivityState.ALLOCATED,
153147
ActivityDiagnosticConstant.EMPTY);
154148

155149
// Update unformed resource
@@ -162,6 +156,9 @@ protected CSAssignment getCSAssignmentFromAllocateResult(
162156
assignment.setSkippedType(
163157
CSAssignment.SkippedType.QUEUE_LIMIT);
164158
}
159+
ActivitiesLogger.APP.finishSkippedAppAllocationRecording(
160+
activitiesManager, application.getApplicationId(),
161+
ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);
165162
}
166163

167164
return assignment;

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,12 @@ private ContainerAllocation assignContainer(Resource clusterResource,
628628
}
629629
}
630630

631+
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(
632+
activitiesManager, node, application, schedulerKey,
633+
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
634+
+ getResourceDiagnostics(capability, availableForDC),
635+
rmContainer == null ?
636+
ActivityState.RESERVED : ActivityState.RE_RESERVED);
631637
ContainerAllocation result = new ContainerAllocation(null,
632638
pendingAsk.getPerAllocationResource(), AllocationState.RESERVED);
633639
result.containerNodeType = type;
@@ -824,7 +830,7 @@ private ContainerAllocation allocate(Resource clusterResource,
824830
ActivityDiagnosticConstant.
825831
APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE);
826832
return new ContainerAllocation(reservedContainer, null,
827-
AllocationState.QUEUE_SKIPPED);
833+
AllocationState.PRIORITY_SKIPPED);
828834
}
829835

830836
result = ContainerAllocation.PRIORITY_SKIPPED;

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAllocationInfo.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ public class AppAllocationInfo {
4343
private long timestamp;
4444
private String dateTime;
4545
private String allocationState;
46+
private String diagnostic;
4647
private List<AppRequestAllocationInfo> requestAllocation;
4748

4849
AppAllocationInfo() {
@@ -57,6 +58,7 @@ public class AppAllocationInfo {
5758
this.timestamp = allocation.getTime();
5859
this.dateTime = new Date(allocation.getTime()).toString();
5960
this.allocationState = allocation.getAppState().name();
61+
this.diagnostic = allocation.getDiagnostic();
6062
Map<String, List<ActivityNode>> requestToActivityNodes =
6163
allocation.getAllocationAttempts().stream().collect(Collectors
6264
.groupingBy((e) -> e.getRequestPriority() + "_" + e
@@ -96,4 +98,8 @@ public String getAllocationState() {
9698
public List<AppRequestAllocationInfo> getRequestAllocation() {
9799
return requestAllocation;
98100
}
101+
102+
public String getDiagnostic() {
103+
return diagnostic;
104+
}
99105
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivities.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -460,7 +460,7 @@ public void testAppActivityJSON() throws Exception {
460460
//Check app activities
461461
verifyNumberOfAllocations(json, 1);
462462
JSONObject allocations = json.getJSONObject("allocations");
463-
verifyStateOfAllocations(allocations, "allocationState", "ACCEPTED");
463+
verifyStateOfAllocations(allocations, "allocationState", "ALLOCATED");
464464
//Check request allocation
465465
JSONObject requestAllocationObj =
466466
allocations.getJSONObject("requestAllocation");
@@ -527,7 +527,7 @@ public void testAppAssignMultipleContainersPerNodeHeartbeat()
527527
JSONArray allocations = json.getJSONArray("allocations");
528528
for (int i = 0; i < allocations.length(); i++) {
529529
verifyStateOfAllocations(allocations.getJSONObject(i),
530-
"allocationState", "ACCEPTED");
530+
"allocationState", "ALLOCATED");
531531
}
532532
} finally {
533533
rm.stop();

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled.java

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -277,7 +277,7 @@ public void testAppAssignContainer() throws Exception {
277277
verifyNumberOfAllocations(json, 1);
278278

279279
JSONObject allocationObj = json.getJSONObject("allocations");
280-
verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED");
280+
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
281281
JSONObject requestAllocationObj =
282282
allocationObj.getJSONObject("requestAllocation");
283283
verifyNumberOfAllocationAttempts(requestAllocationObj, 2);
@@ -437,7 +437,7 @@ public void testAppInsufficientResourceDiagnostic() throws Exception {
437437
}
438438
// check second activity is for first allocation with ALLOCATED state
439439
allocationObj = allocationArray.getJSONObject(1);
440-
verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED");
440+
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
441441
requestAllocationObj = allocationObj.getJSONObject("requestAllocation");
442442
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
443443
verifyStateOfAllocations(requestAllocationObj, "allocationState",
@@ -610,7 +610,7 @@ public void testAppGroupByDiagnostics() throws Exception {
610610
}
611611
// check second activity is for first allocation with ALLOCATED state
612612
allocationObj = allocationArray.getJSONObject(1);
613-
verifyStateOfAllocations(allocationObj, "allocationState", "ACCEPTED");
613+
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
614614
requestAllocationObj = allocationObj.getJSONObject("requestAllocation");
615615
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
616616
verifyStateOfAllocations(requestAllocationObj, "allocationState",

0 commit comments

Comments
 (0)