Skip to content

Commit f7625e8

Browse files
committed
Address review comments
1 parent 262330e commit f7625e8

File tree

3 files changed

+93
-82
lines changed

3 files changed

+93
-82
lines changed

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/OpportunisticContainersQueuePolicy.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public enum OpportunisticContainersQueuePolicy {
3333
*/
3434
BY_QUEUE_LEN,
3535
/**
36-
* Determines wheether or not to run a container based on the amount of
36+
* Determines whether or not to run a container based on the amount of
3737
* resource capacity the node has.
3838
* Sums up the resources running + already queued at the node, compares
3939
* it with the total capacity of the node, and accepts the new container only

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerSchedulerTest.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
4040
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
4141
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
42+
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.ContainerScheduler;
4243
import org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler.TestContainerSchedulerQueuing;
4344
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
4445
import org.slf4j.LoggerFactory;
@@ -53,6 +54,11 @@
5354

5455
import static org.mockito.Mockito.spy;
5556

57+
/**
58+
* Base test class that overrides the behavior of
59+
* {@link ContainerStateTransitionListener} for testing
60+
* the {@link ContainerScheduler}.
61+
*/
5662
public class BaseContainerSchedulerTest extends BaseContainerManagerTest {
5763
private static final long TWO_GB = 2048 * 1024 * 1024L;
5864

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/scheduler/TestContainerSchedulerOppContainersByResources.java

Lines changed: 86 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@
4646
import java.util.concurrent.TimeoutException;
4747

4848
/**
49-
* Tests the behavior of {@link ContainerScheduler} when the max queue length
50-
* is set to {@literal < 0} such that the NM only queues
51-
* containers if there's enough resources on the node to start
52-
* all queued containers.
49+
* Tests the behavior of {@link ContainerScheduler} when its queueing policy
50+
* is set to {@link OpportunisticContainersQueuePolicy#BY_RESOURCES}
51+
* such that the NM only queues containers if there's enough resources
52+
* on the node to start all queued containers.
5353
*/
5454
public class TestContainerSchedulerOppContainersByResources
5555
extends BaseContainerSchedulerTest {
@@ -62,9 +62,17 @@ public void setup() throws IOException {
6262
conf.set(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_QUEUE_POLICY,
6363
OpportunisticContainersQueuePolicy.BY_RESOURCES.name());
6464
super.setup();
65+
containerManager.start();
6566
}
6667

67-
private static boolean isSuccessfulRun(final ContainerStatus containerStatus) {
68+
/**
69+
* Checks if a container is in a running or successfully run state.
70+
* @param containerStatus the container status
71+
* @return true if the container is running or completed
72+
* with a successful state, false if the container has not started or failed
73+
*/
74+
private static boolean isContainerInSuccessfulState(
75+
final ContainerStatus containerStatus) {
6876
final org.apache.hadoop.yarn.api.records.ContainerState state =
6977
containerStatus.getState();
7078
final ContainerSubState subState = containerStatus.getContainerSubState();
@@ -73,6 +81,8 @@ private static boolean isSuccessfulRun(final ContainerStatus containerStatus) {
7381
case COMPLETING:
7482
case DONE:
7583
if (subState == ContainerSubState.DONE) {
84+
// If the state is not COMPLETE, then the
85+
// container is a failed container
7686
return state ==
7787
org.apache.hadoop.yarn.api.records.ContainerState.COMPLETE;
7888
}
@@ -106,7 +116,7 @@ private void verifyRunAndKilledContainers(
106116

107117
for (final ContainerStatus status : containerStatuses) {
108118
if (runContainers.contains(status.getContainerId())) {
109-
if (!isSuccessfulRun(status)) {
119+
if (!isContainerInSuccessfulState(status)) {
110120
return false;
111121
}
112122
} else if (killedContainers.contains(status.getContainerId())) {
@@ -124,62 +134,10 @@ private void verifyRunAndKilledContainers(
124134
}
125135

126136
/**
127-
* Tests that newly arrived containers after the resources are filled up
128-
* get killed and never get killed.
137+
* Verifies that nothing is queued at the container scheduler.
129138
*/
130-
@Test
131-
public void testOpportunisticRunsWhenResourcesAvailable() throws Exception {
132-
containerManager.start();
133-
List<StartContainerRequest> list = new ArrayList<>();
134-
final int numContainers = 8;
135-
final int numContainersQueued = 4;
136-
final Set<ContainerId> runContainers = new HashSet<>();
137-
final Set<ContainerId> killedContainers = new HashSet<>();
138-
139-
for (int i = 0; i < numContainers; i++) {
140-
// OContainers that should be run
141-
list.add(StartContainerRequest.newInstance(
142-
recordFactory.newRecordInstance(ContainerLaunchContext.class),
143-
createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER,
144-
context.getNodeId(),
145-
user, BuilderUtils.newResource(512, 1),
146-
context.getContainerTokenSecretManager(), null,
147-
ExecutionType.OPPORTUNISTIC)));
148-
}
149-
150-
StartContainersRequest allRequests =
151-
StartContainersRequest.newInstance(list);
152-
containerManager.startContainers(allRequests);
153-
154-
// Wait for containers to start
155-
for (int i = 0; i < numContainersQueued; i++) {
156-
final ContainerId containerId = createContainerId(i);
157-
BaseContainerManagerTest
158-
.waitForNMContainerState(containerManager, containerId,
159-
ContainerState.RUNNING, 40);
160-
runContainers.add(containerId);
161-
}
162-
163-
// Wait for containers to be killed
164-
for (int i = numContainersQueued; i < numContainers; i++) {
165-
final ContainerId containerId = createContainerId(i);
166-
BaseContainerManagerTest
167-
.waitForNMContainerState(containerManager, createContainerId(i),
168-
ContainerState.DONE, 40);
169-
killedContainers.add(containerId);
170-
}
171-
172-
Thread.sleep(5000);
173-
174-
// Get container statuses.
175-
List<ContainerId> statList = new ArrayList<>();
176-
for (int i = 0; i < numContainers; i++) {
177-
statList.add(createContainerId(i));
178-
}
179-
180-
verifyRunAndKilledContainers(
181-
statList, numContainers, runContainers, killedContainers);
182-
139+
private void verifyNothingQueued() {
140+
// Check that nothing is queued
183141
ContainerScheduler containerScheduler =
184142
containerManager.getContainerScheduler();
185143
Assert.assertEquals(0,
@@ -194,19 +152,15 @@ public void testOpportunisticRunsWhenResourcesAvailable() throws Exception {
194152
}
195153

196154
/**
197-
* Sets the max queue length to negative such that the NM only queues
198-
* containers if there's enough resources on the node to start
199-
* all queued containers.
200155
* Tests that newly arrived containers after the resources are filled up
201-
* get killed and never get killed.
156+
* get killed and never gets run.
202157
*/
203158
@Test
204159
public void testKillOpportunisticWhenNoResourcesAvailable() throws Exception {
205-
containerManager.start();
206-
List<StartContainerRequest> list = new ArrayList<>();
160+
List<StartContainerRequest> startContainerRequests = new ArrayList<>();
207161

208162
// GContainer that takes up the whole node
209-
list.add(StartContainerRequest.newInstance(
163+
startContainerRequests.add(StartContainerRequest.newInstance(
210164
recordFactory.newRecordInstance(ContainerLaunchContext.class),
211165
createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
212166
context.getNodeId(),
@@ -215,7 +169,7 @@ public void testKillOpportunisticWhenNoResourcesAvailable() throws Exception {
215169
ExecutionType.GUARANTEED)));
216170

217171
// OContainer that should be killed
218-
list.add(StartContainerRequest.newInstance(
172+
startContainerRequests.add(StartContainerRequest.newInstance(
219173
recordFactory.newRecordInstance(ContainerLaunchContext.class),
220174
createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
221175
context.getNodeId(),
@@ -224,7 +178,7 @@ public void testKillOpportunisticWhenNoResourcesAvailable() throws Exception {
224178
ExecutionType.OPPORTUNISTIC)));
225179

226180
StartContainersRequest allRequests =
227-
StartContainersRequest.newInstance(list);
181+
StartContainersRequest.newInstance(startContainerRequests);
228182
containerManager.startContainers(allRequests);
229183

230184
BaseContainerManagerTest.waitForNMContainerState(containerManager,
@@ -245,16 +199,67 @@ public void testKillOpportunisticWhenNoResourcesAvailable() throws Exception {
245199
Collections.singleton(createContainerId(1))
246200
);
247201

248-
ContainerScheduler containerScheduler =
249-
containerManager.getContainerScheduler();
250-
Assert.assertEquals(0,
251-
containerScheduler.getNumQueuedContainers());
252-
Assert.assertEquals(0,
253-
containerScheduler.getNumQueuedGuaranteedContainers());
254-
Assert.assertEquals(0,
255-
containerScheduler.getNumQueuedOpportunisticContainers());
256-
Assert.assertEquals(0,
257-
metrics.getQueuedOpportunisticContainers());
258-
Assert.assertEquals(0, metrics.getQueuedGuaranteedContainers());
202+
verifyNothingQueued();
203+
}
204+
205+
/**
206+
* Tests that newly arrived containers after the resources are filled up
207+
* get killed and never gets run.
208+
* This scenario is more granular and runs more small container compared to
209+
* {@link #testKillOpportunisticWhenNoResourcesAvailable()}.
210+
*/
211+
@Test
212+
public void testOpportunisticRunsWhenResourcesAvailable() throws Exception {
213+
List<StartContainerRequest> startContainerRequests = new ArrayList<>();
214+
final int numContainers = 8;
215+
final int numContainersQueued = 4;
216+
final Set<ContainerId> runContainers = new HashSet<>();
217+
final Set<ContainerId> killedContainers = new HashSet<>();
218+
219+
for (int i = 0; i < numContainers; i++) {
220+
// OContainers that should be run
221+
startContainerRequests.add(StartContainerRequest.newInstance(
222+
recordFactory.newRecordInstance(ContainerLaunchContext.class),
223+
createContainerToken(createContainerId(i), DUMMY_RM_IDENTIFIER,
224+
context.getNodeId(),
225+
user, BuilderUtils.newResource(512, 1),
226+
context.getContainerTokenSecretManager(), null,
227+
ExecutionType.OPPORTUNISTIC)));
228+
}
229+
230+
StartContainersRequest allRequests =
231+
StartContainersRequest.newInstance(startContainerRequests);
232+
containerManager.startContainers(allRequests);
233+
234+
// Wait for containers to start
235+
for (int i = 0; i < numContainersQueued; i++) {
236+
final ContainerId containerId = createContainerId(i);
237+
BaseContainerManagerTest
238+
.waitForNMContainerState(containerManager, containerId,
239+
ContainerState.RUNNING, 40);
240+
runContainers.add(containerId);
241+
}
242+
243+
// Wait for containers to be killed
244+
for (int i = numContainersQueued; i < numContainers; i++) {
245+
final ContainerId containerId = createContainerId(i);
246+
BaseContainerManagerTest
247+
.waitForNMContainerState(containerManager, createContainerId(i),
248+
ContainerState.DONE, 40);
249+
killedContainers.add(containerId);
250+
}
251+
252+
Thread.sleep(5000);
253+
254+
// Get container statuses.
255+
List<ContainerId> statList = new ArrayList<>();
256+
for (int i = 0; i < numContainers; i++) {
257+
statList.add(createContainerId(i));
258+
}
259+
260+
verifyRunAndKilledContainers(
261+
statList, numContainers, runContainers, killedContainers);
262+
263+
verifyNothingQueued();
259264
}
260265
}

0 commit comments

Comments
 (0)