Skip to content

Commit fa6a914

Browse files
committed
YARN-5025. Added and updated container management protocols for container relocation
1 parent 0ece56e commit fa6a914

File tree

24 files changed

+1280
-32
lines changed

24 files changed

+1280
-32
lines changed

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
import java.util.concurrent.atomic.AtomicInteger;
3232

3333
import org.apache.hadoop.yarn.api.protocolrecords.CommitResponse;
34+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextRequest;
35+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextResponse;
3436
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
3537
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
3638
import org.apache.hadoop.yarn.api.protocolrecords.ReInitializeContainerRequest;
@@ -481,6 +483,13 @@ public ResourceLocalizationResponse localize(
481483
ResourceLocalizationRequest request) throws YarnException, IOException {
482484
return null;
483485
}
486+
487+
@Override
488+
public GetContainerLaunchContextResponse getContainerLaunchContext(
489+
GetContainerLaunchContextRequest request) throws YarnException, IOException {
490+
throw new UnsupportedOperationException("getting the container launch context is not " +
491+
"supported for this implementation of ContainerManagementProtocol");
492+
}
484493

485494
@Override
486495
public ReInitializeContainerResponse reInitializeContainer(

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,12 @@
4747
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
4848
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
4949
import org.apache.hadoop.yarn.api.protocolrecords.CommitResponse;
50-
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
51-
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
50+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextRequest;
51+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextResponse;
5252
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
5353
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
54+
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
55+
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
5456
import org.apache.hadoop.yarn.api.protocolrecords.ReInitializeContainerRequest;
5557
import org.apache.hadoop.yarn.api.protocolrecords.ReInitializeContainerResponse;
5658
import org.apache.hadoop.yarn.api.protocolrecords.ResourceLocalizationRequest;
@@ -70,7 +72,8 @@
7072
import org.apache.hadoop.yarn.api.records.Priority;
7173
import org.apache.hadoop.yarn.api.records.Resource;
7274
import org.apache.hadoop.yarn.api.records.Token;
73-
import org.apache.hadoop.yarn.client.api.impl.ContainerManagementProtocolProxy.ContainerManagementProtocolProxyData;
75+
import org.apache.hadoop.yarn.client.api.impl.ContainerManagementProtocolProxy
76+
.ContainerManagementProtocolProxyData;
7477
import org.apache.hadoop.yarn.event.Event;
7578
import org.apache.hadoop.yarn.event.EventHandler;
7679
import org.apache.hadoop.yarn.exceptions.YarnException;
@@ -486,6 +489,13 @@ public ResourceLocalizationResponse localize(
486489
ResourceLocalizationRequest request) throws YarnException, IOException {
487490
return null;
488491
}
492+
493+
@Override
494+
public GetContainerLaunchContextResponse getContainerLaunchContext(
495+
GetContainerLaunchContextRequest request) throws YarnException, IOException {
496+
throw new UnsupportedOperationException("getting the container launch context is not " +
497+
"supported for this implementation of ContainerManagementProtocol");
498+
}
489499

490500
@Override
491501
public ReInitializeContainerResponse reInitializeContainer(

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ContainerManagementProtocol.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import org.apache.hadoop.classification.InterfaceStability.Stable;
2525
import org.apache.hadoop.classification.InterfaceStability.Unstable;
2626
import org.apache.hadoop.yarn.api.protocolrecords.CommitResponse;
27+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextRequest;
28+
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerLaunchContextResponse;
2729
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
2830
import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
2931
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
@@ -51,7 +53,7 @@
5153
/**
5254
* <p>The protocol between an <code>ApplicationMaster</code> and a
5355
* <code>NodeManager</code> to start/stop and increase resource of containers
54-
* and to get status of running containers.</p>
56+
* and to get status and launch context of running containers.</p>
5557
*
5658
* <p>If security is enabled the <code>NodeManager</code> verifies that the
5759
* <code>ApplicationMaster</code> has truly been allocated the container
@@ -220,6 +222,22 @@ SignalContainerResponse signalToContainer(SignalContainerRequest request)
220222
@Unstable
221223
ResourceLocalizationResponse localize(ResourceLocalizationRequest request)
222224
throws YarnException, IOException;
225+
226+
/**
227+
* Gets container launch context for a container specified in
228+
* {@link GetContainerLaunchContextRequest}.
229+
* This method is only used by the container relocation logic, between node managers, to
230+
* transfer the launch context of the container to be relocated to the target node.
231+
*
232+
* @param request specifies the id of the container for which the launch context is requested
233+
* @return Response that contains the requested container launch context
234+
* @throws YarnException
235+
* @throws IOException
236+
*/
237+
@Public
238+
@Stable
239+
GetContainerLaunchContextResponse getContainerLaunchContext(
240+
GetContainerLaunchContextRequest request) throws YarnException, IOException;
223241

224242
/**
225243
* ReInitialize the Container with a new Launch Context.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.api.protocolrecords;
20+
21+
import org.apache.hadoop.classification.InterfaceAudience.Public;
22+
import org.apache.hadoop.classification.InterfaceStability.Stable;
23+
import org.apache.hadoop.yarn.api.records.ContainerId;
24+
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
25+
import org.apache.hadoop.yarn.util.Records;
26+
27+
/**
28+
* <p>The request sent by one <code>NodeManager</code> to another
29+
* <code>NodeManager</code> in order to get the launch context of the container with id
30+
* <em>containerId</em>.</p>
31+
*
32+
* <p>This request is used only for container relocation, where the launch context of
33+
* the origin container is necessary for launching the relocated container on the target node.</p>
34+
*
35+
* @see ContainerManagementProtocol#getContainerLaunchContext(GetContainerLaunchContextRequest)
36+
*/
37+
public abstract class GetContainerLaunchContextRequest {
38+
39+
@Public
40+
@Stable
41+
public static GetContainerLaunchContextRequest newInstance(ContainerId containerId) {
42+
GetContainerLaunchContextRequest request =
43+
Records.newRecord(GetContainerLaunchContextRequest.class);
44+
request.setContainerId(containerId);
45+
return request;
46+
}
47+
48+
/**
49+
* Gets the container id for which the launch context is requested.
50+
* @return the container id for which the launch context is requested
51+
*/
52+
@Public
53+
@Stable
54+
public abstract ContainerId getContainerId();
55+
56+
/**
57+
* Sets the container id for which the launch context is requested.
58+
* @param containerId the container id for which the launch context is requested
59+
*/
60+
@Public
61+
@Stable
62+
public abstract void setContainerId(ContainerId containerId);
63+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.api.protocolrecords;
20+
21+
import org.apache.hadoop.classification.InterfaceAudience.Public;
22+
import org.apache.hadoop.classification.InterfaceStability.Stable;
23+
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
24+
import org.apache.hadoop.yarn.util.Records;
25+
26+
/**
27+
* <p>The response for a container launch context request sent between
28+
* <code>NodeManager</code>s. It contains the requested container launch context.</p>
29+
*
30+
* <p>This response is used only for container relocation, where the launch context
31+
* of the origin container is necessary for launching the relocated container on
32+
* the target node.</p>
33+
*/
34+
public abstract class GetContainerLaunchContextResponse {
35+
36+
@Public
37+
@Stable
38+
public static GetContainerLaunchContextResponse newInstance(ContainerLaunchContext
39+
containerLaunchContext) {
40+
GetContainerLaunchContextResponse request =
41+
Records.newRecord(GetContainerLaunchContextResponse.class);
42+
request.setContainerLaunchContext(containerLaunchContext);
43+
return request;
44+
}
45+
46+
/**
47+
* Gets the launch context of the requested container.
48+
* @return the launch context of the requested container
49+
*/
50+
@Public
51+
@Stable
52+
public abstract ContainerLaunchContext getContainerLaunchContext();
53+
54+
/**
55+
* Sets the launch context of the requested container.
56+
* @param containerLaunchContext the launch context of the requested container
57+
*/
58+
@Public
59+
@Stable
60+
public abstract void setContainerLaunchContext(ContainerLaunchContext containerLaunchContext);
61+
}

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@
2020

2121
import org.apache.hadoop.classification.InterfaceAudience.Public;
2222
import org.apache.hadoop.classification.InterfaceStability.Stable;
23+
import org.apache.hadoop.classification.InterfaceStability.Unstable;
2324
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
25+
import org.apache.hadoop.yarn.api.records.ContainerId;
2426
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
2527
import org.apache.hadoop.yarn.api.records.NMToken;
28+
import org.apache.hadoop.yarn.api.records.NodeId;
2629
import org.apache.hadoop.yarn.api.records.Token;
2730
import org.apache.hadoop.yarn.util.Records;
2831

@@ -36,6 +39,15 @@
3639
* necessary binaries/jar/shared-objects etc. via the
3740
* {@link ContainerLaunchContext}.</p>
3841
*
42+
* <p>The <em>isMove</em> flag tells whether this request corresponds to a container
43+
* relocation. If true, then no container launch context is needed as it will be
44+
* transferred directly from the origin node manager to the target node manager.
45+
* The container to be relocated is identified by the <em>originNodeId</em> and
46+
* <em>originContainerId</em>.
47+
* The <em>originNMToken</em> is sent along so that the origin container can be shut down
48+
* by the target node manager.
49+
* </p>
50+
*
3951
* @see ContainerManagementProtocol#startContainers(StartContainersRequest)
4052
*/
4153
@Public
@@ -49,6 +61,21 @@ public static StartContainerRequest newInstance(
4961
Records.newRecord(StartContainerRequest.class);
5062
request.setContainerLaunchContext(context);
5163
request.setContainerToken(container);
64+
request.setIsMove(false);
65+
return request;
66+
}
67+
68+
@Public
69+
@Stable
70+
public static StartContainerRequest newInstance(
71+
Token container, ContainerId originContainerId, NodeId originNodeId, Token originNMToken) {
72+
StartContainerRequest request =
73+
Records.newRecord(StartContainerRequest.class);
74+
request.setContainerToken(container);
75+
request.setIsMove(true);
76+
request.setOriginContainerId(originContainerId);
77+
request.setOriginNodeId(originNodeId);
78+
request.setOriginNMToken(originNMToken);
5279
return request;
5380
}
5481

@@ -91,4 +118,91 @@ public static StartContainerRequest newInstance(
91118
@Public
92119
@Stable
93120
public abstract void setContainerToken(Token container);
121+
122+
/**
123+
* Gets whether this start container request corresponds to a container relocation.
124+
* @return whether this start container request corresponds to a container relocation
125+
*/
126+
@Public
127+
@Unstable
128+
public abstract boolean getIsMove();
129+
130+
/**
131+
* Sets whether this start container request corresponds to a container relocation.
132+
* @param isMove whether this start container request corresponds to a container
133+
* relocation
134+
*/
135+
@Public
136+
@Unstable
137+
public abstract void setIsMove(boolean isMove);
138+
139+
/**
140+
* Gets the origin container id for this start container request.
141+
* The origin container id is set if and only if this start container request
142+
* corresponds to a container relocation. It identifies the container that should
143+
* be relocated.
144+
*
145+
* @return the origin container id for this start container request
146+
*/
147+
@Public
148+
@Unstable
149+
public abstract ContainerId getOriginContainerId();
150+
151+
/**
152+
* Sets the origin container id for this start container request.
153+
* The origin container id should be set if and only if this start container request
154+
* corresponds to a container relocation. It identifies the container that should
155+
* be relocated.
156+
*
157+
* @param originContainerId the origin container id for this start container request
158+
*/
159+
@Public
160+
@Unstable
161+
public abstract void setOriginContainerId(ContainerId originContainerId);
162+
163+
/**
164+
* Gets the origin node id for this start container request.
165+
* The origin node id is set if and only if this start container request corresponds
166+
* to a container relocation. It identifies the node of the container that should
167+
* be relocated.
168+
*
169+
* @return the origin node id for this start container request
170+
*/
171+
@Public
172+
@Unstable
173+
public abstract NodeId getOriginNodeId();
174+
175+
/**
176+
* Sets the origin node id for this start container request.
177+
* The origin node id should be set if and only if this start container request
178+
* corresponds to a container relocation. It identifies the node of the container
179+
* that should be relocated.
180+
*
181+
* @param originNodeId the origin node id for this start container request
182+
*/
183+
@Public
184+
@Unstable
185+
public abstract void setOriginNodeId(NodeId originNodeId);
186+
187+
/**
188+
* Gets the security token for the origin node.
189+
* The origin NM token is set if and only if this start container request corresponds
190+
* to a container relocation. It is used for shutting down the origin container.
191+
*
192+
* @return the security token for the origin node
193+
*/
194+
@Public
195+
@Unstable
196+
public abstract Token getOriginNMToken();
197+
198+
/**
199+
* Sets the security token for the origin node.
200+
* The origin NM token should be set if and only if this start container request corresponds
201+
* to a container relocation. It is used for shutting down the origin container.
202+
*
203+
* @param originNMToken the security token for the origin node
204+
*/
205+
@Public
206+
@Unstable
207+
public abstract void setOriginNMToken(Token originNMToken);
94208
}

0 commit comments

Comments
 (0)