1818
1919package org .apache .hadoop .yarn .server .router .clientrm ;
2020
21+ import static org .apache .hadoop .yarn .conf .YarnConfiguration .FEDERATION_POLICY_MANAGER ;
22+ import static org .hamcrest .CoreMatchers .is ;
2123import static org .mockito .Mockito .mock ;
2224
2325import java .io .IOException ;
2426import java .util .ArrayList ;
2527import java .util .Arrays ;
28+ import java .util .Collection ;
2629import java .util .List ;
2730
2831import org .apache .hadoop .test .LambdaTestUtils ;
4851import org .apache .hadoop .yarn .server .resourcemanager .ResourceManager ;
4952import org .apache .hadoop .yarn .util .resource .Resources ;
5053import org .junit .Assert ;
54+ import org .junit .Assume ;
5155import org .junit .Test ;
56+ import org .junit .runner .RunWith ;
57+ import org .junit .runners .Parameterized ;
58+ import org .junit .runners .Parameterized .Parameters ;
5259import org .slf4j .Logger ;
5360import org .slf4j .LoggerFactory ;
5461
6471 * It tests the case with SubClusters down and the Router logic of retries. We
6572 * have 1 good SubCluster and 2 bad ones for all the tests.
6673 */
74+ @ RunWith (Parameterized .class )
6775public class TestFederationClientInterceptorRetry
6876 extends BaseRouterClientRMTest {
6977 private static final Logger LOG =
7078 LoggerFactory .getLogger (TestFederationClientInterceptorRetry .class );
7179
80+ @ Parameters
81+ public static Collection <String []> getParameters () {
82+ return Arrays .asList (new String [][] {{UniformBroadcastPolicyManager .class .getName ()},
83+ {TestSequentialBroadcastPolicyManager .class .getName ()}});
84+ }
85+
7286 private TestableFederationClientInterceptor interceptor ;
7387 private MemoryFederationStateStore stateStore ;
7488 private FederationStateStoreTestUtil stateStoreUtil ;
89+ private String routerPolicyManagerName ;
7590
7691 private String user = "test-user" ;
7792
@@ -84,6 +99,10 @@ public class TestFederationClientInterceptorRetry
8499
85100 private static List <SubClusterId > scs = new ArrayList <>();
86101
/**
 * Creates a test instance bound to a single federation policy manager.
 *
 * @param policyManagerName class name of the policy manager that is stored
 *          in {@code routerPolicyManagerName} for use by this instance.
 */
public TestFederationClientInterceptorRetry(String policyManagerName) {
  routerPolicyManagerName = policyManagerName;
}
105+
87106 @ Override
88107 public void setUp () throws IOException {
89108 super .setUpConfig ();
@@ -150,8 +169,7 @@ protected YarnConfiguration createConfiguration() {
150169 mockPassThroughInterceptorClass + "," + mockPassThroughInterceptorClass
151170 + "," + TestableFederationClientInterceptor .class .getName ());
152171
153- conf .set (YarnConfiguration .FEDERATION_POLICY_MANAGER ,
154- UniformBroadcastPolicyManager .class .getName ());
172+ conf .set (FEDERATION_POLICY_MANAGER , this .routerPolicyManagerName );
155173
156174 // Disable StateStoreFacade cache
157175 conf .setInt (YarnConfiguration .FEDERATION_CACHE_TIME_TO_LIVE_SECS , 0 );
@@ -283,4 +301,56 @@ public void testSubmitApplicationOneBadOneGood()
283301 SubClusterId respSubClusterId = responseHomeSubCluster .getHomeSubCluster ();
284302 Assert .assertEquals (good , respSubClusterId );
285303 }
304+
305+ @ Test
306+ public void testSubmitApplicationTwoBadOneGood () throws Exception {
307+
308+ LOG .info ("Test submitApplication with two bad, one good SC." );
309+
310+ // This test must require the TestSequentialRouterPolicy policy
311+ Assume .assumeThat (routerPolicyManagerName ,
312+ is (TestSequentialBroadcastPolicyManager .class .getName ()));
313+
314+ setupCluster (Arrays .asList (bad1 , bad2 , good ));
315+ final ApplicationId appId =
316+ ApplicationId .newInstance (System .currentTimeMillis (), 1 );
317+
318+ // Use the TestSequentialRouterPolicy strategy,
319+ // which will sort the SubClusterId because good=0, bad1=1, bad2=2
320+ // We will get 2, 1, 0 [bad2, bad1, good]
321+ // Set the retryNum to 1
322+ // 1st time will use bad2, 2nd time will use bad1
323+ // bad1 is updated to stateStore
324+ interceptor .setNumSubmitRetries (1 );
325+ final SubmitApplicationRequest request = mockSubmitApplicationRequest (appId );
326+ LambdaTestUtils .intercept (YarnException .class , "RM is stopped" ,
327+ () -> interceptor .submitApplication (request ));
328+
329+ // We will get bad1
330+ checkSubmitSubCluster (appId , bad1 );
331+
332+ // Set the retryNum to 2
333+ // 1st time will use bad2, 2nd time will use bad1, 3rd good
334+ interceptor .setNumSubmitRetries (2 );
335+ SubmitApplicationResponse submitAppResponse = interceptor .submitApplication (request );
336+ Assert .assertNotNull (submitAppResponse );
337+
338+ // We will get good
339+ checkSubmitSubCluster (appId , good );
340+ }
341+
342+ private void checkSubmitSubCluster (ApplicationId appId , SubClusterId expectSubCluster )
343+ throws YarnException {
344+ GetApplicationHomeSubClusterRequest getAppRequest =
345+ GetApplicationHomeSubClusterRequest .newInstance (appId );
346+ GetApplicationHomeSubClusterResponse getAppResponse =
347+ stateStore .getApplicationHomeSubCluster (getAppRequest );
348+ Assert .assertNotNull (getAppResponse );
349+ Assert .assertNotNull (getAppResponse );
350+ ApplicationHomeSubCluster responseHomeSubCluster =
351+ getAppResponse .getApplicationHomeSubCluster ();
352+ Assert .assertNotNull (responseHomeSubCluster );
353+ SubClusterId respSubClusterId = responseHomeSubCluster .getHomeSubCluster ();
354+ Assert .assertEquals (expectSubCluster , respSubClusterId );
355+ }
286356}
0 commit comments