2525import org .apache .hadoop .classification .InterfaceAudience ;
2626import org .apache .hadoop .classification .InterfaceStability ;
2727import org .apache .hadoop .conf .Configuration ;
28- import org .apache .hadoop .fs .Path ;
2928import org .apache .hadoop .util .StringUtils ;
3029import org .apache .hadoop .yarn .api .records .ContainerId ;
3130import org .apache .hadoop .yarn .api .records .Resource ;
3736import org .apache .hadoop .yarn .server .nodemanager .containermanager .linux .resources .CGroupsHandler ;
3837import org .apache .hadoop .yarn .server .nodemanager .containermanager .linux .resources .ResourceHandler ;
3938import org .apache .hadoop .yarn .server .nodemanager .containermanager .linux .resources .ResourceHandlerException ;
39+ import org .apache .hadoop .yarn .server .nodemanager .containermanager .linux .resources .fpga .FpgaResourceAllocator .FpgaDevice ;
4040import org .apache .hadoop .yarn .server .nodemanager .containermanager .resourceplugin .fpga .AbstractFpgaVendorPlugin ;
4141import org .apache .hadoop .yarn .server .nodemanager .containermanager .resourceplugin .fpga .FpgaDiscoverer ;
4242
4343import java .util .ArrayList ;
4444import java .util .Arrays ;
4545import java .util .List ;
46- import java .util .Map ;
4746
4847import static org .apache .hadoop .yarn .api .records .ResourceInformation .FPGA_URI ;
4948
@@ -89,21 +88,25 @@ public String getRequestedIPID(Container container) {
8988 }
9089
9190 @ Override
92- public List <PrivilegedOperation > bootstrap (Configuration configuration ) throws ResourceHandlerException {
91+ public List <PrivilegedOperation > bootstrap (Configuration configuration )
92+ throws ResourceHandlerException {
9393 // The plugin should be initialized by FpgaDiscoverer already
9494 if (!vendorPlugin .initPlugin (configuration )) {
95- throw new ResourceHandlerException ("FPGA plugin initialization failed" , null );
95+ throw new ResourceHandlerException ("FPGA plugin initialization failed" );
9696 }
9797 LOG .info ("FPGA Plugin bootstrap success." );
9898 // Get available device minor numbers from toolchain or static configuration
99- List <FpgaResourceAllocator .FpgaDevice > fpgaDeviceList = FpgaDiscoverer .getInstance ().discover ();
99+ List <FpgaResourceAllocator .FpgaDevice > fpgaDeviceList =
100+ FpgaDiscoverer .getInstance ().discover ();
100101 allocator .addFpga (vendorPlugin .getFpgaType (), fpgaDeviceList );
101- this .cGroupsHandler .initializeCGroupController (CGroupsHandler .CGroupController .DEVICES );
102+ this .cGroupsHandler .initializeCGroupController (
103+ CGroupsHandler .CGroupController .DEVICES );
102104 return null ;
103105 }
104106
105107 @ Override
106- public List <PrivilegedOperation > preStart (Container container ) throws ResourceHandlerException {
108+ public List <PrivilegedOperation > preStart (Container container )
109+ throws ResourceHandlerException {
107110 // 1. Get requested FPGA type and count, choose corresponding FPGA plugin(s)
108111 // 2. Use allocator.assignFpga(type, count) to get FPGAAllocation
109112 // 3. If required, download to ensure IP file exists and configure IP file for all devices
@@ -126,15 +129,17 @@ public List<PrivilegedOperation> preStart(Container container) throws ResourceHa
126129 container , getRequestedIPID (container ));
127130 LOG .info ("FpgaAllocation:" + allocation );
128131
129- PrivilegedOperation privilegedOperation = new PrivilegedOperation (PrivilegedOperation .OperationType .FPGA ,
132+ PrivilegedOperation privilegedOperation =
133+ new PrivilegedOperation (PrivilegedOperation .OperationType .FPGA ,
130134 Arrays .asList (CONTAINER_ID_CLI_OPTION , containerIdStr ));
131135 if (!allocation .getDenied ().isEmpty ()) {
132136 List <Integer > denied = new ArrayList <>();
133137 allocation .getDenied ().forEach (device -> denied .add (device .getMinor ()));
134138 privilegedOperation .appendArgs (Arrays .asList (EXCLUDED_FPGAS_CLI_OPTION ,
135139 StringUtils .join ("," , denied )));
136140 }
137- privilegedOperationExecutor .executePrivilegedOperation (privilegedOperation , true );
141+ privilegedOperationExecutor .executePrivilegedOperation (
142+ privilegedOperation , true );
138143
139144 if (deviceCount > 0 ) {
140145 /**
@@ -152,25 +157,30 @@ public List<PrivilegedOperation> preStart(Container container) throws ResourceHa
152157 * for different devices
153158 *
154159 * */
155- ipFilePath = vendorPlugin .downloadIP (getRequestedIPID (container ), container .getWorkDir (),
160+ ipFilePath = vendorPlugin .retrieveIPfilePath (
161+ getRequestedIPID (container ),
162+ container .getWorkDir (),
156163 container .getResourceSet ().getLocalizedResources ());
157- if (ipFilePath .isEmpty ()) {
158- LOG .warn ("FPGA plugin failed to download IP but continue, please check the value of environment viable: " +
159- REQUEST_FPGA_IP_ID_KEY + " if you want yarn to help" );
164+ if (ipFilePath == null ) {
165+ LOG .warn ("FPGA plugin failed to downloaded IP, please check the" +
166+ " value of environment viable: " + REQUEST_FPGA_IP_ID_KEY +
167+ " if you want YARN to program the device" );
160168 } else {
161169 LOG .info ("IP file path:" + ipFilePath );
162170 List <FpgaResourceAllocator .FpgaDevice > allowed = allocation .getAllowed ();
163171 String majorMinorNumber ;
164172 for (int i = 0 ; i < allowed .size (); i ++) {
165- majorMinorNumber = allowed .get (i ).getMajor () + ":" + allowed .get (i ).getMinor ();
166- String currentIPID = allowed .get (i ).getIPID ();
173+ FpgaDevice device = allowed .get (i );
174+ majorMinorNumber = device .getMajor () + ":" + device .getMinor ();
175+ String currentIPID = device .getIPID ();
167176 if (null != currentIPID &&
168177 currentIPID .equalsIgnoreCase (getRequestedIPID (container ))) {
169- LOG .info ("IP already in device \" " + allowed .get (i ).getAliasDevName () + "," +
170- majorMinorNumber + "\" , skip reprogramming" );
178+ LOG .info ("IP already in device \" " +
179+ allowed .get (i ).getAliasDevName () +
180+ "," + majorMinorNumber + "\" , skip reprogramming" );
171181 continue ;
172182 }
173- if (vendorPlugin .configureIP (ipFilePath , majorMinorNumber )) {
183+ if (vendorPlugin .configureIP (ipFilePath , device )) {
174184 // update the allocator that we update an IP of a device
175185 allocator .updateFpga (containerIdStr , allowed .get (i ),
176186 getRequestedIPID (container ));
@@ -186,7 +196,8 @@ public List<PrivilegedOperation> preStart(Container container) throws ResourceHa
186196 throw re ;
187197 } catch (PrivilegedOperationException e ) {
188198 allocator .cleanupAssignFpgas (containerIdStr );
189- cGroupsHandler .deleteCGroup (CGroupsHandler .CGroupController .DEVICES , containerIdStr );
199+ cGroupsHandler .deleteCGroup (CGroupsHandler .CGroupController .DEVICES ,
200+ containerIdStr );
190201 LOG .warn ("Could not update cgroup for container" , e );
191202 throw new ResourceHandlerException (e );
192203 }
@@ -200,7 +211,8 @@ public List<PrivilegedOperation> preStart(Container container) throws ResourceHa
200211 }
201212
202213 @ Override
203- public List <PrivilegedOperation > reacquireContainer (ContainerId containerId ) throws ResourceHandlerException {
214+ public List <PrivilegedOperation > reacquireContainer (ContainerId containerId )
215+ throws ResourceHandlerException {
204216 allocator .recoverAssignedFpgas (containerId );
205217 return null ;
206218 }
@@ -212,7 +224,8 @@ public List<PrivilegedOperation> updateContainer(Container container)
212224 }
213225
214226 @ Override
215- public List <PrivilegedOperation > postComplete (ContainerId containerId ) throws ResourceHandlerException {
227+ public List <PrivilegedOperation > postComplete (ContainerId containerId )
228+ throws ResourceHandlerException {
216229 allocator .cleanupAssignFpgas (containerId .toString ());
217230 cGroupsHandler .deleteCGroup (CGroupsHandler .CGroupController .DEVICES ,
218231 containerId .toString ());
0 commit comments