@@ -233,8 +233,6 @@ Expected<std::unique_ptr<llvm::Module>> jit_compiler::compileDeviceCode(
233
233
DerivedArgList DAL{UserArgList};
234
234
const auto &OptTable = getDriverOptTable ();
235
235
DAL.AddFlagArg (nullptr , OptTable.getOption (OPT_fsycl_device_only));
236
- DAL.AddFlagArg (nullptr ,
237
- OptTable.getOption (OPT_fno_sycl_dead_args_optimization));
238
236
DAL.AddJoinedArg (
239
237
nullptr , OptTable.getOption (OPT_resource_dir_EQ),
240
238
(DPCPPRoot + " /lib/clang/" + Twine (CLANG_VERSION_MAJOR)).str ());
@@ -436,15 +434,35 @@ template <class PassClass> static bool runModulePass(llvm::Module &M) {
436
434
return !Res.areAllPreserved ();
437
435
}
438
436
439
- llvm::Expected<PostLinkResult> jit_compiler::performPostLink (
440
- std::unique_ptr<llvm::Module> Module,
441
- [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) {
437
+ static IRSplitMode getDeviceCodeSplitMode (const InputArgList &UserArgList) {
438
+ // This is the (combined) logic from
439
+ // `get[NonTriple|Triple]BasedSYCLPostLinkOpts` in
440
+ // `clang/lib/Driver/ToolChains/Clang.cpp`: Default is auto mode, but the user
441
+ // can override it by specifying the `-fsycl-device-code-split=` option. The
442
+ // no-argument variant `-fsycl-device-code-split` is ignored.
443
+ if (auto *Arg = UserArgList.getLastArg (OPT_fsycl_device_code_split_EQ)) {
444
+ StringRef ArgVal{Arg->getValue ()};
445
+ if (ArgVal == " per_kernel" ) {
446
+ return SPLIT_PER_KERNEL;
447
+ }
448
+ if (ArgVal == " per_source" ) {
449
+ return SPLIT_PER_TU;
450
+ }
451
+ if (ArgVal == " off" ) {
452
+ return SPLIT_NONE;
453
+ }
454
+ }
455
+ return SPLIT_AUTO;
456
+ }
457
+
458
+ Expected<PostLinkResult>
459
+ jit_compiler::performPostLink (std::unique_ptr<llvm::Module> Module,
460
+ const InputArgList &UserArgList) {
442
461
// This is a simplified version of `processInputModule` in
443
462
// `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality
444
463
// left out of the algorithm for now.
445
464
446
- // TODO: SplitMode can be controlled by the user.
447
- const auto SplitMode = SPLIT_NONE;
465
+ const auto SplitMode = getDeviceCodeSplitMode (UserArgList);
448
466
449
467
// TODO: EmitOnlyKernelsAsEntryPoints is controlled by
450
468
// `shouldEmitOnlyKernelsAsEntryPoints` in
@@ -480,77 +498,87 @@ llvm::Expected<PostLinkResult> jit_compiler::performPostLink(
480
498
return createStringError (" `invoke_simd` calls detected" );
481
499
}
482
500
483
- // TODO: Implement actual device code splitting. We're just using the splitter
484
- // to obtain additional information about the module for now.
485
-
486
501
std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter (
487
502
ModuleDesc{std::move (Module)}, SplitMode,
488
503
/* IROutputOnly=*/ false , EmitOnlyKernelsAsEntryPoints);
489
504
assert (Splitter->hasMoreSplits ());
490
- if (Splitter->remainingSplits () > 1 ) {
491
- return createStringError (" Device code requires splitting" );
492
- }
493
505
494
506
// TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall
495
507
// be processed.
496
508
497
- ModuleDesc MDesc = Splitter->nextSplit ();
509
+ // TODO: This allocation assumes that there are no further splits required,
510
+ // i.e. there are no mixed SYCL/ESIMD modules.
511
+ RTCBundleInfo BundleInfo{Splitter->remainingSplits ()};
512
+ SmallVector<std::unique_ptr<llvm::Module>> Modules;
498
513
499
- // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when
500
- // `invoke_simd` is supported.
514
+ auto *DevImgInfoIt = BundleInfo.begin ();
515
+ while (Splitter->hasMoreSplits ()) {
516
+ assert (DevImgInfoIt != BundleInfo.end ());
501
517
502
- SmallVector<ModuleDesc, 2 > ESIMDSplits =
503
- splitByESIMD (std::move (MDesc), EmitOnlyKernelsAsEntryPoints);
504
- assert (!ESIMDSplits.empty ());
505
- if (ESIMDSplits.size () > 1 ) {
506
- return createStringError (" Mixing SYCL and ESIMD code is unsupported" );
507
- }
508
- MDesc = std::move (ESIMDSplits.front ());
518
+ ModuleDesc MDesc = Splitter->nextSplit ();
519
+ RTCDevImgInfo &DevImgInfo = *DevImgInfoIt++;
509
520
510
- if (MDesc.isESIMD ()) {
511
- // `sycl-post-link` has a `-lower-esimd` option, but there's no clang driver
512
- // option to influence it. Rather, the driver sets it unconditionally in the
513
- // multi-file output mode, which we are mimicking here.
514
- lowerEsimdConstructs (MDesc, PerformOpts);
515
- }
521
+ // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when
522
+ // `invoke_simd` is supported.
516
523
517
- MDesc.saveSplitInformationAsMetadata ();
518
-
519
- RTCBundleInfo BundleInfo;
520
- BundleInfo.SymbolTable = FrozenSymbolTable{MDesc.entries ().size ()};
521
- transform (MDesc.entries (), BundleInfo.SymbolTable .begin (),
522
- [](Function *F) { return F->getName (); });
523
-
524
- // TODO: Determine what is requested.
525
- GlobalBinImageProps PropReq{
526
- /* EmitKernelParamInfo=*/ true , /* EmitProgramMetadata=*/ true ,
527
- /* EmitExportedSymbols=*/ true , /* EmitImportedSymbols=*/ true ,
528
- /* DeviceGlobals=*/ false };
529
- PropertySetRegistry Properties =
530
- computeModuleProperties (MDesc.getModule (), MDesc.entries (), PropReq);
531
- // TODO: Manually add `compile_target` property as in
532
- // `saveModuleProperties`?
533
- const auto &PropertySets = Properties.getPropSets ();
534
-
535
- BundleInfo.Properties = FrozenPropertyRegistry{PropertySets.size ()};
536
- for (auto &&[KV, FrozenPropSet] : zip (PropertySets, BundleInfo.Properties )) {
537
- const auto &PropertySetName = KV.first ;
538
- const auto &PropertySet = KV.second ;
539
- FrozenPropSet =
540
- FrozenPropertySet{PropertySetName.str (), PropertySet.size ()};
541
- for (auto &&[KV2, FrozenProp] : zip (PropertySet, FrozenPropSet.Values )) {
542
- const auto &PropertyName = KV2.first ;
543
- const auto &PropertyValue = KV2.second ;
544
- FrozenProp = PropertyValue.getType () == PropertyValue::Type::UINT32
545
- ? FrozenPropertyValue{PropertyName.str (),
546
- PropertyValue.asUint32 ()}
547
- : FrozenPropertyValue{
548
- PropertyName.str (), PropertyValue.asRawByteArray (),
549
- PropertyValue.getRawByteArraySize ()};
524
+ SmallVector<ModuleDesc, 2 > ESIMDSplits =
525
+ splitByESIMD (std::move (MDesc), EmitOnlyKernelsAsEntryPoints);
526
+ assert (!ESIMDSplits.empty ());
527
+ if (ESIMDSplits.size () > 1 ) {
528
+ return createStringError (" Mixing SYCL and ESIMD code is unsupported" );
550
529
}
551
- };
530
+ MDesc = std::move (ESIMDSplits.front ());
531
+
532
+ if (MDesc.isESIMD ()) {
533
+ // `sycl-post-link` has a `-lower-esimd` option, but there's no clang
534
+ // driver option to influence it. Rather, the driver sets it
535
+ // unconditionally in the multi-file output mode, which we are mimicking
536
+ // here.
537
+ lowerEsimdConstructs (MDesc, PerformOpts);
538
+ }
539
+
540
+ MDesc.saveSplitInformationAsMetadata ();
541
+
542
+ DevImgInfo.SymbolTable = FrozenSymbolTable{MDesc.entries ().size ()};
543
+ transform (MDesc.entries (), DevImgInfo.SymbolTable .begin (),
544
+ [](Function *F) { return F->getName (); });
545
+
546
+ // TODO: Determine what is requested.
547
+ GlobalBinImageProps PropReq{
548
+ /* EmitKernelParamInfo=*/ true , /* EmitProgramMetadata=*/ true ,
549
+ /* EmitExportedSymbols=*/ true , /* EmitImportedSymbols=*/ true ,
550
+ /* DeviceGlobals=*/ false };
551
+ PropertySetRegistry Properties =
552
+ computeModuleProperties (MDesc.getModule (), MDesc.entries (), PropReq);
553
+ // TODO: Manually add `compile_target` property as in
554
+ // `saveModuleProperties`?
555
+ const auto &PropertySets = Properties.getPropSets ();
556
+
557
+ DevImgInfo.Properties = FrozenPropertyRegistry{PropertySets.size ()};
558
+ for (auto [KV, FrozenPropSet] :
559
+ zip_equal (PropertySets, DevImgInfo.Properties )) {
560
+ const auto &PropertySetName = KV.first ;
561
+ const auto &PropertySet = KV.second ;
562
+ FrozenPropSet =
563
+ FrozenPropertySet{PropertySetName.str (), PropertySet.size ()};
564
+ for (auto [KV2, FrozenProp] :
565
+ zip_equal (PropertySet, FrozenPropSet.Values )) {
566
+ const auto &PropertyName = KV2.first ;
567
+ const auto &PropertyValue = KV2.second ;
568
+ FrozenProp =
569
+ PropertyValue.getType () == PropertyValue::Type::UINT32
570
+ ? FrozenPropertyValue{PropertyName.str (),
571
+ PropertyValue.asUint32 ()}
572
+ : FrozenPropertyValue{PropertyName.str (),
573
+ PropertyValue.asRawByteArray (),
574
+ PropertyValue.getRawByteArraySize ()};
575
+ }
576
+ };
577
+
578
+ Modules.push_back (MDesc.releaseModulePtr ());
579
+ }
552
580
553
- return PostLinkResult{std::move (BundleInfo), MDesc. releaseModulePtr ( )};
581
+ return PostLinkResult{std::move (BundleInfo), std::move (Modules )};
554
582
}
555
583
556
584
Expected<InputArgList>
@@ -607,21 +635,10 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) {
607
635
}
608
636
}
609
637
610
- if (auto DCSMode = AL.getLastArgValue (OPT_fsycl_device_code_split_EQ, " none" );
611
- DCSMode != " none" && DCSMode != " auto" ) {
612
- return createStringError (" Device code splitting is not yet supported" );
613
- }
614
-
615
638
if (!AL.hasFlag (OPT_fsycl_device_code_split_esimd,
616
639
OPT_fno_sycl_device_code_split_esimd, true )) {
617
640
return createStringError (" ESIMD device code split cannot be deactivated" );
618
641
}
619
642
620
- if (AL.hasFlag (OPT_fsycl_dead_args_optimization,
621
- OPT_fno_sycl_dead_args_optimization, false )) {
622
- return createStringError (
623
- " Dead argument optimization must be disabled for runtime compilation" );
624
- }
625
-
626
643
return std::move (AL);
627
644
}
0 commit comments