diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc index e9866d94b762c..847e53cfa7432 100644 --- a/llvm/include/llvm/ProfileData/InstrProfData.inc +++ b/llvm/include/llvm/ProfileData/InstrProfData.inc @@ -739,6 +739,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure, #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime #define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias #define INSTR_PROF_PROFILE_SET_TIMESTAMP __llvm_profile_set_timestamp +#define INSTR_PROF_PROFILE_SAMPLING_VAR __llvm_profile_sampling /* The variable that holds the name of the profile data * specified via command line. */ diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index ea97ab2562a5b..969c2cd12f3f0 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -121,12 +121,18 @@ struct InstrProfOptions { // Use BFI to guide register promotion bool UseBFIInPromotion = false; + // Use sampling to reduce the profile instrumentation runtime overhead. + bool Sampling = false; + // Name of the profile file to use as output std::string InstrProfileOutput; InstrProfOptions() = default; }; +// Create the variable for profile sampling. +void createProfileSamplingVar(Module &M); + // Options for sanitizer coverage instrumentation. 
struct SanitizerCoverageOptions { enum Type { diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index 5b1977b7de9a2..7199f27dbc991 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -43,12 +43,14 @@ class FileSystem; class PGOInstrumentationGenCreateVar : public PassInfoMixin<PGOInstrumentationGenCreateVar> { public: - PGOInstrumentationGenCreateVar(std::string CSInstrName = "") - : CSInstrName(CSInstrName) {} + PGOInstrumentationGenCreateVar(std::string CSInstrName = "", + bool Sampling = false) + : CSInstrName(CSInstrName), ProfileSampling(Sampling) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); private: std::string CSInstrName; + bool ProfileSampling; }; /// The instrumentation (profile-instr-gen) pass for IR based PGO. diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4fd5ee1946bb7..935504b070d2e 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -296,6 +296,9 @@ static cl::opt<AttributorRunOption> AttributorRun( clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); +static cl::opt<bool> EnableSampledInstr( + "enable-sampled-instrumentation", cl::init(false), cl::Hidden, + cl::desc("Enable profile instrumentation sampling (default = off)")); static cl::opt<bool> UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); @@ -847,6 +850,12 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, // Do counter promotion at Level greater than O0. Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; + if (EnableSampledInstr) { + Options.Sampling = true; + // With sampling, there is little benefit to enable counter promotion. 
+ // But note that sampling does work with counter promotion. + Options.DoCounterPromotion = false; + } Options.Atomic = AtomicCounterUpdate; MPM.addPass(InstrProfilingLoweringPass(Options, IsCS)); } @@ -1185,7 +1194,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(PGOIndirectCallPromotion(false, false)); if (IsPGOPreLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) - MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile, + EnableSampledInstr)); if (IsMemprofUse) MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS)); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index f994f8a62c320..c27408dca51aa 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -170,6 +170,30 @@ cl::opt<bool> SkipRetExitBlock( "skip-ret-exit-block", cl::init(true), cl::desc("Suppress counter promotion if exit blocks contain ret.")); +static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore, + cl::init(false), + cl::desc("Do PGO instrumentation sampling")); + +static cl::opt<unsigned> SampledInstrPeriod( + "sampled-instr-period", + cl::desc("Set the profile instrumentation sample period. For each sample " + "period, a fixed number of consecutive samples will be recorded. " + "The number is controlled by 'sampled-instr-burst-duration' flag. " + "The default sample period of 65535 is optimized for generating " + "efficient code that leverages unsigned integer wrapping in " + "overflow."), + cl::init(65535)); + +static cl::opt<unsigned> SampledInstrBurstDuration( + "sampled-instr-burst-duration", + cl::desc("Set the profile instrumentation burst duration, which can range " + "from 0 to one less than the value of 'sampled-instr-period'. 
" + "This number of samples will be recorded for each " + "'sampled-instr-period' count update. Setting to 1 enables " + "simple sampling, in which case it is recommended to set " + "'sampled-instr-period' to a prime number."), + cl::init(200)); + using LoadStorePair = std::pair<Instruction *, Instruction *>; static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { @@ -260,6 +284,9 @@ class InstrLowerer final { /// Returns true if profile counter update register promotion is enabled. bool isCounterPromotionEnabled() const; + /// Return true if profile sampling is enabled. + bool isSamplingEnabled() const; + /// Count the number of instrumented value sites for the function. void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); @@ -291,6 +318,9 @@ class InstrLowerer final { /// acts on. Value *getCounterAddress(InstrProfCntrInstBase *I); + /// Lower the incremental instructions under profile sampling predicates. + void doSampling(Instruction *I); + /// Get the region counters for an increment, creating them if necessary. /// /// If the counter array doesn't yet exist, the profile data variables @@ -635,33 +665,169 @@ PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, return PreservedAnalyses::none(); } +// +// Perform instrumentation sampling. +// +// There are 3 flavors of sampling: +// (1) Full burst sampling: We transform: +// Increment_Instruction; +// to: +// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// Increment_Instruction; +// } +// __llvm_profile_sampling__ += 1; +// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { +// __llvm_profile_sampling__ = 0; +// } +// +// "__llvm_profile_sampling__" is a thread-local global shared by all PGO +// counters (value-instrumentation and edge instrumentation). +// +// (2) Fast burst sampling: +// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will +// wrap around to zero when it overflows. 
In this case, the second check is +// unnecessary, so we won't generate the second check when the SampledInstrPeriod is +// set to 65535 (64K - 1). The code after the transformation: +// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) { +// Increment_Instruction; +// } +// __llvm_profile_sampling__ += 1; +// +// (3) Simple sampling: +// When SampledInstrBurstDuration is set to 1, we do a simple sampling: +// __llvm_profile_sampling__ += 1; +// if (__llvm_profile_sampling__ >= SampledInstrPeriod) { +// __llvm_profile_sampling__ = 0; +// Increment_Instruction; +// } +// +// Note that the code snippet after the transformation can still be counter +// promoted. However, with sampling enabled, counter updates are expected to +// be infrequent, making the benefits of counter promotion negligible. +// Moreover, counter promotion can potentially cause issues in server +// applications, particularly when the counters are dumped without a clean +// exit. To mitigate this risk, counter promotion is disabled by default when +// sampling is enabled. This behavior can be overridden using the internal +// option. +void InstrLowerer::doSampling(Instruction *I) { + if (!isSamplingEnabled()) + return; + + unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue(); + unsigned SampledPeriod = SampledInstrPeriod.getValue(); + if (SampledBurstDuration >= SampledPeriod) { + report_fatal_error( + "SampledPeriod needs to be greater than SampledBurstDuration"); + } + bool UseShort = (SampledPeriod <= USHRT_MAX); + bool IsSimpleSampling = (SampledBurstDuration == 1); + // If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate + // the simple sampling style code. 
+ bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535); + + auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) { + if (UseShort) + return Builder.getInt16(C); + else + return Builder.getInt32(C); + }; + + IntegerType *SamplingVarTy; + if (UseShort) + SamplingVarTy = Type::getInt16Ty(M.getContext()); + else + SamplingVarTy = Type::getInt32Ty(M.getContext()); + auto *SamplingVar = + M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); + assert(SamplingVar && "SamplingVar not set properly"); + + // Create the condition for checking the burst duration. + Instruction *SamplingVarIncr; + Value *NewSamplingVarVal; + MDBuilder MDB(I->getContext()); + MDNode *BranchWeight; + IRBuilder<> CondBuilder(I); + auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar); + if (IsSimpleSampling) { + // For the simple sampling, just create the load and increments. + IRBuilder<> IncBuilder(I); + NewSamplingVarVal = + IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); + SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); + } else { + // For the burst-sampling, create the conditional update. + auto *DurationCond = CondBuilder.CreateICmpULE( + LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration)); + BranchWeight = MDB.createBranchWeights( + SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration); + Instruction *ThenTerm = SplitBlockAndInsertIfThen( + DurationCond, I, /* Unreachable */ false, BranchWeight); + IRBuilder<> IncBuilder(I); + NewSamplingVarVal = + IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1)); + SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar); + I->moveBefore(ThenTerm); + } + + if (IsFastSampling) + return; + + // Create the condition for checking the period. 
+ Instruction *ThenTerm, *ElseTerm; + IRBuilder<> PeriodCondBuilder(SamplingVarIncr); + auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE( + NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod)); + BranchWeight = MDB.createBranchWeights(1, SampledPeriod); + SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm, + &ElseTerm, BranchWeight); + + // For the simple sampling, the counter update happens in sampling var reset. + if (IsSimpleSampling) + I->moveBefore(ThenTerm); + + IRBuilder<> ResetBuilder(ThenTerm); + ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar); + SamplingVarIncr->moveBefore(ElseTerm); +} + bool InstrLowerer::lowerIntrinsics(Function *F) { bool MadeChange = false; PromotionCandidates.clear(); + SmallVector<InstrProfInstBase *, 8> InstrProfInsts; + + // To ensure compatibility with sampling, we save the intrinsics into + // a buffer to prevent potential breakage of the iterator (as the + // intrinsics will be moved to a different BB). for (BasicBlock &BB : *F) { for (Instruction &Instr : llvm::make_early_inc_range(BB)) { - if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) { - lowerIncrement(IPIS); - MadeChange = true; - } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { - lowerIncrement(IPI); - MadeChange = true; - } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) { - lowerTimestamp(IPC); - MadeChange = true; - } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { - lowerCover(IPC); - MadeChange = true; - } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { - lowerValueProfileInst(IPVP); - MadeChange = true; - } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) { - IPMP->eraseFromParent(); - MadeChange = true; - } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) { - lowerMCDCTestVectorBitmapUpdate(IPBU); - MadeChange = true; - } + if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr)) + InstrProfInsts.push_back(IP); + } + } + + for (auto *Instr : InstrProfInsts) { + doSampling(Instr); + if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) { + lowerIncrement(IPIS); + MadeChange = true; + } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) { + 
lowerIncrement(IPI); + MadeChange = true; + } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) { + lowerTimestamp(IPC); + MadeChange = true; + } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) { + lowerCover(IPC); + MadeChange = true; + } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) { + lowerValueProfileInst(IPVP); + MadeChange = true; + } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) { + IPMP->eraseFromParent(); + MadeChange = true; + } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) { + lowerMCDCTestVectorBitmapUpdate(IPBU); + MadeChange = true; } } @@ -684,6 +850,12 @@ bool InstrLowerer::isRuntimeCounterRelocationEnabled() const { return TT.isOSFuchsia(); } +bool InstrLowerer::isSamplingEnabled() const { + if (SampledInstr.getNumOccurrences() > 0) + return SampledInstr; + return Options.Sampling; +} + bool InstrLowerer::isCounterPromotionEnabled() const { if (DoCounterPromotion.getNumOccurrences() > 0) return DoCounterPromotion; @@ -754,6 +926,9 @@ bool InstrLowerer::lower() { if (NeedsRuntimeHook) MadeChange = emitRuntimeHook(); + if (!IsCS && isSamplingEnabled()) + createProfileSamplingVar(M); + bool ContainsProfiling = containsProfilingIntrinsics(M); GlobalVariable *CoverageNamesVar = M.getNamedGlobal(getCoverageUnusedNamesVarName()); @@ -1952,3 +2127,29 @@ void InstrLowerer::emitInitialization() { appendToGlobalCtors(M, F, 0); } + +namespace llvm { +// Create the variable for profile sampling. 
+void createProfileSamplingVar(Module &M) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR)); + IntegerType *SamplingVarTy; + Constant *ValueZero; + if (SampledInstrPeriod.getValue() <= USHRT_MAX) { + SamplingVarTy = Type::getInt16Ty(M.getContext()); + ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0)); + } else { + SamplingVarTy = Type::getInt32Ty(M.getContext()); + ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0)); + } + auto SamplingVar = new GlobalVariable( + M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName); + SamplingVar->setVisibility(GlobalValue::DefaultVisibility); + SamplingVar->setThreadLocal(true); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + SamplingVar->setLinkage(GlobalValue::ExternalLinkage); + SamplingVar->setComdat(M.getOrInsertComdat(VarName)); + } + appendToCompilerUsed(M, SamplingVar); +} +} // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 35b1bbf21be97..4f0ccc8f962db 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1875,6 +1875,8 @@ PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) { // The variable in a comdat may be discarded by LTO. Ensure the declaration // will be retained. 
appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true)); + if (ProfileSampling) + createProfileSamplingVar(M); PreservedAnalyses PA; PA.preserve<FunctionAnalysisManagerModuleProxy>(); PA.preserveSet<AllAnalysesOn<Function>>(); diff --git a/llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll b/llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll new file mode 100644 index 0000000000000..1c8be82715f25 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/Inputs/cspgo_bar_sample.ll @@ -0,0 +1,82 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any +$__llvm_profile_raw_version = comdat any +$__llvm_profile_sampling = comdat any + +@odd = common dso_local local_unnamed_addr global i32 0, align 4 +@even = common dso_local local_unnamed_addr global i32 0, align 4 +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, comdat +@__llvm_profile_sampling = thread_local global i16 0, comdat +@llvm.used = appending global [1 x i8*] [i8* bitcast (i64* @__llvm_profile_sampling to i8*)], section "llvm.metadata" + +define dso_local void @bar(i32 %n) !prof !30 { +entry: + %call = tail call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !31 + +if.then: + %0 = load i32, i32* @odd, align 4, !tbaa !32 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4, !tbaa !32 + br label %if.end + +if.else: + %1 = load i32, i32* @even, align 4, !tbaa !32 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4, !tbaa !32 + br label %if.end + +if.end: + ret void +} + +define internal fastcc i32 @cond(i32 %i) #1 !prof !30 !PGOFuncName !36 { +entry: + %rem = srem i32 %i, 2 + ret i32 %rem +} + +attributes #1 = { inlinehint noinline } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, 
!"EnableSplitLTOUnit", i32 0} +!2 = !{i32 1, !"ProfileSummary", !3} +!3 = !{!4, !5, !6, !7, !8, !9, !10, !11} +!4 = !{!"ProfileFormat", !"InstrProf"} +!5 = !{!"TotalCount", i64 500002} +!6 = !{!"MaxCount", i64 200000} +!7 = !{!"MaxInternalCount", i64 100000} +!8 = !{!"MaxFunctionCount", i64 200000} +!9 = !{!"NumCounts", i64 6} +!10 = !{!"NumFunctions", i64 4} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28} +!13 = !{i32 10000, i64 200000, i32 1} +!14 = !{i32 100000, i64 200000, i32 1} +!15 = !{i32 200000, i64 200000, i32 1} +!16 = !{i32 300000, i64 200000, i32 1} +!17 = !{i32 400000, i64 200000, i32 1} +!18 = !{i32 500000, i64 100000, i32 4} +!19 = !{i32 600000, i64 100000, i32 4} +!20 = !{i32 700000, i64 100000, i32 4} +!21 = !{i32 800000, i64 100000, i32 4} +!22 = !{i32 900000, i64 100000, i32 4} +!23 = !{i32 950000, i64 100000, i32 4} +!24 = !{i32 990000, i64 100000, i32 4} +!25 = !{i32 999000, i64 100000, i32 4} +!26 = !{i32 999900, i64 100000, i32 4} +!27 = !{i32 999990, i64 100000, i32 4} +!28 = !{i32 999999, i64 1, i32 6} +!30 = !{!"function_entry_count", i64 200000} +!31 = !{!"branch_weights", i32 100000, i32 100000} +!32 = !{!33, !33, i64 0} +!33 = !{!"int", !34, i64 0} +!34 = !{!"omnipotent char", !35, i64 0} +!35 = !{!"Simple C/C++ TBAA"} +!36 = !{!"cspgo_bar.c:cond"} diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll new file mode 100644 index 0000000000000..9d083fe04015e --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/counter_promo_sampling.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s --passes=pgo-instr-gen,instrprof --do-counter-promotion=true --sampled-instrumentation=true --skip-ret-exit-block=0 -S | FileCheck --check-prefixes=SAMPLING,PROMO %s + +; SAMPLING: $__llvm_profile_sampling = comdat any +; SAMPLING: @__llvm_profile_sampling = thread_local global i16 0, comdat + +define void @foo(i32 %n, i32 
%N) { +; SAMPLING-LABEL: @foo +; SAMPLING: %[[VV0:[0-9]+]] = load i16, ptr @__llvm_profile_sampling, align 2 +; SAMPLING: %[[VV1:[0-9]+]] = icmp ule i16 %[[VV0]], 200 +; SAMPLING: br i1 %[[VV1]], label {{.*}}, label {{.*}}, !prof !0 +; SAMPLING: {{.*}} = load {{.*}} @__profc_foo{{.*}} 3) +; SAMPLING-NEXT: add +; SAMPLING-NEXT: store {{.*}}@__profc_foo{{.*}}3) +bb: + %tmp = add nsw i32 %n, 1 + %tmp1 = add nsw i32 %n, -1 + br label %bb2 + +bb2: +; PROMO: phi {{.*}} +; PROMO-NEXT: phi {{.*}} +; PROMO-NEXT: phi {{.*}} +; PROMO-NEXT: phi {{.*}} + %i.0 = phi i32 [ 0, %bb ], [ %tmp10, %bb9 ] + %tmp3 = icmp slt i32 %i.0, %tmp + br i1 %tmp3, label %bb4, label %bb5 + +bb4: + tail call void @bar(i32 1) + br label %bb9 + +bb5: + %tmp6 = icmp slt i32 %i.0, %tmp1 + br i1 %tmp6, label %bb7, label %bb8 + +bb7: + tail call void @bar(i32 2) + br label %bb9 + +bb8: + tail call void @bar(i32 3) + br label %bb9 + +bb9: +; SAMPLING: phi {{.*}} +; SAMPLING-NEXT: %[[V1:[0-9]+]] = add i16 {{.*}}, 1 +; SAMPLING-NEXT: store i16 %[[V1]], ptr @__llvm_profile_sampling, align 2 +; SAMPLING: phi {{.*}} +; SAMPLING-NEXT: %[[V2:[0-9]+]] = add i16 {{.*}}, 1 +; SAMPLING-NEXT: store i16 %[[V2]], ptr @__llvm_profile_sampling, align 2 +; SAMPLING: phi {{.*}} +; SAMPLING-NEXT: %[[V3:[0-9]+]] = add i16 {{.*}}, 1 +; SAMPLING-NEXT: store i16 %[[V3]], ptr @__llvm_profile_sampling, align 2 +; PROMO: %[[LIVEOUT3:[a-z0-9]+]] = phi {{.*}} +; PROMO-NEXT: %[[LIVEOUT2:[a-z0-9]+]] = phi {{.*}} +; PROMO-NEXT: %[[LIVEOUT1:[a-z0-9]+]] = phi {{.*}} + %tmp10 = add nsw i32 %i.0, 1 + %tmp11 = icmp slt i32 %tmp10, %N + br i1 %tmp11, label %bb2, label %bb12 + +bb12: + ret void +; PROMO: %[[CHECK1:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} +; PROMO-NEXT: add {{.*}} %[[CHECK1]], %[[LIVEOUT1]] +; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}} +; PROMO-NEXT: %[[CHECK2:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} 1) +; PROMO-NEXT: add {{.*}} %[[CHECK2]], %[[LIVEOUT2]] +; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}1) +; PROMO-NEXT: 
%[[CHECK3:[a-z0-9.]+]] = load {{.*}} @__profc_foo{{.*}} 2) +; PROMO-NEXT: add {{.*}} %[[CHECK3]], %[[LIVEOUT3]] +; PROMO-NEXT: store {{.*}}@__profc_foo{{.*}}2) +; PROMO-NOT: @__profc_foo{{.*}}) + +} + +declare void @bar(i32) + +; SAMPLING: !0 = !{!"branch_weights", i32 200, i32 65336} diff --git a/llvm/test/Transforms/PGOProfile/cspgo_sample.ll b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll new file mode 100644 index 0000000000000..97ad4d00c9d9c --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/cspgo_sample.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; REQUIRES: x86-registered-target + +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %S/Inputs/cspgo_bar_sample.ll -o %t2.bc +; RUN: llvm-lto2 run -lto-cspgo-profile-file=alloc -enable-sampled-instrumentation -lto-cspgo-gen -save-temps -o %t %t1.bc %t2.bc \ +; RUN: -r=%t1.bc,foo,pl \ +; RUN: -r=%t1.bc,bar,l \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,__llvm_profile_filename,plx \ +; RUN: -r=%t1.bc,__llvm_profile_raw_version,plx \ +; RUN: -r=%t1.bc,__llvm_profile_sampling,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,odd,pl \ +; RUN: -r=%t2.bc,even,pl \ +; RUN: -r=%t2.bc,__llvm_profile_filename,x \ +; RUN: -r=%t2.bc,__llvm_profile_raw_version,x \ +; RUN: -r=%t2.bc,__llvm_profile_sampling, +; RUN: llvm-dis %t.1.4.opt.bc -o - | FileCheck %s --check-prefix=CSGEN + +; CSGEN: @__llvm_profile_sampling = thread_local global i16 0, comdat +; CSGEN: @__profc_ +; CSGEN: @__profd_ + +source_filename = "cspgo.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any +$__llvm_profile_raw_version = comdat any +$__llvm_profile_sampling = comdat any +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, 
comdat +@__llvm_profile_sampling = thread_local global i16 0, comdat +@llvm.used = appending global [1 x i8*] [i8* bitcast (i64* @__llvm_profile_sampling to i8*)], section "llvm.metadata" + +define dso_local void @foo() #0 !prof !30 { +entry: + br label %for.body + +for.body: + %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ] + tail call void @bar(i32 %i.06) #3 + %add = or i32 %i.06, 1 + tail call void @bar(i32 %add) #3 + %add1 = add nuw nsw i32 %i.06, 2 + %cmp = icmp ult i32 %add1, 200000 + br i1 %cmp, label %for.body, label %for.end, !prof !31 + +for.end: + ret void +} + +; CSGEN-LABEL: @foo +; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] +; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 +; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 + +declare dso_local void @bar(i32) + +define dso_local i32 @main() !prof !30 { +entry: + tail call void @foo() + ret i32 0 +} +; CSGEN-LABEL: @main +; CSGEN: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; CSGEN-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP0]], 201 +; CSGEN-NEXT: br i1 [[TMP1]], label %{{.*}}, label %{{.*}}, !prof [[PROF:![0-9]+]] +; CSGEN: [[TMP2:%.*]] = add i16 {{.*}}, 1 +; CSGEN-NEXT: store i16 [[TMP2]], ptr @__llvm_profile_sampling, align 2 + +attributes #0 = { "target-cpu"="x86-64" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"EnableSplitLTOUnit", i32 0} +!2 = !{i32 1, !"ProfileSummary", !3} +!3 = !{!4, !5, !6, !7, !8, !9, !10, !11} +!4 = !{!"ProfileFormat", !"InstrProf"} +!5 = !{!"TotalCount", i64 500002} +!6 = !{!"MaxCount", i64 200000} +!7 = !{!"MaxInternalCount", i64 100000} +!8 = !{!"MaxFunctionCount", i64 200000} +!9 = !{!"NumCounts", i64 6} +!10 = !{!"NumFunctions", i64 4} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, 
!22, !23, !24, !25, !26, !27, !28} +!13 = !{i32 10000, i64 200000, i32 1} +!14 = !{i32 100000, i64 200000, i32 1} +!15 = !{i32 200000, i64 200000, i32 1} +!16 = !{i32 300000, i64 200000, i32 1} +!17 = !{i32 400000, i64 200000, i32 1} +!18 = !{i32 500000, i64 100000, i32 4} +!19 = !{i32 600000, i64 100000, i32 4} +!20 = !{i32 700000, i64 100000, i32 4} +!21 = !{i32 800000, i64 100000, i32 4} +!22 = !{i32 900000, i64 100000, i32 4} +!23 = !{i32 950000, i64 100000, i32 4} +!24 = !{i32 990000, i64 100000, i32 4} +!25 = !{i32 999000, i64 100000, i32 4} +!26 = !{i32 999900, i64 100000, i32 4} +!27 = !{i32 999990, i64 100000, i32 4} +!28 = !{i32 999999, i64 1, i32 6} +!30 = !{!"function_entry_count", i64 1} +!31 = !{!"branch_weights", i32 100000, i32 1} + +; CSGEN: [[PROF]] = !{!"branch_weights", i32 200, i32 65336} + diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll new file mode 100644 index 0000000000000..dcc1e805ba6f6 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s --passes=instrprof --sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION,SAMPLE-WEIGHT +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=100 -S | FileCheck %s --check-prefixes=SAMPLE-VAR,SAMPLE-CODE,SAMPLE-DURATION100,SAMPLE-WEIGHT100 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_raw_version = comdat any + +; SAMPLE-VAR: $__llvm_profile_sampling = comdat any + +@__llvm_profile_raw_version = constant i64 72057594037927940, comdat +@__profn_f = private constant [1 x i8] c"f" + +; SAMPLE-VAR: @__llvm_profile_sampling = thread_local global i16 0, comdat +; SAMPLE-VAR: @__profc_f = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8 +; SAMPLE-VAR: 
@__profd_f = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 -3706093650706652785, i64 12884901887, i64 sub (i64 ptrtoint (ptr @__profc_f to i64), i64 ptrtoint (ptr @__profd_f to i64)), i64 0, ptr @f.local, ptr null, i32 1, [3 x i16] zeroinitializer, i32 0 }, section "__llvm_prf_data", comdat($__profc_f), align 8 +; SAMPLE-VAR: @__llvm_prf_nm = private constant {{.*}}, section "__llvm_prf_names", align 1 +; SAMPLE-VAR: @llvm.compiler.used = appending global [2 x ptr] [ptr @__llvm_profile_sampling, ptr @__profd_f], section "llvm.metadata" +; SAMPLE-VAR: @llvm.used = appending global [1 x ptr] [ptr @__llvm_prf_nm], section "llvm.metadata" + + +define void @f() { +; SAMPLE-CODE-LABEL: @f( +; SAMPLE-CODE: entry: +; SAMPLE-CODE-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; SAMPLE-DURATION: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 200 +; SAMPLE-DURATION100: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 100 +; SAMPLE-CODE: br i1 [[TMP1]], label %[[TMP2:.*]], label %[[TMP4:.*]], !prof !0 +; SAMPLE-CODE: [[TMP2]]: +; SAMPLE-CODE-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f +; SAMPLE-CODE-NEXT: [[TMP3:%.*]] = add i64 [[PGOCOUNT]], 1 +; SAMPLE-CODE-NEXT: store i64 [[TMP3]], ptr @__profc_f +; SAMPLE-CODE-NEXT: br label %[[TMP4]] +; SAMPLE-CODE: [[TMP4]]: +; SAMPLE-CODE-NEXT: [[TMP5:%.*]] = add i16 [[TMP0]], 1 +; SAMPLE-CODE-NEXT: store i16 [[TMP5]], ptr @__llvm_profile_sampling, align 2 +; SAMPLE-CODE-NEXT: ret void +; +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + ret void +} + +; SAMPLE-WEIGHT: !0 = !{!"branch_weights", i32 200, i32 65336} +; SAMPLE-WEIGHT100: !0 = !{!"branch_weights", i32 100, i32 65436} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll new file mode 
100644 index 0000000000000..57d1a0cd33fbe --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s --passes=instrprof -sampled-instrumentation --sampled-instr-period=1009 --sampled-instr-burst-duration=32 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_raw_version = comdat any + +@__llvm_profile_raw_version = constant i64 72057594037927940, comdat +@__profn_f = private constant [1 x i8] c"f" + +define void @f() { +; CHECK-LABEL: define void @f() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i16 [[TMP0]], 32 +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[PGOCOUNT]], 1 +; CHECK-NEXT: store i64 [[TMP3]], ptr @__profc_f, align 8 +; CHECK-NEXT: br label %[[BB4]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = add i16 [[TMP0]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp uge i16 [[TMP5]], 1009 +; CHECK-NEXT: br i1 [[TMP6]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB7]]: +; CHECK-NEXT: store i16 0, ptr @__llvm_profile_sampling, align 2 +; CHECK-NEXT: br label %[[BB9:.*]] +; CHECK: [[BB8]]: +; CHECK-NEXT: store i16 [[TMP5]], ptr @__llvm_profile_sampling, align 2 +; CHECK-NEXT: br label %[[BB9]] +; CHECK: [[BB9]]: +; CHECK-NEXT: ret void +; +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + ret void +} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) +;. 
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 32, i32 978} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1009} +;. diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll new file mode 100644 index 0000000000000..1ad889524bc6a --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-period=1000019 --sampled-instr-burst-duration=3000 -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_raw_version = comdat any + +@__llvm_profile_raw_version = constant i64 72057594037927940, comdat +@__profn_f = private constant [1 x i8] c"f" + +define void @f() { +; CHECK-LABEL: define void @f() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @__llvm_profile_sampling, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 [[TMP0]], 3000 +; CHECK-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB4:.*]], !prof [[PROF0:![0-9]+]] +; CHECK: [[BB2]]: +; CHECK-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[PGOCOUNT]], 1 +; CHECK-NEXT: store i64 [[TMP3]], ptr @__profc_f, align 8 +; CHECK-NEXT: br label %[[BB4]] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = icmp uge i32 [[TMP5]], 1000019 +; CHECK-NEXT: br i1 [[TMP6]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[BB7]]: +; CHECK-NEXT: store i32 0, ptr @__llvm_profile_sampling, align 4 +; CHECK-NEXT: br label %[[BB9:.*]] +; CHECK: [[BB8]]: +; CHECK-NEXT: store i32 [[TMP5]], ptr @__llvm_profile_sampling, align 4 +; CHECK-NEXT: br label %[[BB9]] +; CHECK: [[BB9]]: 
+; CHECK-NEXT: ret void +; +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + ret void +} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 3000, i32 997020} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1000019} +;. diff --git a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll new file mode 100644 index 0000000000000..8e846bbf1d982 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=1 --sampled-instr-period=1009 -S | FileCheck %s --check-prefix=PERIOD1009 +; RUN: opt < %s --passes=instrprof --sampled-instrumentation --sampled-instr-burst-duration=1 -S | FileCheck %s --check-prefix=DEFAULTPERIOD + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_raw_version = comdat any + +@__llvm_profile_raw_version = constant i64 72057594037927940, comdat +@__profn_f = private constant [1 x i8] c"f" + +define void @f() { +; PERIOD1009-LABEL: define void @f() { +; PERIOD1009-NEXT: [[ENTRY:.*:]] +; PERIOD1009-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; PERIOD1009-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 +; PERIOD1009-NEXT: [[TMP2:%.*]] = icmp uge i16 [[TMP1]], 1009 +; PERIOD1009-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB5:.*]], !prof [[PROF0:![0-9]+]] +; PERIOD1009: [[BB3]]: +; PERIOD1009-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 +; PERIOD1009-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 +; PERIOD1009-NEXT: store i64 [[TMP4]], ptr @__profc_f, align 8 +; PERIOD1009-NEXT: store 
i16 0, ptr @__llvm_profile_sampling, align 2 +; PERIOD1009-NEXT: br label %[[BB6:.*]] +; PERIOD1009: [[BB5]]: +; PERIOD1009-NEXT: store i16 [[TMP1]], ptr @__llvm_profile_sampling, align 2 +; PERIOD1009-NEXT: br label %[[BB6]] +; PERIOD1009: [[BB6]]: +; PERIOD1009-NEXT: ret void +; +; DEFAULTPERIOD-LABEL: define void @f() { +; DEFAULTPERIOD-NEXT: [[ENTRY:.*:]] +; DEFAULTPERIOD-NEXT: [[TMP0:%.*]] = load i16, ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: [[TMP1:%.*]] = add i16 [[TMP0]], 1 +; DEFAULTPERIOD-NEXT: [[TMP2:%.*]] = icmp uge i16 [[TMP1]], -1 +; DEFAULTPERIOD-NEXT: br i1 [[TMP2]], label %[[BB3:.*]], label %[[BB5:.*]], !prof [[PROF0:![0-9]+]] +; DEFAULTPERIOD: [[BB3]]: +; DEFAULTPERIOD-NEXT: [[PGOCOUNT:%.*]] = load i64, ptr @__profc_f, align 8 +; DEFAULTPERIOD-NEXT: [[TMP4:%.*]] = add i64 [[PGOCOUNT]], 1 +; DEFAULTPERIOD-NEXT: store i64 [[TMP4]], ptr @__profc_f, align 8 +; DEFAULTPERIOD-NEXT: store i16 0, ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: br label %[[BB6:.*]] +; DEFAULTPERIOD: [[BB5]]: +; DEFAULTPERIOD-NEXT: store i16 [[TMP1]], ptr @__llvm_profile_sampling, align 2 +; DEFAULTPERIOD-NEXT: br label %[[BB6]] +; DEFAULTPERIOD: [[BB6]]: +; DEFAULTPERIOD-NEXT: ret void +; +entry: + call void @llvm.instrprof.increment(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__profn_f, i32 0, i32 0), i64 12884901887, i32 1, i32 0) + ret void +} + +declare void @llvm.instrprof.increment(i8*, i64, i32, i32) +;. +; PERIOD1009: [[PROF0]] = !{!"branch_weights", i32 1, i32 1009} +;. +; DEFAULTPERIOD: [[PROF0]] = !{!"branch_weights", i32 1, i32 65535} +;.