diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99c4982c58b47..c3dede31540d6 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2488,20 +2488,21 @@ DeleteDeadIFuncs(Module &M,
 // Follows the use-def chain of \p V backwards until it finds a Function,
 // in which case it collects in \p Versions. Return true on successful
 // use-def chain traversal, false otherwise.
-static bool collectVersions(TargetTransformInfo &TTI, Value *V,
-                            SmallVectorImpl<Function *> &Versions) {
+static bool
+collectVersions(Value *V, SmallVectorImpl<Function *> &Versions,
+                function_ref<TargetTransformInfo &(Function &)> GetTTI) {
   if (auto *F = dyn_cast<Function>(V)) {
-    if (!TTI.isMultiversionedFunction(*F))
+    if (!GetTTI(*F).isMultiversionedFunction(*F))
       return false;
     Versions.push_back(F);
   } else if (auto *Sel = dyn_cast<SelectInst>(V)) {
-    if (!collectVersions(TTI, Sel->getTrueValue(), Versions))
+    if (!collectVersions(Sel->getTrueValue(), Versions, GetTTI))
       return false;
-    if (!collectVersions(TTI, Sel->getFalseValue(), Versions))
+    if (!collectVersions(Sel->getFalseValue(), Versions, GetTTI))
       return false;
   } else if (auto *Phi = dyn_cast<PHINode>(V)) {
     for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I)
-      if (!collectVersions(TTI, Phi->getIncomingValue(I), Versions))
+      if (!collectVersions(Phi->getIncomingValue(I), Versions, GetTTI))
         return false;
   } else {
     // Unknown instruction type. Bail.
@@ -2510,31 +2511,43 @@ static bool collectVersions(TargetTransformInfo &TTI, Value *V,
   return true;
 }
 
-// Bypass the IFunc Resolver of MultiVersioned functions when possible. To
-// deduce whether the optimization is legal we need to compare the target
-// features between caller and callee versions. The criteria for bypassing
-// the resolver are the following:
-//
-// * If the callee's feature set is a subset of the caller's feature set,
-//   then the callee is a candidate for direct call.
-//
-// * Among such candidates the one of highest priority is the best match
-//   and it shall be picked, unless there is a version of the callee with
-//   higher priority than the best match which cannot be picked from a
-//   higher priority caller (directly or through the resolver).
-//
-// * For every higher priority callee version than the best match, there
-//   is a higher priority caller version whose feature set availability
-//   is implied by the callee's feature set.
+// Try to statically resolve calls to versioned functions when possible. First
+// we identify the function versions which are associated with an IFUNC symbol.
+// We do that by examining the resolver function of the IFUNC. Once we have
+// collected all the function versions, we sort them in decreasing priority
+// order. This is necessary for determining the most suitable callee version
+// for each caller version. We then collect all the callsites to versioned
+// functions. The static resolution is performed by comparing the feature sets
+// between callers and callees. Specifically:
+// * Start a walk over caller and callee lists simultaneously in order of
+//   decreasing priority.
+// * Statically resolve calls from the current caller to the current callee,
+//   iff the caller feature bits are a superset of the callee feature bits.
+// * For FMV callers, as long as the caller feature bits are a subset of the
+//   callee feature bits, advance to the next callee. This effectively prevents
+//   considering the current callee as a candidate for static resolution by
+//   following callers (explanation: preceding callers would not have been
+//   selected in a hypothetical runtime execution).
+// * Advance to the next caller.
 //
+// Presentation in EuroLLVM2025:
+// https://www.youtube.com/watch?v=k54MFimPz-A&t=867s
 static bool OptimizeNonTrivialIFuncs(
     Module &M, function_ref<TargetTransformInfo &(Function &)> GetTTI) {
   bool Changed = false;
 
-  // Cache containing the mask constructed from a function's target features.
+  // Map containing the feature bits for a given function.
   DenseMap<Function *, APInt> FeatureMask;
+  // Map containing all the function versions corresponding to an IFunc symbol.
+  DenseMap<GlobalIFunc *, SmallVector<Function *>> VersionedFuncs;
+  // Map containing the IFunc symbol a function is version of.
+  DenseMap<Function *, GlobalIFunc *> VersionOf;
+  // List of all the interesting IFuncs found in the module.
+  SmallVector<GlobalIFunc *> IFuncs;
 
   for (GlobalIFunc &IF : M.ifuncs()) {
+    LLVM_DEBUG(dbgs() << "Examining IFUNC " << IF.getName() << "\n");
+
     if (IF.isInterposable())
       continue;
 
@@ -2545,107 +2558,147 @@ static bool OptimizeNonTrivialIFuncs(
     if (Resolver->isInterposable())
       continue;
 
-    TargetTransformInfo &TTI = GetTTI(*Resolver);
-
-    // Discover the callee versions.
-    SmallVector<Function *> Callees;
-    if (any_of(*Resolver, [&TTI, &Callees](BasicBlock &BB) {
+    SmallVector<Function *> Versions;
+    // Discover the versioned functions.
+    if (any_of(*Resolver, [&](BasicBlock &BB) {
           if (auto *Ret = dyn_cast_or_null<ReturnInst>(BB.getTerminator()))
-            if (!collectVersions(TTI, Ret->getReturnValue(), Callees))
+            if (!collectVersions(Ret->getReturnValue(), Versions, GetTTI))
               return true;
           return false;
         }))
       continue;
 
-    if (Callees.empty())
+    if (Versions.empty())
       continue;
 
-    LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
-                      << Resolver->getName() << "\n");
-
-    // Cache the feature mask for each callee.
-    for (Function *Callee : Callees) {
-      auto [It, Inserted] = FeatureMask.try_emplace(Callee);
+    for (Function *V : Versions) {
+      VersionOf.insert({V, &IF});
+      auto [It, Inserted] = FeatureMask.try_emplace(V);
       if (Inserted)
-        It->second = TTI.getFeatureMask(*Callee);
+        It->second = GetTTI(*V).getFeatureMask(*V);
     }
 
-    // Sort the callee versions in decreasing priority order.
-    sort(Callees, [&](auto *LHS, auto *RHS) {
+    // Sort function versions in decreasing priority order.
+    sort(Versions, [&](auto *LHS, auto *RHS) {
      return FeatureMask[LHS].ugt(FeatureMask[RHS]);
    });
 
-    // Find the callsites and cache the feature mask for each caller.
-    SmallVector<Function *> Callers;
+    IFuncs.push_back(&IF);
+    VersionedFuncs.try_emplace(&IF, std::move(Versions));
+  }
+
+  for (GlobalIFunc *CalleeIF : IFuncs) {
+    SmallVector<Function *> NonFMVCallers;
+    DenseSet<GlobalIFunc *> CallerIFuncs;
     DenseMap<Function *, SmallVector<CallBase *>> CallSites;
-    for (User *U : IF.users()) {
+
+    // Find the callsites.
+    for (User *U : CalleeIF->users()) {
       if (auto *CB = dyn_cast<CallBase>(U)) {
-        if (CB->getCalledOperand() == &IF) {
+        if (CB->getCalledOperand() == CalleeIF) {
           Function *Caller = CB->getFunction();
-          auto [FeatIt, FeatInserted] = FeatureMask.try_emplace(Caller);
-          if (FeatInserted)
-            FeatIt->second = TTI.getFeatureMask(*Caller);
-          auto [CallIt, CallInserted] = CallSites.try_emplace(Caller);
-          if (CallInserted)
-            Callers.push_back(Caller);
-          CallIt->second.push_back(CB);
+          GlobalIFunc *CallerIF = nullptr;
+          TargetTransformInfo &TTI = GetTTI(*Caller);
+          bool CallerIsFMV = TTI.isMultiversionedFunction(*Caller);
+          // The caller is a version of a known IFunc.
+          if (auto It = VersionOf.find(Caller); It != VersionOf.end())
+            CallerIF = It->second;
+          else if (!CallerIsFMV && OptimizeNonFMVCallers) {
+            // The caller is non-FMV.
+            auto [It, Inserted] = FeatureMask.try_emplace(Caller);
+            if (Inserted)
+              It->second = TTI.getFeatureMask(*Caller);
+          } else
+            // The caller is none of the above, skip.
+            continue;
+          auto [It, Inserted] = CallSites.try_emplace(Caller);
+          if (Inserted) {
+            if (CallerIsFMV)
+              CallerIFuncs.insert(CallerIF);
+            else
+              NonFMVCallers.push_back(Caller);
+          }
+          It->second.push_back(CB);
         }
       }
     }
 
-    // Sort the caller versions in decreasing priority order.
-    sort(Callers, [&](auto *LHS, auto *RHS) {
-      return FeatureMask[LHS].ugt(FeatureMask[RHS]);
-    });
+    if (CallSites.empty())
+      continue;
 
-    auto implies = [](APInt A, APInt B) { return B.isSubsetOf(A); };
-
-    // Index to the highest priority candidate.
-    unsigned I = 0;
-    // Now try to redirect calls starting from higher priority callers.
-    for (Function *Caller : Callers) {
-      assert(I < Callees.size() && "Found callers of equal priority");
-
-      Function *Callee = Callees[I];
-      APInt CallerBits = FeatureMask[Caller];
-      APInt CalleeBits = FeatureMask[Callee];
-
-      // In the case of FMV callers, we know that all higher priority callers
-      // than the current one did not get selected at runtime, which helps
-      // reason about the callees (if they have versions that mandate presence
-      // of the features which we already know are unavailable on this target).
-      if (TTI.isMultiversionedFunction(*Caller)) {
-        // If the feature set of the caller implies the feature set of the
-        // highest priority candidate then it shall be picked. In case of
-        // identical sets advance the candidate index one position.
-        if (CallerBits == CalleeBits)
-          ++I;
-        else if (!implies(CallerBits, CalleeBits)) {
-          // Keep advancing the candidate index as long as the caller's
-          // features are a subset of the current candidate's.
-          while (implies(CalleeBits, CallerBits)) {
-            if (++I == Callees.size())
-              break;
-            CalleeBits = FeatureMask[Callees[I]];
+    LLVM_DEBUG(dbgs() << "Statically resolving calls to function "
+                      << CalleeIF->getResolverFunction()->getName() << "\n");
+
+    // The complexity of this algorithm is linear: O(NumCallers + NumCallees).
+    // TODO
+    // A limitation it has is that we are not using information about the
+    // current caller to deduce why an earlier caller of higher priority was
+    // skipped. For example let's say the current caller is aes+sve2 and a
+    // previous caller was mops+sve2. Knowing that sve2 is available we could
+    // infer that mops is unavailable. This would allow us to skip callee
+    // versions which depend on mops. I tried implementing this but the
+    // complexity was cubic :/
+    auto staticallyResolveCalls = [&](ArrayRef<Function *> Callers,
+                                      ArrayRef<Function *> Callees,
+                                      bool CallerIsFMV) {
+      // Index to the highest callee candidate.
+      unsigned I = 0;
+
+      for (Function *const &Caller : Callers) {
+        if (I == Callees.size())
+          break;
+
+        LLVM_DEBUG(dbgs() << "  Examining "
+                          << (CallerIsFMV ? "FMV" : "regular") << " caller "
+                          << Caller->getName() << "\n");
+
+        Function *Callee = Callees[I];
+        APInt CallerBits = FeatureMask[Caller];
+        APInt CalleeBits = FeatureMask[Callee];
+
+        // Statically resolve calls from the current caller to the current
+        // callee, iff the caller feature bits are a superset of the callee
+        // feature bits.
+        if (CalleeBits.isSubsetOf(CallerBits)) {
+          // Not all caller versions are necessarily users of the callee IFUNC.
+          if (auto It = CallSites.find(Caller); It != CallSites.end()) {
+            for (CallBase *CS : It->second) {
+              LLVM_DEBUG(dbgs() << "    Redirecting call " << Caller->getName()
+                                << " -> " << Callee->getName() << "\n");
+              CS->setCalledOperand(Callee);
+            }
+            Changed = true;
          }
-          continue;
        }
-      } else {
-        // We can't reason much about non-FMV callers. Just pick the highest
-        // priority callee if it matches, otherwise bail.
-        if (!OptimizeNonFMVCallers || I > 0 || !implies(CallerBits, CalleeBits))
+
+        // Nothing else to do about non-FMV callers.
+        if (!CallerIsFMV)
           continue;
+
+        // For FMV callers, as long as the caller feature bits are a subset of
+        // the callee feature bits, advance to the next callee. This effectively
+        // prevents considering the current callee as a candidate for static
+        // resolution by following callers.
+        while (CallerBits.isSubsetOf(FeatureMask[Callees[I]]) &&
+               ++I < Callees.size())
+          ;
       }
-      auto &Calls = CallSites[Caller];
-      for (CallBase *CS : Calls) {
-        LLVM_DEBUG(dbgs() << "Redirecting call " << Caller->getName() << " -> "
-                          << Callee->getName() << "\n");
-        CS->setCalledOperand(Callee);
-      }
-      Changed = true;
+    };
+
+    auto &Callees = VersionedFuncs[CalleeIF];
+
+    // Optimize non-FMV calls.
+    if (OptimizeNonFMVCallers)
+      staticallyResolveCalls(NonFMVCallers, Callees, /*CallerIsFMV=*/false);
+
+    // Optimize FMV calls.
+    for (GlobalIFunc *CallerIF : CallerIFuncs) {
+      auto &Callers = VersionedFuncs[CallerIF];
+      staticallyResolveCalls(Callers, Callees, /*CallerIsFMV=*/true);
     }
-    if (IF.use_empty() ||
-        all_of(IF.users(), [](User *U) { return isa<GlobalIFunc>(U); }))
+
+    if (CalleeIF->use_empty() ||
+        all_of(CalleeIF->users(), [](User *U) { return isa<GlobalIFunc>(U); }))
       NumIFuncsResolved++;
   }
   return Changed;
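
Illustration (not part of the patch): a minimal standalone C++ model of the caller/callee priority walk that `staticallyResolveCalls` performs above. Plain `uint64_t` masks stand in for the `APInt` feature masks returned by `TTI.getFeatureMask`, strings stand in for `Function *`, and the feature encoding is invented for the example; only the sort order and the two subset checks mirror the patch.

```cpp
// Sketch of the priority walk in staticallyResolveCalls, not the pass itself.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct Version {
  std::string Name;
  uint64_t FeatureBits; // higher value == higher priority in this toy encoding
};

// True if every feature bit of A is also set in B (A is a subset of B).
static bool isSubsetOf(uint64_t A, uint64_t B) { return (A & ~B) == 0; }

// Returns (CallerName, CalleeName) pairs for calls that can be redirected.
static std::vector<std::pair<std::string, std::string>>
staticallyResolveCalls(std::vector<Version> Callers,
                       std::vector<Version> Callees, bool CallerIsFMV) {
  auto ByPriority = [](const Version &L, const Version &R) {
    return L.FeatureBits > R.FeatureBits; // decreasing priority order
  };
  std::sort(Callers.begin(), Callers.end(), ByPriority);
  std::sort(Callees.begin(), Callees.end(), ByPriority);

  std::vector<std::pair<std::string, std::string>> Redirects;
  unsigned I = 0; // index of the highest-priority callee candidate
  for (const Version &Caller : Callers) {
    if (I == Callees.size())
      break;
    // Resolve iff the caller's feature bits are a superset of the callee's.
    if (isSubsetOf(Callees[I].FeatureBits, Caller.FeatureBits))
      Redirects.push_back({Caller.Name, Callees[I].Name});
    if (!CallerIsFMV)
      continue;
    // For FMV callers, skip callees whose feature bits the caller's are a
    // subset of: later (lower-priority) callers cannot select them either.
    while (isSubsetOf(Caller.FeatureBits, Callees[I].FeatureBits) &&
           ++I < Callees.size())
      ;
  }
  return Redirects;
}

int main() {
  // Toy bits: 1 = sve, 2 = sve2, 4 = mops. The sve2 callee mask includes the
  // sve bit, mimicking how real FMV masks contain implied features.
  std::vector<Version> Callees = {{"callee._Mmops", 4},
                                  {"callee._Msve2", 2 | 1},
                                  {"callee._Msve", 1},
                                  {"callee.default", 0}};
  std::vector<Version> Callers = {{"caller._MmopsMsve2", 4 | 2 | 1},
                                  {"caller._Mmops", 4},
                                  {"caller._Msve", 1},
                                  {"caller.default", 0}};
  for (auto &[From, To] : staticallyResolveCalls(Callers, Callees, true))
    std::cout << From << " -> " << To << "\n";
}
```

Running it prints the same redirections the caller2 test below checks for: the mops+sve2 and mops callers go directly to the mops callee, the sve caller keeps calling through the resolver, and the default caller resolves to the default callee.
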
diff --git a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
index 4b6a19d3f05cf..156c49c8b6677 100644
--- a/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
+++ b/llvm/test/Transforms/GlobalOpt/resolve-fmv-ifunc.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names)" --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call i32 @(test_single_bb_resolver|test_multi_bb_resolver|test_caller_feats_not_implied|test_non_fmv_caller|test_priority|test_alternative_names|test_unrelated_callers)" --version 4
 
 ; REQUIRES: aarch64-registered-target
 
@@ -13,6 +13,14 @@ $test_caller_feats_not_implied.resolver = comdat any
 $test_non_fmv_caller.resolver = comdat any
 $test_priority.resolver = comdat any
 $test_alternative_names.resolver = comdat any
+$test_unrelated_callers.resolver = comdat any
+$caller1.resolver = comdat any
+$caller2.resolver = comdat any
+$caller3.resolver = comdat any
+$caller6.resolver = comdat any
+$caller7.resolver = comdat any
+$caller8.resolver = comdat any
+$caller9.resolver = comdat any
 
 @__aarch64_cpu_features = external local_unnamed_addr global { i64 }
 
@@ -22,6 +30,14 @@ $test_alternative_names.resolver = comdat any
 @test_non_fmv_caller = weak_odr ifunc i32 (), ptr @test_non_fmv_caller.resolver
 @test_priority = weak_odr ifunc i32 (), ptr @test_priority.resolver
 @test_alternative_names = weak_odr ifunc i32 (), ptr @test_alternative_names.resolver
+@test_unrelated_callers = weak_odr ifunc i32 (), ptr @test_unrelated_callers.resolver
+@caller1 = weak_odr ifunc i32 (), ptr @caller1.resolver
+@caller2 = weak_odr ifunc i32 (), ptr @caller2.resolver
+@caller3 = weak_odr ifunc i32 (), ptr @caller3.resolver
+@caller6 = weak_odr ifunc i32 (), ptr @caller6.resolver
+@caller7 = weak_odr ifunc i32 (), ptr @caller7.resolver
+@caller8 = weak_odr ifunc i32 (), ptr @caller8.resolver
+@caller9 = weak_odr ifunc i32 (), ptr @caller9.resolver
 
 declare void @__init_cpu_features_resolver() local_unnamed_addr
 
@@ -34,18 +50,18 @@ define weak_odr ptr @test_single_bb_resolver.resolver() comdat {
 resolver_entry:
   tail call void @__init_cpu_features_resolver()
   %0 = load i64, ptr @__aarch64_cpu_features, align 8
-  %1 = and i64 %0, 68719476736
-  %.not = icmp eq i64 %1, 0
-  %2 = and i64 %0, 1073741824
-  %.not3 = icmp eq i64 %2, 0
-  %test_single_bb_resolver._Msve.test_single_bb_resolver.default = select i1 %.not3, ptr @test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve
-  %common.ret.op = select i1 %.not, ptr %test_single_bb_resolver._Msve.test_single_bb_resolver.default, ptr @test_single_bb_resolver._Msve2
+  %1 = and i64 %0, 69793284352
+  %2 = icmp eq i64 %1, 69793284352
+  %3 = and i64 %0, 1073807616
+  %4 = icmp eq i64 %3, 1073807616
+  %test_single_bb_resolver._Msve.test_single_bb_resolver.default = select i1 %4, ptr @test_single_bb_resolver._Msve, ptr @test_single_bb_resolver.default
+  %common.ret.op = select i1 %2, ptr @test_single_bb_resolver._Msve2, ptr %test_single_bb_resolver._Msve.test_single_bb_resolver.default
   ret ptr %common.ret.op
 }
 
 define i32 @caller1._Msve() #1 {
 ; CHECK-LABEL: define i32 @caller1._Msve(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve()
 ;
 entry:
@@ -55,7 +71,7 @@ entry:
 
 define i32 @caller1._Msve2() #2 {
 ; CHECK-LABEL: define i32 @caller1._Msve2(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver._Msve2()
 ;
 entry:
@@ -65,7 +81,7 @@ entry:
 
 define i32 @caller1.default() #0 {
 ; CHECK-LABEL: define i32 @caller1.default(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_single_bb_resolver.default()
 ;
 entry:
@@ -73,6 +89,20 @@ entry:
   ret i32 %call
 }
 
+define weak_odr ptr @caller1.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller1.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 69793284352
+  %2 = icmp eq i64 %1, 69793284352
+  %3 = and i64 %0, 1073807616
+  %4 = icmp eq i64 %3, 1073807616
+  %caller1._Msve.caller1.default = select i1 %4, ptr @caller1._Msve, ptr @caller1.default
+  %common.ret.op = select i1 %2, ptr @caller1._Msve2, ptr %caller1._Msve.caller1.default
+  ret ptr %common.ret.op
+}
+
 declare i32 @test_multi_bb_resolver._Mmops() #3
 declare i32 @test_multi_bb_resolver._Msve2() #2
 declare i32 @test_multi_bb_resolver._Msve() #1
@@ -92,20 +122,20 @@ common.ret: ; preds = %resolver_else2, %re
   ret ptr %common.ret.op
 
 resolver_else: ; preds = %resolver_entry
-  %2 = and i64 %0, 68719476736
-  %.not5 = icmp eq i64 %2, 0
-  br i1 %.not5, label %resolver_else2, label %common.ret
+  %2 = and i64 %0, 69793284352
+  %3 = icmp eq i64 %2, 69793284352
+  br i1 %3, label %common.ret, label %resolver_else2
 
 resolver_else2: ; preds = %resolver_else
-  %3 = and i64 %0, 1073741824
-  %.not6 = icmp eq i64 %3, 0
-  %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default = select i1 %.not6, ptr @test_multi_bb_resolver.default, ptr @test_multi_bb_resolver._Msve
+  %4 = and i64 %0, 1073807616
+  %5 = icmp eq i64 %4, 1073807616
+  %test_multi_bb_resolver._Msve.test_multi_bb_resolver.default = select i1 %5, ptr @test_multi_bb_resolver._Msve, ptr @test_multi_bb_resolver.default
   br label %common.ret
 }
 
 define i32 @caller2._MmopsMsve2() #4 {
 ; CHECK-LABEL: define i32 @caller2._MmopsMsve2(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops()
 ;
 entry:
@@ -115,7 +145,7 @@ entry:
 
 define i32 @caller2._Mmops() #3 {
 ; CHECK-LABEL: define i32 @caller2._Mmops(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver._Mmops()
 ;
 entry:
@@ -125,7 +155,7 @@ entry:
 
 define i32 @caller2._Msve() #1 {
 ; CHECK-LABEL: define i32 @caller2._Msve(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver()
 ;
 entry:
@@ -135,7 +165,7 @@ entry:
 
 define i32 @caller2.default() #0 {
 ; CHECK-LABEL: define i32 @caller2.default(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_multi_bb_resolver.default()
 ;
 entry:
@@ -143,6 +173,31 @@ entry:
   ret i32 %call
 }
 
+define weak_odr ptr @caller2.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller2.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 576460822096707840
+  %2 = icmp eq i64 %1, 576460822096707840
+  br i1 %2, label %common.ret, label %resolver_else
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+  %common.ret.op = phi ptr [ @caller2._MmopsMsve2, %resolver_entry ], [ @caller2._Mmops, %resolver_else ], [ %caller2._Msve.caller2.default, %resolver_else2 ]
+  ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+  %3 = and i64 %0, 576460752303423488
+  %.not = icmp eq i64 %3, 0
+  br i1 %.not, label %resolver_else2, label %common.ret
+
+resolver_else2: ; preds = %resolver_else
+  %4 = and i64 %0, 1073807616
+  %5 = icmp eq i64 %4, 1073807616
+  %caller2._Msve.caller2.default = select i1 %5, ptr @caller2._Msve, ptr @caller2.default
+  br label %common.ret
+}
+
 declare i32 @test_caller_feats_not_implied._Mmops() #3
 declare i32 @test_caller_feats_not_implied._Msme() #5
 declare i32 @test_caller_feats_not_implied._Msve() #1
@@ -162,20 +217,20 @@ common.ret: ; preds = %resolver_else2, %re
   ret ptr %common.ret.op
 
 resolver_else: ; preds = %resolver_entry
-  %2 = and i64 %0, 4398046511104
-  %.not5 = icmp eq i64 %2, 0
-  br i1 %.not5, label %resolver_else2, label %common.ret
+  %2 = and i64 %0, 4398180795136
+  %3 = icmp eq i64 %2, 4398180795136
+  br i1 %3, label %common.ret, label %resolver_else2
 
 resolver_else2: ; preds = %resolver_else
-  %3 = and i64 %0, 1073741824
-  %.not6 = icmp eq i64 %3, 0
-  %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default = select i1 %.not6, ptr @test_caller_feats_not_implied.default, ptr @test_caller_feats_not_implied._Msve
+  %4 = and i64 %0, 1073807616
+  %5 = icmp eq i64 %4, 1073807616
+  %test_caller_feats_not_implied._Msve.test_caller_feats_not_implied.default = select i1 %5, ptr @test_caller_feats_not_implied._Msve, ptr @test_caller_feats_not_implied.default
   br label %common.ret
 }
 
 define i32 @caller3._Mmops() #3 {
 ; CHECK-LABEL: define i32 @caller3._Mmops(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR3]] {
+; CHECK-SAME: ) #[[ATTR3]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied._Mmops()
 ;
 entry:
@@ -185,7 +240,7 @@ entry:
 
 define i32 @caller3._Msve() #1 {
 ; CHECK-LABEL: define i32 @caller3._Msve(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied()
 ;
 entry:
@@ -195,7 +250,7 @@ entry:
 
 define i32 @caller3.default() #0 {
 ; CHECK-LABEL: define i32 @caller3.default(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_caller_feats_not_implied()
 ;
 entry:
@@ -203,6 +258,20 @@ entry:
   ret i32 %call
 }
 
+define weak_odr ptr @caller3.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller3.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 576460752303423488
+  %.not = icmp eq i64 %1, 0
+  %2 = and i64 %0, 1073807616
+  %3 = icmp eq i64 %2, 1073807616
+  %caller3._Msve.caller3.default = select i1 %3, ptr @caller3._Msve, ptr @caller3.default
+  %common.ret.op = select i1 %.not, ptr %caller3._Msve.caller3.default, ptr @caller3._Mmops
+  ret ptr %common.ret.op
+}
+
 declare i32 @test_non_fmv_caller._Maes() #6
 declare i32 @test_non_fmv_caller._Msm4() #7
 declare i32 @test_non_fmv_caller.default() #0
@@ -212,15 +281,18 @@ define weak_odr ptr @test_non_fmv_caller.resolver() comdat {
 resolver_entry:
   tail call void @__init_cpu_features_resolver()
   %0 = load i64, ptr @__aarch64_cpu_features, align 8
-  %1 = and i64 %0, 32768
-  %.not = icmp eq i64 %1, 0
-  %test_non_fmv_caller._Maes.test_non_fmv_caller.default = select i1 %.not, ptr @test_non_fmv_caller.default, ptr @test_non_fmv_caller._Maes
-  ret ptr %test_non_fmv_caller._Maes.test_non_fmv_caller.default
+  %1 = and i64 %0, 33536
+  %2 = icmp eq i64 %1, 33536
+  %3 = and i64 %0, 800
+  %4 = icmp eq i64 %3, 800
+  %test_non_fmv_caller._Msm4.test_non_fmv_caller.default = select i1 %4, ptr @test_non_fmv_caller._Msm4, ptr @test_non_fmv_caller.default
+  %common.ret.op = select i1 %2, ptr @test_non_fmv_caller._Maes, ptr %test_non_fmv_caller._Msm4.test_non_fmv_caller.default
+  ret ptr %common.ret.op
 }
 
 define i32 @caller4() #8 {
 ; CHECK-LABEL: define i32 @caller4(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR7:[0-9]+]] {
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller._Maes()
 ;
 entry:
@@ -230,7 +302,7 @@ entry:
 
 define i32 @caller5() #9 {
 ; CHECK-LABEL: define i32 @caller5(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR8:[0-9]+]] {
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR9:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_non_fmv_caller()
 ;
 entry:
@@ -239,7 +311,7 @@ entry:
 }
 
 declare i32 @test_priority._Msve2-sha3() #10
-declare i32 @test_priority._Mls64Mssbs() #11
+declare i32 @test_priority._McsscMssbs() #11
 declare i32 @test_priority._MflagmMlseMrng() #12
 declare i32 @test_priority.default() #0
 
@@ -248,36 +320,57 @@ define weak_odr ptr @test_priority.resolver() comdat {
 resolver_entry:
   tail call void @__init_cpu_features_resolver()
   %0 = load i64, ptr @__aarch64_cpu_features, align 8
-  %1 = and i64 %0, 131
-  %2 = icmp eq i64 %1, 131
+  %1 = and i64 %0, 562949953423360
+  %2 = icmp eq i64 %1, 562949953423360
   br i1 %2, label %common.ret, label %resolver_else
 
 common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
-  %common.ret.op = phi ptr [ @test_priority._MflagmMlseMrng, %resolver_entry ], [ @test_priority._Mls64Mssbs, %resolver_else ], [ %test_priority._Msve2-sha3.test_priority.default, %resolver_else2 ]
+  %common.ret.op = phi ptr [ @test_priority._McsscMssbs, %resolver_entry ], [ @test_priority._Msve2-sha3, %resolver_else ], [ %test_priority._MflagmMlseMrng.test_priority.default, %resolver_else2 ]
   ret ptr %common.ret.op
 
 resolver_else: ; preds = %resolver_entry
-  %3 = and i64 %0, 9570149208162304
-  %4 = icmp eq i64 %3, 9570149208162304
+  %3 = and i64 %0, 1169304924928
+  %4 = icmp eq i64 %3, 1169304924928
   br i1 %4, label %common.ret, label %resolver_else2
 
 resolver_else2: ; preds = %resolver_else
-  %5 = and i64 %0, 1099511627776
-  %.not = icmp eq i64 %5, 0
-  %test_priority._Msve2-sha3.test_priority.default = select i1 %.not, ptr @test_priority.default, ptr @test_priority._Msve2-sha3
+  %5 = and i64 %0, 131
+  %6 = icmp eq i64 %5, 131
+  %test_priority._MflagmMlseMrng.test_priority.default = select i1 %6, ptr @test_priority._MflagmMlseMrng, ptr @test_priority.default
   br label %common.ret
 }
 
-define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3() #13 {
-; CHECK-LABEL: define i32 @caller6._MflagmMls64MlseMrngMssbsMsve2-sha3(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR12:[0-9]+]] {
-; CHECK: [[CALL:%.*]] = tail call i32 @test_priority._Mls64Mssbs()
+define i32 @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3() #13 {
+; CHECK-LABEL: define i32 @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3(
+; CHECK-SAME: ) #[[ATTR13:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_priority._McsscMssbs()
+;
+entry:
+  %call = tail call i32 @test_priority()
+  ret i32 %call
+}
+
+define i32 @caller6.default() #0 {
+; CHECK-LABEL: define i32 @caller6.default(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_priority()
 ;
 entry:
   %call = tail call i32 @test_priority()
   ret i32 %call
 }
 
+define weak_odr ptr @caller6.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller6.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 564119258348419
+  %2 = icmp eq i64 %1, 564119258348419
+  %caller6._McsscMflagmMlseMrngMssbsMsve2-sha3.caller6.default = select i1 %2, ptr @caller6._McsscMflagmMlseMrngMssbsMsve2-sha3, ptr @caller6.default
+  ret ptr %caller6._McsscMflagmMlseMrngMssbsMsve2-sha3.caller6.default
+}
+
 declare i32 @test_alternative_names._Mdpb2Mfrintts() #14
 declare i32 @test_alternative_names._Mflagm2Mfrintts() #15
 declare i32 @test_alternative_names._Mrcpc2() #16
@@ -310,7 +403,7 @@ resolver_else2: ; preds = %resolver_else
 
 define i32 @caller7._Mdpb2Mfrintts() #14 {
 ; CHECK-LABEL: define i32 @caller7._Mdpb2Mfrintts(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR13:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR14:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mdpb2Mfrintts()
 ;
 entry:
@@ -320,7 +413,7 @@ entry:
 
 define i32 @caller7._Mfrintts() #17 {
 ; CHECK-LABEL: define i32 @caller7._Mfrintts(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR16:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR17:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names()
 ;
 entry:
@@ -330,7 +423,7 @@ entry:
 
 define i32 @caller7._Mrcpc2() #16 {
 ; CHECK-LABEL: define i32 @caller7._Mrcpc2(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR15:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR16:[0-9]+]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names._Mrcpc2()
 ;
 entry:
@@ -340,7 +433,7 @@ entry:
 
 define i32 @caller7.default() #0 {
 ; CHECK-LABEL: define i32 @caller7.default(
-; CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK: [[CALL:%.*]] = tail call i32 @test_alternative_names.default()
 ;
 entry:
@@ -348,6 +441,156 @@ entry:
   ret i32 %call
 }
 
+define weak_odr ptr @caller7.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller7.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 17563904
+  %2 = icmp eq i64 %1, 17563904
+  br i1 %2, label %common.ret, label %resolver_else
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+  %common.ret.op = phi ptr [ @caller7._Mdpb2Mfrintts, %resolver_entry ], [ @caller7._Mfrintts, %resolver_else ], [ %caller7._Mrcpc2.caller7.default, %resolver_else2 ]
+  ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+  %3 = and i64 %0, 16777472
+  %4 = icmp eq i64 %3, 16777472
+  br i1 %4, label %common.ret, label %resolver_else2
+
+resolver_else2: ; preds = %resolver_else
+  %5 = and i64 %0, 12582912
+  %6 = icmp eq i64 %5, 12582912
+  %caller7._Mrcpc2.caller7.default = select i1 %6, ptr @caller7._Mrcpc2, ptr @caller7.default
+  br label %common.ret
+}
+
+declare i32 @test_unrelated_callers._Mmops() #3
+declare i32 @test_unrelated_callers._Msve2() #2
+declare i32 @test_unrelated_callers._Msve() #1
+declare i32 @test_unrelated_callers.default() #0
+
+define weak_odr ptr @test_unrelated_callers.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @test_unrelated_callers.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 576460752303423488
+  %.not = icmp eq i64 %1, 0
+  br i1 %.not, label %resolver_else, label %common.ret
+
+common.ret: ; preds = %resolver_else2, %resolver_else, %resolver_entry
+  %common.ret.op = phi ptr [ @test_unrelated_callers._Mmops, %resolver_entry ], [ @test_unrelated_callers._Msve2, %resolver_else ], [ %test_unrelated_callers._Msve.test_unrelated_callers.default, %resolver_else2 ]
+  ret ptr %common.ret.op
+
+resolver_else: ; preds = %resolver_entry
+  %2 = and i64 %0, 69793284352
+  %3 = icmp eq i64 %2, 69793284352
+  br i1 %3, label %common.ret, label %resolver_else2
+
+resolver_else2: ; preds = %resolver_else
+  %4 = and i64 %0, 1073807616
+  %5 = icmp eq i64 %4, 1073807616
+  %test_unrelated_callers._Msve.test_unrelated_callers.default = select i1 %5, ptr @test_unrelated_callers._Msve, ptr @test_unrelated_callers.default
+  br label %common.ret
+}
+
+define i32 @caller8._MmopsMsve2() #4 {
+; CHECK-LABEL: define i32 @caller8._MmopsMsve2(
+; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
+define dso_local i32 @caller8._Msve2() #2 {
+; CHECK-LABEL: define dso_local i32 @caller8._Msve2(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
+define i32 @caller8.default() #0 {
+; CHECK-LABEL: define i32 @caller8.default(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
+define weak_odr ptr @caller8.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller8.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 576460822096707840
+  %2 = icmp eq i64 %1, 576460822096707840
+  %3 = and i64 %0, 69793284352
+  %4 = icmp eq i64 %3, 69793284352
+  %caller8._Msve2.caller8.default = select i1 %4, ptr @caller8._Msve2, ptr @caller8.default
+  %common.ret.op = select i1 %2, ptr @caller8._MmopsMsve2, ptr %caller8._Msve2.caller8.default
+  ret ptr %common.ret.op
+}
+
+define i32 @caller9._Mmops() #3 {
+; CHECK-LABEL: define i32 @caller9._Mmops(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
+define i32 @caller9._Msve() #1 {
+; CHECK-LABEL: define i32 @caller9._Msve(
+; CHECK-SAME: ) #[[ATTR1]] {
+entry:
+  ret i32 1
+}
+
+define i32 @caller9.default() #0 {
+; CHECK-LABEL: define i32 @caller9.default(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers.default()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
+define weak_odr ptr @caller9.resolver() comdat {
+; CHECK-LABEL: define weak_odr ptr @caller9.resolver() comdat {
+resolver_entry:
+  tail call void @__init_cpu_features_resolver()
+  %0 = load i64, ptr @__aarch64_cpu_features, align 8
+  %1 = and i64 %0, 576460752303423488
+  %.not = icmp eq i64 %1, 0
+  %2 = and i64 %0, 1073807616
+  %3 = icmp eq i64 %2, 1073807616
+  %caller9._Msve.caller9.default = select i1 %3, ptr @caller9._Msve, ptr @caller9.default
+  %common.ret.op = select i1 %.not, ptr %caller9._Msve.caller9.default, ptr @caller9._Mmops
+  ret ptr %common.ret.op
+}
+
+define i32 @caller10() #18 {
+; CHECK-LABEL: define i32 @caller10(
+; CHECK-SAME: ) local_unnamed_addr #[[ATTR18:[0-9]+]] {
+; CHECK: [[CALL:%.*]] = tail call i32 @test_unrelated_callers._Mmops()
+;
+entry:
+  %call = tail call i32 @test_unrelated_callers()
+  ret i32 %call
+}
+
 attributes #0 = { "fmv-features" }
 attributes #1 = { "fmv-features"="sve" }
 attributes #2 = { "fmv-features"="sve2" }
@@ -359,10 +602,11 @@ attributes #7 = { "fmv-features"="sm4" }
 attributes #8 = { "target-features"="+aes,+fp-armv8,+neon,+outline-atomics,+v8a" }
 attributes #9 = { "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,+sm4" }
 attributes #10 = { "fmv-features"="sve2-sha3" }
-attributes #11 = { "fmv-features"="ls64,ssbs" }
+attributes #11 = { "fmv-features"="cssc,ssbs" }
 attributes #12 = { "fmv-features"="flagm,lse,rng" }
-attributes #13 = { "fmv-features"="flagm,ls64,lse,rng,ssbs,sve2-sha3" }
+attributes #13 = { "fmv-features"="cssc,flagm,lse,rng,ssbs,sve2-sha3" }
 attributes #14 = { "fmv-features"="dpb2,frintts" }
 attributes #15 = { "fmv-features"="flagm2,frintts" }
 attributes #16 = { "fmv-features"="rcpc2" }
 attributes #17 = { "fmv-features"="frintts" }
+attributes #18 = { "target-features"="+fp-armv8,+mops,+neon,+outline-atomics,+sve,+v8a" }
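
Usage illustration (hypothetical source, not taken from the patch or its tests): with a recent clang targeting AArch64 and its Function Multi Versioning support, code along these lines produces the kind of IFUNC/resolver IR exercised by the caller2/test_multi_bb_resolver case above; the comments state the outcome this patch's caller2 CHECK lines expect. Function names, the feature selection, and the build command are illustrative assumptions.

```cpp
// Hypothetical C++ translation unit using AArch64 FMV (target_version).
// Build e.g. with: clang++ -O2 --target=aarch64-linux-gnu -c fmv_example.cpp

__attribute__((target_version("mops"))) int callee() { return 1; }
__attribute__((target_version("sve2"))) int callee() { return 2; }
__attribute__((target_version("sve"))) int callee() { return 3; }
__attribute__((target_version("default"))) int callee() { return 0; }

// Expected after GlobalOpt with this patch (mirroring the caller2 test):
//  * the sve2+mops caller and the mops caller call callee._Mmops directly,
//  * the sve caller keeps calling through the ifunc resolver, because a
//    higher-priority callee version (mops) could still win at run time,
//  * the default caller is redirected to callee.default.
__attribute__((target_version("sve2+mops"))) int caller() { return callee(); }
__attribute__((target_version("mops"))) int caller() { return callee(); }
__attribute__((target_version("sve"))) int caller() { return callee(); }
__attribute__((target_version("default"))) int caller() { return callee(); }
```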