@@ -655,8 +655,9 @@ class DataflowSrcSafetyAnalysis
655655//
656656// Then, a function can be split into a number of disjoint contiguous sequences
657657// of instructions without labels in between. These sequences can be processed
658- // the same way basic blocks are processed by data-flow analysis, assuming
659- // pessimistically that all registers are unsafe at the start of each sequence.
658+ // the same way basic blocks are processed by data-flow analysis, with the same
659+ // pessimistic estimation of the initial state at the start of each sequence
660+ // (except the first instruction of the function).
660661class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
661662 BinaryFunction &BF;
662663 MCPlusBuilder::AllocatorIdTy AllocId;
@@ -667,6 +668,30 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
667668 BC.MIB ->removeAnnotation (I.second , StateAnnotationIndex);
668669 }
669670
671+ // / Compute a reasonably pessimistic estimation of the register state when
672+ // / the previous instruction is not known for sure. Take the set of registers
673+ // / which are trusted at function entry and remove all registers that can be
674+ // / clobbered inside this function.
675+ SrcState computePessimisticState (BinaryFunction &BF) {
676+ BitVector ClobberedRegs (NumRegs);
677+ for (auto &I : BF.instrs ()) {
678+ MCInst &Inst = I.second ;
679+ BC.MIB ->getClobberedRegs (Inst, ClobberedRegs);
680+
681+ // If this is a call instruction, no register is safe anymore, unless
682+ // it is a tail call. Ignore tail calls for the purpose of estimating the
683+ // worst-case scenario, assuming no instructions are executed in the
684+ // caller after this point anyway.
685+ if (BC.MIB ->isCall (Inst) && !BC.MIB ->isTailCall (Inst))
686+ ClobberedRegs.set ();
687+ }
688+
689+ SrcState S = createEntryState ();
690+ S.SafeToDerefRegs .reset (ClobberedRegs);
691+ S.TrustedRegs .reset (ClobberedRegs);
692+ return S;
693+ }
694+
670695public:
671696 CFGUnawareSrcSafetyAnalysis (BinaryFunction &BF,
672697 MCPlusBuilder::AllocatorIdTy AllocId,
@@ -677,6 +702,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
677702 }
678703
679704 void run () override {
705+ const SrcState DefaultState = computePessimisticState (BF);
680706 SrcState S = createEntryState ();
681707 for (auto &I : BF.instrs ()) {
682708 MCInst &Inst = I.second ;
@@ -691,7 +717,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
691717 LLVM_DEBUG ({
692718 traceInst (BC, " Due to label, resetting the state before" , Inst);
693719 });
694- S = createUnsafeState () ;
720+ S = DefaultState ;
695721 }
696722
697723 // Check if we need to remove an old annotation (this is the case if
@@ -1226,6 +1252,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
12261252 return make_report (RetKind, Inst, *RetReg);
12271253}
12281254
1255+ // / While BOLT already marks some of the branch instructions as tail calls,
1256+ // / this function tries to improve the coverage by including less obvious cases
1257+ // / when it is possible to do without introducing too many false positives.
1258+ static bool shouldAnalyzeTailCallInst (const BinaryContext &BC,
1259+ const BinaryFunction &BF,
1260+ const MCInstReference &Inst) {
1261+ // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
1262+ // (such as isBranch at the time of writing this comment), some don't (such
1263+ // as isCall). For that reason, call MCInstrDesc's methods explicitly when
1264+ // it is important.
1265+ const MCInstrDesc &Desc =
1266+ BC.MII ->get (static_cast <const MCInst &>(Inst).getOpcode ());
1267+ // Tail call should be a branch (but not necessarily an indirect one).
1268+ if (!Desc.isBranch ())
1269+ return false ;
1270+
1271+ // Always analyze the branches already marked as tail calls by BOLT.
1272+ if (BC.MIB ->isTailCall (Inst))
1273+ return true ;
1274+
1275+ // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
1276+ // below is a simplified condition from BinaryContext::printInstruction.
1277+ bool IsUnknownControlFlow =
1278+ BC.MIB ->isIndirectBranch (Inst) && !BC.MIB ->getJumpTable (Inst);
1279+
1280+ if (BF.hasCFG () && IsUnknownControlFlow)
1281+ return true ;
1282+
1283+ return false ;
1284+ }
1285+
1286+ static std::optional<BriefReport<MCPhysReg>>
1287+ shouldReportUnsafeTailCall (const BinaryContext &BC, const BinaryFunction &BF,
1288+ const MCInstReference &Inst, const SrcState &S) {
1289+ static const GadgetKind UntrustedLRKind (
1290+ " untrusted link register found before tail call" );
1291+
1292+ if (!shouldAnalyzeTailCallInst (BC, BF, Inst))
1293+ return std::nullopt ;
1294+
1295+ // Not only the set of registers returned by getTrustedLiveInRegs() can be
1296+ // seen as a reasonable target-independent _approximation_ of "the LR", these
1297+ // are *exactly* those registers used by SrcSafetyAnalysis to initialize the
1298+ // set of trusted registers on function entry.
1299+ // Thus, this function basically checks that the precondition expected to be
1300+ // imposed by a function call instruction (which is hardcoded into the target-
1301+ // specific getTrustedLiveInRegs() function) is also respected on tail calls.
1302+ SmallVector<MCPhysReg> RegsToCheck = BC.MIB ->getTrustedLiveInRegs ();
1303+ LLVM_DEBUG ({
1304+ traceInst (BC, " Found tail call inst" , Inst);
1305+ traceRegMask (BC, " Trusted regs" , S.TrustedRegs );
1306+ });
1307+
1308+ // In musl on AArch64, the _start function sets LR to zero and calls the next
1309+ // stage initialization function at the end, something along these lines:
1310+ //
1311+ // _start:
1312+ // mov x30, #0
1313+ // ; ... other initialization ...
1314+ // b _start_c ; performs "exit" system call at some point
1315+ //
1316+ // As this would produce a false positive for every executable linked with
1317+ // such libc, ignore tail calls performed by ELF entry function.
1318+ if (BC.StartFunctionAddress &&
1319+ *BC.StartFunctionAddress == Inst.getFunction ()->getAddress ()) {
1320+ LLVM_DEBUG ({ dbgs () << " Skipping tail call in ELF entry function.\n " ; });
1321+ return std::nullopt ;
1322+ }
1323+
1324+ // Returns at most one report per instruction - this is probably OK...
1325+ for (auto Reg : RegsToCheck)
1326+ if (!S.TrustedRegs [Reg])
1327+ return make_report (UntrustedLRKind, Inst, Reg);
1328+
1329+ return std::nullopt ;
1330+ }
1331+
12291332static std::optional<BriefReport<MCPhysReg>>
12301333shouldReportCallGadget (const BinaryContext &BC, const MCInstReference &Inst,
12311334 const SrcState &S) {
@@ -1366,6 +1469,9 @@ void FunctionAnalysis::findUnsafeUses(
13661469 if (PacRetGadgetsOnly)
13671470 return ;
13681471
1472+ if (auto Report = shouldReportUnsafeTailCall (BC, BF, Inst, S))
1473+ Reports.push_back (*Report);
1474+
13691475 if (auto Report = shouldReportCallGadget (BC, Inst, S))
13701476 Reports.push_back (*Report);
13711477 if (auto Report = shouldReportSigningOracle (BC, Inst, S))
0 commit comments