Skip to content

Commit

Permalink
Use the results of def/use analysis to check that the input LLVM
Browse files Browse the repository at this point in the history
CFG is valid. This ensures that all non-metadata uses of virtual
registers are proper, i.e., that their def sites dominate all
use sites.

In addition, track the set of live virtual registers during
translation.  This allows us to check which metadata values
are properly defined as we translate them, in particular, for
debugging intrinsics.  Since these intrinsics don't necessarily
follow the usual rules regarding use/def, it can sometimes happen
that metadata values are not properly defined.  In such cases
we simply omit the ill-formed debugging intrinsic information.

Fixes #798
  • Loading branch information
robdockins committed Oct 12, 2021
1 parent 43b7a1e commit c914655
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 20 deletions.
49 changes: 39 additions & 10 deletions crucible-llvm/src/Lang/Crucible/LLVM/Translation.hs
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,14 @@ import Control.Lens hiding (op, (:>) )
import Control.Monad.Except
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Maybe
import Data.String
import qualified Data.Text as Text

import qualified Text.LLVM.AST as L
import qualified Text.LLVM.PP as L

import Data.Parameterized.NatRepr as NatRepr
import Data.Parameterized.Some
Expand Down Expand Up @@ -192,21 +195,26 @@ buildRegTypeMap m0 bb = foldM stmt m0 (L.bbStmts bb)
generateStmts :: (?transOpts :: TranslationOptions)
=> TypeRepr ret
-> L.BlockLabel
-> Set L.Ident {- ^ Set of usable identifiers -}
-> [L.Stmt]
-> LLVMGenerator s arch ret a
generateStmts retType lab stmts = go (processDbgDeclare stmts)
where go [] = fail "LLVM basic block ended without a terminating instruction"
go (x:xs) =
generateStmts retType lab defSet0 stmts = go defSet0 (processDbgDeclare stmts)
where go _ [] = fail "LLVM basic block ended without a terminating instruction"
go defSet (x:xs) =
case x of
-- a result statement assigns the result of the instruction into a register
L.Result ident instr md -> do
setLocation md
generateInstr retType lab instr (assignLLVMReg ident) (go xs)
L.Result ident instr md ->
do setLocation md
generateInstr retType lab defSet instr
(assignLLVMReg ident)
(go (Set.insert ident defSet) xs)

-- an effect statement simply executes the instruction for its effects and discards the result
L.Effect instr md -> do
setLocation md
generateInstr retType lab instr (\_ -> return ()) (go xs)
L.Effect instr md ->
do setLocation md
generateInstr retType lab defSet instr
(\_ -> return ())
(go defSet xs)

-- | Search for calls to intrinsic 'llvm.dbg.declare' and copy the
-- metadata onto the corresponding 'alloca' statement. Also copy
Expand Down Expand Up @@ -317,7 +325,7 @@ defineLLVMBlock
-> LLVMGenerator s arch ret ()
defineLLVMBlock retType lm L.BasicBlock{ L.bbLabel = Just lab, L.bbStmts = stmts } = do
case Map.lookup lab lm of
Just bi -> defineBlock (block_label bi) (generateStmts retType lab stmts)
Just bi -> defineBlock (block_label bi) (generateStmts retType lab (block_use_set bi) stmts)
Nothing -> fail $ unwords ["LLVM basic block not found in block info map", show lab]

defineLLVMBlock _ _ _ = fail "LLVM basic block has no label!"
Expand Down Expand Up @@ -351,9 +359,30 @@ genDefn defn retType =
case Map.lookup entry_lab bim of
Nothing -> fail $ unwords ["entry label not found in label map:", show entry_lab]
Just entry_bi -> do
checkEntryPointUseSet nm entry_bi (L.defArgs defn)
mapM_ (defineLLVMBlock retType bim) (L.defBody defn)
jump (block_label entry_bi)


-- | Check that the input LLVM CFG satisfies the def/use invariant,
-- and raise an error if some virtual register has a use site that
-- is not dominated by its definition site.
checkEntryPointUseSet ::
String ->
LLVMBlockInfo s ->
[L.Typed L.Ident] ->
LLVMGenerator s arg ret ()
checkEntryPointUseSet nm bi args
| Set.null unsatisfiedUses = return ()
| otherwise = fail $ unlines $
[ "Invalid input LLVM for function: " ++ nm
, "The following LLVM virtual registers are used before they are defined:"
] ++ map (\i -> " " ++ show (L.ppIdent i)) (Set.toList unsatisfiedUses)
where
argSet = Set.fromList (map L.typedValue args)
useSet = block_use_set bi
unsatisfiedUses = Set.difference useSet argSet

------------------------------------------------------------------------
-- transDefine
--
Expand Down
32 changes: 22 additions & 10 deletions crucible-llvm/src/Lang/Crucible/LLVM/Translation/Instruction.hs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ import qualified Data.List as List
import Data.List.NonEmpty (NonEmpty((:|)))
import qualified Data.Map.Strict as Map
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.String
Expand All @@ -57,6 +59,7 @@ import Prettyprinter (pretty)
import GHC.Exts ( Proxy#, proxy# )

import qualified Text.LLVM.AST as L
import qualified Text.LLVM.PP as L

import qualified Data.BitVector.Sized as BV
import qualified Data.Parameterized.Context as Ctx
Expand Down Expand Up @@ -1384,6 +1387,7 @@ generateInstr :: forall s arch ret a.
(?transOpts :: TranslationOptions) =>
TypeRepr ret {- ^ Type of the function return value -} ->
L.BlockLabel {- ^ The label of the current LLVM basic block -} ->
Set L.Ident {- ^ Set of usable identifiers -} ->
L.Instr {- ^ The instruction to translate -} ->
(LLVMExpr s arch -> LLVMGenerator s arch ret ())
{- ^ A continuation to assign the produced value of this instruction to a register -} ->
Expand All @@ -1392,7 +1396,7 @@ generateInstr :: forall s arch ret a.
Straightline instructions should enter this continuation,
but block-terminating instructions should not. -} ->
LLVMGenerator s arch ret a
generateInstr retType lab instr assign_f k =
generateInstr retType lab defSet instr assign_f k =
case instr of
-- skip phi instructions, they are handled in definePhiBlock
L.Phi _ _ -> k
Expand Down Expand Up @@ -1575,12 +1579,12 @@ generateInstr retType lab instr assign_f k =
k

L.Call tailcall (L.PtrTo fnTy) fn args ->
callFunctionWithCont instr tailcall fnTy fn args assign_f k
callFunctionWithCont defSet instr tailcall fnTy fn args assign_f k
L.Call _ ty _ _ ->
fail $ unwords ["unexpected function type in call:", show ty]

L.Invoke fnTy fn args normLabel _unwindLabel -> do
callFunctionWithCont instr False fnTy fn args assign_f $ definePhiBlock lab normLabel
callFunctionWithCont defSet instr False fnTy fn args assign_f $ definePhiBlock lab normLabel

L.Bit op x y ->
do tp <- liftMemType' (L.typedType x)
Expand Down Expand Up @@ -1879,6 +1883,7 @@ callFunction instr _tailCall fnTy _fn _args _assign_f =
-- instructions.
callFunctionWithCont :: forall s arch ret a.
(?transOpts :: TranslationOptions) =>
Set L.Ident {- ^ Set of usable identifiers -} ->
L.Instr {- ^ Source instruction o the call -} ->
Bool {- ^ Is the function a tail call? -} ->
L.Type {- ^ type of the function to call -} ->
Expand All @@ -1887,27 +1892,27 @@ callFunctionWithCont :: forall s arch ret a.
(LLVMExpr s arch -> LLVMGenerator s arch ret ()) {- ^ assignment continuation for return value -} ->
LLVMGenerator s arch ret a {- ^ continuation for next instructions -} ->
LLVMGenerator s arch ret a
callFunctionWithCont instr tailCall_ fnTy fn args assign_f k
callFunctionWithCont defSet instr tailCall_ fnTy fn args assign_f k

-- Supports LLVM 4-12
| L.ValSymbol "llvm.dbg.declare" <- fn =
do mbArgs <- dbgArgs args
do mbArgs <- dbgArgs defSet args
case mbArgs of
Right (asScalar -> Scalar _ PtrRepr ptr, lv, di) ->
extensionStmt (LLVM_Debug (LLVM_Dbg_Declare ptr lv di)) >> k
_ -> k

-- Supports LLVM 6-12
| L.ValSymbol "llvm.dbg.addr" <- fn =
do mbArgs <- dbgArgs args
do mbArgs <- dbgArgs defSet args
case mbArgs of
Right (asScalar -> Scalar _ PtrRepr ptr, lv, di) ->
extensionStmt (LLVM_Debug (LLVM_Dbg_Addr ptr lv di)) >> k
_ -> k

-- Supports LLVM 6-12 (earlier versions had an extra argument)
| L.ValSymbol "llvm.dbg.value" <- fn =
do mbArgs <- dbgArgs args
do mbArgs <- dbgArgs defSet args
case mbArgs of
Right (asScalar -> Scalar _ repr val, lv, di) ->
extensionStmt (LLVM_Debug (LLVM_Dbg_Value repr val lv di)) >> k
Expand Down Expand Up @@ -1941,9 +1946,10 @@ callFunctionWithCont instr tailCall_ fnTy fn args assign_f k
-- | Match the arguments used by @dbg.addr@, @dbg.declare@, and @dbg.value@.
dbgArgs ::
(?transOpts :: TranslationOptions) =>
Set L.Ident {- ^ Set of usable identifiers -} ->
[L.Typed L.Value] {- ^ debug call arguments -} ->
LLVMGenerator s arch ret (Either String (LLVMExpr s arch, L.DILocalVariable, L.DIExpression))
dbgArgs args
dbgArgs defSet args
| -- Why guard translating llvm.dbg statements behind its own option? It's
-- because Clang can sometimes generate llvm.dbg statements with improperly
-- scoped arguments—see https://bugs.llvm.org/show_bug.cgi?id=51155. This
Expand All @@ -1958,8 +1964,14 @@ dbgArgs args
L.Typed _ (L.ValMd (L.ValMdDebugInfo (L.DebugInfoLocalVariable lv))) ->
case diArg of
L.Typed _ (L.ValMd (L.ValMdDebugInfo (L.DebugInfoExpression di))) ->
do v <- transTypedValue val
pure (Right (v, lv, di))
let unusableIdents = Set.difference (useTypedVal val) defSet
in if Set.null unusableIdents then
do v <- transTypedValue val
pure (Right (v, lv, di))
else
do let msg = unwords (["Debug intrinsic def/use violation"] ++ map (show . L.ppIdent) (Set.toList unusableIdents))
liftIO $ putStrLn $ msg
pure (Left msg)
_ -> pure (Left ("dbg: argument 3 expected DIExpression, got: " ++ show diArg))
_ -> pure (Left ("dbg: argument 2 expected local variable metadata, got: " ++ show lvArg))
_ -> pure (Left ("dbg: argument 1 expected value metadata, got: " ++ show valArg))
Expand Down
2 changes: 2 additions & 0 deletions crucible-llvm/src/Lang/Crucible/LLVM/Translation/Monad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ module Lang.Crucible.LLVM.Translation.Monad
, LLVMContext(..)
, llvmTypeCtx
, mkLLVMContext

, useTypedVal
) where

import Control.Lens hiding (op, (:>), to, from )
Expand Down

0 comments on commit c914655

Please sign in to comment.