From 6349a78bf906fb163442b8958e9a78be391d0fe6 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Thu, 6 Apr 2023 10:07:59 -0400 Subject: [PATCH 01/14] Adapt to Load gaining an explicit load type This bumps the `llvm-pretty` submodule to bring in the changes to the `Load` data constructor from elliottt/llvm-pretty#110 and adapts the code in `llvm-pretty-bc-parser` accordingly. This is necessary in order to `load` from an opaque pointer. See #177. A test case will be added in a subsequent commit. --- llvm-pretty | 2 +- src/Data/LLVM/BitCode/IR/Function.hs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm-pretty b/llvm-pretty index d099d5d0..1c4b6de9 160000 --- a/llvm-pretty +++ b/llvm-pretty @@ -1 +1 @@ -Subproject commit d099d5d0feab8066bc682f11c8a46c82fb7166b5 +Subproject commit 1c4b6de9e278734963bb02aaa8e4238f622f8a42 diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index c3b0a61c..f2484ff3 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -621,7 +621,7 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of aval <- parseField r ix' numeric let align | aval > 0 = Just (bit aval `shiftR` 1) | otherwise = Nothing - result ret (Load (tv { typedType = PtrTo ret }) Nothing align) d + result ret (Load ret tv Nothing align) d -- 21 is unused -- 22 is unused @@ -825,7 +825,7 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of when (ordval /= Nothing && align == Nothing) (fail "Invalid record") - result ret (Load (tv { typedType = PtrTo ret }) ordval align) d + result ret (Load ret tv ordval align) d -- [ptrty, ptr, val, align, vol, ordering, synchscope] From 02fdbd05d281792e28acf439d343dc5a4088f71c Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Thu, 6 Apr 2023 11:18:29 -0400 Subject: [PATCH 02/14] Adapt to GEP/ConstGEP gaining explicit base types This bumps the `llvm-pretty` submodule to bring in the changes to the `GEP`/`ConstGEP` data constructors from elliottt/llvm-pretty#110 and adapts the code in `llvm-pretty-bc-parser` accordingly. Because `ConstGEP` now stores the basis type for calculations explicitly, I needed to fix #218 in order to ensure that the basis type is always parsed properly. In the process of fixing this issue, I refactored the `parseCeGep` to make the code clearer and more closely mirror the structure of LLVM's own bitcode parser. This is necessary in order to use `getelementptr` on an opaque pointer. See #177. A test case will be added in a subsequent commit. --- llvm-pretty | 2 +- src/Data/LLVM/BitCode/IR/Constants.hs | 74 ++++++++++++++++++++------- src/Data/LLVM/BitCode/IR/Function.hs | 21 ++++---- 3 files changed, 67 insertions(+), 30 deletions(-) diff --git a/llvm-pretty b/llvm-pretty index 1c4b6de9..16bc5bd5 160000 --- a/llvm-pretty +++ b/llvm-pretty @@ -1 +1 @@ -Subproject commit 1c4b6de9e278734963bb02aaa8e4238f622f8a42 +Subproject commit 16bc5bd5a07d6e5d99c1c0f3a57b8eca2ecc944e diff --git a/src/Data/LLVM/BitCode/IR/Constants.hs b/src/Data/LLVM/BitCode/IR/Constants.hs index 8b764e77..24e1600b 100644 --- a/src/Data/LLVM/BitCode/IR/Constants.hs +++ b/src/Data/LLVM/BitCode/IR/Constants.hs @@ -22,7 +22,7 @@ import Data.Bits (shiftL,shiftR,testBit, Bits) import Data.LLVM.BitCode.BitString ( pattern Bits' ) import qualified Data.LLVM.BitCode.BitString as BitS import qualified Data.Map as Map -import Data.Maybe (fromMaybe, isJust) +import Data.Maybe (fromMaybe) import Data.Word (Word16, Word32,Word64) #if __GLASGOW_HASKELL__ >= 704 @@ -317,7 +317,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = -- [n x operands] 12 -> label "CST_CODE_CE_GEP" $ do ty <- getTy - v <- parseCeGep False Nothing t r + v <- parseCeGep CeGepCode12 t r return (getTy,Typed ty v:cs) -- [opval,opval,opval] @@ -369,7 +369,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = -- [n x operands] 20 -> label "CST_CODE_CE_INBOUNDS_GEP" $ do ty <- getTy - v <- parseCeGep True Nothing t r + v <- parseCeGep CeGepCode20 t r return (getTy,Typed ty v:cs) -- [funty,fnval,bb#] @@ -411,10 +411,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) = -- [opty, flags, n x operands] 24 -> label "CST_CODE_CE_GEP_WITH_INRANGE_INDEX" $ do ty <- getTy - (flags :: Word64) <- parseField r 1 numeric - let inBounds = testBit flags 0 - inRangeIndex = flags `shiftR` 1 - v <- parseCeGep inBounds (Just inRangeIndex) t r + v <- parseCeGep CeGepCode24 t r return (getTy,Typed ty v:cs) -- [opcode, opval] @@ -454,23 +451,62 @@ parseConstantEntry _ st (abbrevDef -> Just _) = parseConstantEntry _ _ e = fail ("constant block: unexpected: " ++ show e) -parseCeGep :: Bool -> Maybe Word64 -> ValueTable -> Record -> Parse PValue -parseCeGep isInbounds mInrangeIdx t r = do - let isExplicit = odd (length (recordFields r)) -- TODO: is this right for INRANGE_INDEX? - firstIdx = if isJust mInrangeIdx then 2 else if isExplicit then 1 else 0 - field = parseField r - loop n = do +-- | The different codes for constant @getelementptr@ expressions. Each one has +-- minor differences in how they are parsed. +data CeGepCode + = CeGepCode12 + -- ^ @CST_CODE_CE_GEP = 12@. The original. + | CeGepCode20 + -- ^ @CST_CODE_CE_INBOUNDS_GEP = 20@. This adds an @inbounds@ field that + -- indicates that the result value should be poison if it performs an + -- out-of-bounds index. + | CeGepCode24 + -- ^ @CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24@. This adds an @inrange@ field + -- that indicates that loading or storing to the result pointer will have + -- undefined behavior if the load or store would access memory outside of the + -- bounds of the indices marked as @inrange@. + deriving Eq + +-- | Parse a 'ConstGEP' value. There are several variations on this theme that +-- are captured in the 'CeGepCode' argument. +parseCeGep :: CeGepCode -> ValueTable -> Record -> Parse PValue +parseCeGep code t r = do + let field = parseField r + + (mbBaseTy, ix0) <- + if code == CeGepCode24 || odd (length (recordFields r)) + then do baseTy <- getType =<< field 0 numeric + pure (Just baseTy, 1) + else pure (Nothing, 0) + + (isInbounds, mInrangeIdx, ix1) <- + case code of + CeGepCode12 -> pure (False, Nothing, ix0) + CeGepCode20 -> pure (True, Nothing, ix0) + CeGepCode24 -> do + (flags :: Word64) <- parseField r ix0 numeric + let inbounds = testBit flags 0 + inrangeIdx = flags `shiftR` 1 + pure (inbounds, Just inrangeIdx, ix0 + 1) + + let loop n = do ty <- getType =<< field n numeric elt <- field (n+1) numeric rest <- loop (n+2) `mplus` return [] cxt <- getContext return (Typed ty (typedValue (forwardRef cxt elt t)) : rest) - mPointeeType <- - if isExplicit - then Just <$> (getType =<< field 0 numeric) - else pure Nothing - args <- loop firstIdx - return $! ValConstExpr (ConstGEP isInbounds mInrangeIdx mPointeeType args) + args <- loop ix1 + (ptr, args') <- + case args of + [] -> fail "Invalid constant GEP with no operands" + (base:args') -> pure (base, args') + + baseTy <- + case mbBaseTy of + Just baseTy -> pure baseTy + Nothing -> Assert.elimPtrTo "constant GEP not headed by pointer" (typedType ptr) + + return $! ValConstExpr (ConstGEP isInbounds mInrangeIdx baseTy ptr args') parseWideInteger :: Record -> Int -> Parse Integer parseWideInteger r idx = do diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index f2484ff3..f59dcbcc 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -1117,14 +1117,15 @@ baseType ty = ty -- [n x operands] parseGEP :: ValueTable -> Maybe Bool -> Record -> PartialDefine -> Parse PartialDefine parseGEP t mbInBound r d = do - (ib, tv, r', ix) <- + (ib, ty, tv, r', ix) <- case mbInBound of -- FUNC_CODE_INST_GEP_OLD -- FUNC_CODE_INST_INBOUNDS_GEP_OLD Just ib -> do (tv,ix') <- getValueTypePair t r 0 - return (ib, tv, r, ix') + ty <- Assert.elimPtrTo "GEP not headed by pointer" (typedType tv) + return (ib, ty, tv, r, ix') -- FUNC_CODE_INST_GEP Nothing -> do @@ -1142,11 +1143,11 @@ parseGEP t mbInBound r d = do , "Base type of operand: " ++ show (ppType (baseType (typedType tv))) ]) -} - return (ib, tv { typedType = PtrTo ty }, r', ix') + return (ib, ty, tv, r', ix') args <- label "parseGepArgs" (parseGepArgs t r' ix) - rty <- label "interpGep" (interpGep (typedType tv) args) - result rty (GEP ib tv args) d + rty <- label "interpGep" (interpGep ty tv args) + result rty (GEP ib ty tv args) d -- Parse an @atomicrmw@ instruction, which can be represented by one of the -- following function codes: @@ -1293,16 +1294,16 @@ parseGepArgs t r = loop rest <- loop ix' return (tv:rest) --- | Interpret the getelementptr arguments, to determine the final type of the --- instruction. -interpGep :: Type -> [Typed PValue] -> Parse Type -interpGep ty vs = check (resolveGep ty vs) +-- | Interpret a getelementptr instruction to determine its result type. +interpGep :: Type -> Typed PValue -> [Typed PValue] -> Parse Type +interpGep baseTy ptr vs = check (resolveGep baseTy ptr vs) where check res = case res of HasType rty -> return (PtrTo rty) Invalid -> fail $ unlines $ [ "Unable to determine the type of getelementptr" - , "Input type: " ++ show ty + , "Base type: " ++ show baseTy + , "Pointer value: " ++ show ptr ] Resolve i k -> do ty' <- getType' =<< getTypeId i From 0796c99e3ccf2ccdb3f333e45d58e7dd5b092882 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 20:09:30 -0400 Subject: [PATCH 03/14] Basic support for parsing opaque pointer types This adds the bare minimum needed to parse `PtrOpaque` (opaque pointer) types. See #177. Other instructions will need to be tweaked in order to account for the possibility of opaque pointer arguments, but this will happen in subsequent commits. --- src/Data/LLVM/BitCode/IR/Types.hs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/Data/LLVM/BitCode/IR/Types.hs b/src/Data/LLVM/BitCode/IR/Types.hs index 69cfe5bc..ef564bdf 100644 --- a/src/Data/LLVM/BitCode/IR/Types.hs +++ b/src/Data/LLVM/BitCode/IR/Types.hs @@ -117,6 +117,7 @@ parseTypeBlockEntry (fromEntry -> Just r) = case recordCode r of let field = parseField r ty <- field 0 typeRef when (length (recordFields r) == 2) $ do + -- We do not currently store address spaces in the @llvm-pretty@ AST. _space <- field 1 keep return () addType (PtrTo ty) @@ -189,6 +190,26 @@ parseTypeBlockEntry (fromEntry -> Just r) = case recordCode r of rty:ptys -> addType (FunTy rty ptys vararg) [] -> fail "function expects a return type" + 22 -> label "TYPE_CODE_TOKEN" $ do + notImplemented + + 23 -> label "TYPE_CODE_BFLOAT" $ do + notImplemented + + 24 -> label "TYPE_CODE_X86_AMX" $ do + notImplemented + + 25 -> label "TYPE_CODE_OPAQUE_POINTER" $ do + let field = parseField r + when (length (recordFields r) /= 1) $ + fail "Invalid opaque pointer record" + -- We do not currently store address spaces in the @llvm-pretty@ AST. + _space <- field 0 keep + addType PtrOpaque + + 26 -> label "TYPE_CODE_TARGET_TYPE" $ do + notImplemented + code -> Assert.unknownEntity "type code " code -- skip blocks From 29c68e0ef93db0adea85877d89208a7c249e87e4 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 20:34:51 -0400 Subject: [PATCH 04/14] Remove ptrTo and baseType As explained in the new `Note [Pointers and pointee types]`, we cannot inspect `PtrTo` pointee types if we simultaneously support opaque pointers. The `ptrTo` and `baseType` functions fundamentally rely on this, and as such, they have been removed. They are ultimately used in service of implementing assertions, so removing them is fairly straightforward. See #177. The `elimPtrTo` and `elimPtrTo_` functions also inspect pointee types, but they are required to support old versions of LLVM that do not store the necessary type information in the instructions that need them. In subsequent commits, I will ensure that all uses of `elimPtrTo`/`elimPtrTo_` are appropriately guarded such that they will not be used on modern versions of LLVM bitcode. --- src/Data/LLVM/BitCode/Assert.hs | 57 ++++++++++++---------------- src/Data/LLVM/BitCode/IR/Function.hs | 20 ---------- 2 files changed, 25 insertions(+), 52 deletions(-) diff --git a/src/Data/LLVM/BitCode/Assert.hs b/src/Data/LLVM/BitCode/Assert.hs index c3dd3794..a10d80ac 100644 --- a/src/Data/LLVM/BitCode/Assert.hs +++ b/src/Data/LLVM/BitCode/Assert.hs @@ -22,7 +22,6 @@ module Data.LLVM.BitCode.Assert -- ** Types , elimPtrTo , elimPtrTo_ - , ptrTo ) where import Control.Monad (MonadPlus, mplus) @@ -32,7 +31,7 @@ import Control.Monad.Fail (MonadFail) #endif import Data.LLVM.BitCode.Record (Record) import qualified Data.LLVM.BitCode.Record as Record -import Text.LLVM.AST (Type', Typed, Ident) +import Text.LLVM.AST (Type', Ident) import qualified Text.LLVM.AST as AST supportedCompilerMessage :: [String] @@ -80,7 +79,11 @@ recordSizeIn record ns = ---------------------------------------------------------------- -- ** Types --- | Assert that this thing is a pointer, get the underlying type +-- | Assert that this thing is a @'PtrTo' ty@ and return the underlying @ty@. +-- +-- Think carefully before using this function, as it will not work as you would +-- expect when the type is an opaque pointer. +-- See @Note [Pointers and pointee types]@. elimPtrTo :: (MonadFail m, MonadPlus m) => String -> Type' Ident -> m (Type' Ident) elimPtrTo msg ptrTy = AST.elimPtrTo ptrTy `mplus` (fail $ unlines [ msg @@ -88,35 +91,25 @@ elimPtrTo msg ptrTy = AST.elimPtrTo ptrTy `mplus` , show ptrTy ]) --- | Assert that this thing is a pointer +-- | Assert that this thing is a 'PtrTo' type. +-- +-- Think carefully before using this function, as it will not work as you would +-- expect when the type is an opaque pointer. +-- See @Note [Pointers and pointee types]@. elimPtrTo_ :: (MonadFail m, MonadPlus m) => String -> Type' Ident -> m () elimPtrTo_ msg ptrTy = elimPtrTo msg ptrTy >> pure () --- | Assert that the first thing is a pointer to something of the type of the --- second thing, e.g. in a load/store instruction. --- --- See: https://github.com/llvm-mirror/llvm/blob/release_60/lib/Bitcode/Reader/BitcodeReader.cpp#L3328 -ptrTo :: (MonadFail m, Show a, Show b) - => String - -> Typed a -- ^ The pointer - -> Typed b -- ^ The value - -> m () -ptrTo sig ptr val = do - case AST.typedType ptr of - AST.PtrTo ptrTo_ -> - when (AST.typedType val /= ptrTo_) $ fail $ unlines - [ unwords [ "Expected first value to be a pointer to some type , and" - , "for the second value to be a value of type ." - ] - , "Instruction signature: " ++ sig - , "Pointer type: " ++ show (AST.typedType ptr) - , "Value type: " ++ show (AST.typedType val) - , "Pointer value: " ++ show (AST.typedValue ptr) - , "Value value: " ++ show (AST.typedValue val) - ] - ty -> - fail $ unlines $ - [ "Instruction expected a pointer argument." - , "Instruction signature: " ++ sig - , "Argument type: " ++ show ty - ] +{- +Note [Pointers and pointee types] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Unlike LLVM itself, llvm-pretty and llvm-pretty-bc-parser allow mixing opaque +and non-opaque pointers. A consequence of this is that we generally avoid +pattern matching on PtrTo (non-opaque pointer) types and inspecting the +underlying pointee types. This sort of code simply won't work for PtrOpaque +types, which lack pointee types. + +The elimPtrTo and elimPtrTo_ functions go against this rule, as they retrieve +the pointee type in a PtrTo. These functions are primarily used for supporting +old versions of LLVM which do not store the necessary type information in the +instruction itself. +-} diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index f59dcbcc..fd5fc3d1 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -812,8 +812,6 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of else do ty <- Assert.elimPtrTo "" (typedType tv) return (ty, ix) - Assert.ptrTo "load atomic : , *" tv (Typed ret ()) - ordval <- getDecodedOrdering =<< parseField r (ix' + 2) unsigned when (ordval `elem` Nothing:map Just [Release, AcqRel]) $ fail $ "Invalid atomic ordering: " ++ show ordval @@ -840,7 +838,6 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of (val,ix') <- getValueTypePair t r ix Assert.recordSizeIn r [ix' + 2] - Assert.ptrTo "store : , * " ptr val aval <- field ix' numeric let align | aval > 0 = Just (bit aval `shiftR` 1) @@ -853,7 +850,6 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of (val, ix') <- getValueTypePair t r ix Assert.recordSizeIn r [ix' + 4] - Assert.ptrTo "store atomic : , * " ptr val -- TODO: There's no spot in the AST for this ordering. Should there be? ordering <- getDecodedOrdering =<< parseField r (ix' + 2) unsigned @@ -880,7 +876,6 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of -- TODO: record size assertion -- Assert.recordSizeGreater r (ix'' + 5) - Assert.ptrTo "cmpxchg : * , , " ptr val when (typedType val /= typedType new) $ fail $ unlines $ [ "Mismatched value types:" , "cmp value: " ++ show (typedValue val) @@ -1108,12 +1103,6 @@ addInstrAttachments atts blocks = go 0 (Map.toList atts) (Seq.viewl blocks) go _ _ Seq.EmptyL = Seq.empty -baseType :: Type -> Type -baseType (PtrTo ty) = ty -baseType (Array _ ty) = ty -baseType (Vector _ ty) = ty -baseType ty = ty - -- [n x operands] parseGEP :: ValueTable -> Maybe Bool -> Record -> PartialDefine -> Parse PartialDefine parseGEP t mbInBound r d = do @@ -1134,15 +1123,6 @@ parseGEP t mbInBound r d = do ib <- field 0 boolean ty <- getType =<< field 1 numeric (tv,ix') <- getValueTypePair t r' 2 - -- TODO: the following sometimes fails, but it doesn't seem to matter. - {- - unless (baseType (typedType tv) == ty) - (fail $ unlines [ "Explicit gep type does not match base type of pointer operand" - , "Declared type: " ++ show (ppType ty) - , "Operand type: " ++ show (ppType (typedType tv)) - , "Base type of operand: " ++ show (ppType (baseType (typedType tv))) - ]) - -} return (ib, ty, tv, r', ix') args <- label "parseGepArgs" (parseGepArgs t r' ix) From 86a3330251b19c1889825ef80c8c14b165719b56 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 20:41:29 -0400 Subject: [PATCH 05/14] Only use `elimPtrTo` on old LLVMs for `invoke`/`call`/`callbr` See `Note [Pointers and pointee types]` for the rationale. See also #177. --- src/Data/LLVM/BitCode/IR/Function.hs | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index fd5fc3d1..4a1e9312 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -539,13 +539,9 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of (f,ix') <- getValueTypePair t r ix - -- NOTE: mbFTy should be the same as the type of f - calleeTy <- Assert.elimPtrTo "Callee is not a pointer" (typedType f) - fty <- case mbFTy of - Just ty | calleeTy == ty -> return ty - | otherwise -> fail "Explicit invoke type does not match callee" - Nothing -> return calleeTy + Just ty -> return ty + Nothing -> Assert.elimPtrTo "Callee is not a pointer" (typedType f) (ret,as,va) <- elimFunTy fty `mplus` fail "invalid INVOKE record" @@ -714,14 +710,10 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of (Typed opTy fn, ix2) <- getValueTypePair t r ix1 `mplus` fail "Invalid record" - op <- Assert.elimPtrTo "Callee is not a pointer type" opTy - fnty <- case mbFnTy of - Just ty | ty == op -> return op - | otherwise -> fail "Explicit call type does not match \ - \pointee type of callee operand" - - Nothing -> + Just ty -> return ty + Nothing -> do + op <- Assert.elimPtrTo "Callee is not a pointer type" opTy case op of FunTy{} -> return op _ -> fail "Callee is not of pointer to function type" @@ -987,14 +979,10 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of (Typed opTy fn, ix2) <- getValueTypePair t r ix1 `mplus` fail "Invalid callbr record" - op <- Assert.elimPtrTo "callbr callee is not a pointer type" opTy - fnty <- case mbFnTy of - Just ty | ty == op -> return op - | otherwise -> fail "Explicit call type does not match \ - \pointee type of callee operand" - - Nothing -> + Just ty -> return ty + Nothing -> do + op <- Assert.elimPtrTo "callbr callee is not a pointer type" opTy case op of FunTy{} -> return op _ -> fail "Callee is not of pointer to function type" From 75967e60b4d577efd095b55c1d3ec13db8f29390 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 20:49:03 -0400 Subject: [PATCH 06/14] parseAtomicRMW: Don't inspect pointee type of argument on recent LLVMs Recent versions of the `FUNC_CODE_INST_ATOMICRMW` instruction code directly store the type corresponding to the pointer argument, which avoids the need to pattern-match on the pointer type. This is required to support opaque pointers. See #177. Older versions of the instruction (`FUNC_CODE_INST_ATOMICRMW_OLD`) do not store this type directly, so there were have no choice but to inspect the pointee type using `elimPtrTo`. --- src/Data/LLVM/BitCode/IR/Function.hs | 44 +++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index 4a1e9312..9edf7241 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -1137,29 +1137,27 @@ parseAtomicRMW old t r d = do -- TODO: parse sync scope (ssid) (ptr, ix0) <- getValueTypePair t r 0 - (val, ix1) <- case typedType ptr of - PtrTo ty@(PrimType prim) -> do - - -- Catch pointers of the wrong type - when (case prim of - Integer _ -> False - FloatType _ -> False - _ -> True) $ - fail $ "Expected pointer to integer or float, found " ++ show ty - - if old - then -- FUNC_CODE_INST_ATOMICRMW_OLD - do typed <- getValue t ty =<< parseField r ix0 numeric - if ty /= (typedType typed) - then fail $ unlines $ [ "Wrong type of value retrieved from value table" - , "Expected: " ++ show (ty) - , "Got: " ++ show (typedType typed) - ] - else pure (typed, ix0 + 1) - else -- FUNC_CODE_INST_ATOMICRMW - getValueTypePair t r ix0 - - ty -> fail $ "Expected pointer to integer or float, found " ++ show ty + (val, ix1) <- + if old + then -- FUNC_CODE_INST_ATOMICRMW_OLD + do ty <- Assert.elimPtrTo "atomicrmw instruction not headed by pointer" (typedType ptr) + typed <- getValue t ty =<< parseField r ix0 numeric + if ty /= (typedType typed) + then fail $ unlines $ [ "Wrong type of value retrieved from value table" + , "Expected: " ++ show (ty) + , "Got: " ++ show (typedType typed) + ] + else pure (typed, ix0 + 1) + else -- FUNC_CODE_INST_ATOMICRMW + getValueTypePair t r ix0 + + -- Catch incorrect operand types + let valTy = typedType val + unless (case valTy of + PrimType (Integer _) -> True + PrimType (FloatType _) -> True + _ -> False) $ + fail $ "Expected integer or float operand, found " ++ show valTy -- TODO: enable this assertion. Is getTypeValuePair returning the wrong value? -- when (length (recordFields r) /= ix1 + 4) $ do From f11693b8e56a4c6865e0145c1841121aafef0548 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 20:54:03 -0400 Subject: [PATCH 07/14] cmpxchg: Compare argument types using eqTypeModuloOpaquePtrs Because `llvm-pretty` permits opaque and non-opaque pointers to coexist, it is possible for the first argument of `cmpxchg` to be an opaque pointer and the second argument to be a non-opaque pointer (or vice versa). We don't want to reject such scenarios, so we compare the types of the argument using `eqTypeModuloOpaquePtrs`, a special form of type equality that treats opaque and non-opaque pointers as being the same. See #177. This requires bumping the `llvm-pretty` submodule to bring in the corresponding changes from elliottt/llvm-pretty#110. --- llvm-pretty | 2 +- src/Data/LLVM/BitCode/IR/Function.hs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm-pretty b/llvm-pretty index 16bc5bd5..f32f773d 160000 --- a/llvm-pretty +++ b/llvm-pretty @@ -1 +1 @@ -Subproject commit 16bc5bd5a07d6e5d99c1c0f3a57b8eca2ecc944e +Subproject commit f32f773dbc454fb0744d3d26947c210570abfdb2 diff --git a/src/Data/LLVM/BitCode/IR/Function.hs b/src/Data/LLVM/BitCode/IR/Function.hs index 9edf7241..5be9811a 100644 --- a/src/Data/LLVM/BitCode/IR/Function.hs +++ b/src/Data/LLVM/BitCode/IR/Function.hs @@ -868,7 +868,7 @@ parseFunctionBlockEntry _ t d (fromEntry -> Just r) = case recordCode r of -- TODO: record size assertion -- Assert.recordSizeGreater r (ix'' + 5) - when (typedType val /= typedType new) $ fail $ unlines $ + when (typedType val `eqTypeModuloOpaquePtrs` typedType new) $ fail $ unlines $ [ "Mismatched value types:" , "cmp value: " ++ show (typedValue val) , "new value: " ++ show (typedValue new) From 7040a226a12b2613385e0ed59a60119738bd5046 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 21:03:27 -0400 Subject: [PATCH 08/14] README: Indicate support for LLVM 15 and 16 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c13e19f6..1567b7b7 100644 --- a/README.md +++ b/README.md @@ -32,8 +32,8 @@ compilers. | | v12.0 | ✓ | | | | | v13.0 | ✓ | | See [issues][llvm13] | | | v14.0 | ✓ | | See [issues][llvm14] | -| | v15.0 | | | See [issues][llvm15] | -| | v16.0 | | | See [issues][llvm16] | +| | v15.0 | ✓ | | See [issues][llvm15] | +| | v16.0 | ✓ | | See [issues][llvm16] | | `clang++` | v3.4 | | | | | | v3.5 | | | | | | v3.6 | | | | From bf5e371b727654e9f21316a3d19efe48599875f6 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 21:05:29 -0400 Subject: [PATCH 09/14] llvm-quick-fuzz: Add LLVM 15 and 16 configurations --- .github/workflows/llvm-quick-fuzz.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/llvm-quick-fuzz.yml b/.github/workflows/llvm-quick-fuzz.yml index b32c344d..e008fa90 100644 --- a/.github/workflows/llvm-quick-fuzz.yml +++ b/.github/workflows/llvm-quick-fuzz.yml @@ -23,7 +23,9 @@ jobs: os: [ubuntu-22.04] # See doc/developing.md ghc: ["8.10.7"] - llvm: [ "https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" + llvm: [ "https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.2/clang+llvm-16.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz" + , "https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/clang+llvm-15.0.6-x86_64-linux-gnu-ubuntu-18.04.tar.xz" + , "https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04.tar.xz" , "https://github.com/llvm/llvm-project/releases/download/llvmorg-13.0.0/clang+llvm-13.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" , "https://github.com/llvm/llvm-project/releases/download/llvmorg-12.0.0/clang+llvm-12.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" , "https://github.com/llvm/llvm-project/releases/download/llvmorg-11.0.0/clang+llvm-11.0.0-x86_64-linux-gnu-ubuntu-20.04.tar.xz" From be7be208acc80aeed275cd85c2ef355f9e61c605 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 21:06:00 -0400 Subject: [PATCH 10/14] disasm-test: Avoid llvm-as from crashing on mixed opaque/non-opaque pointers This bumps the `llvm-pretty` submodule to bring in the `fixupOpaquePtrs` function from elliottt/llvm-pretty#110 and use it in the `disasm-test` test suite. This is needed because we must give pretty-printed `llvm-pretty` ASTs to `llvm-as`, which strictly forbids mixing opaque and non-opaque pointers. See #177. --- disasm-test/Main.hs | 5 +++-- llvm-pretty | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/disasm-test/Main.hs b/disasm-test/Main.hs index d0d61ac7..b967213e 100644 --- a/disasm-test/Main.hs +++ b/disasm-test/Main.hs @@ -366,11 +366,12 @@ processBitCode _keep (Roundtrip roundtrip) pfx file = do case e of Left err -> X.throwIO (ParseError err) Right m -> do - parsed <- printToTempFile "ll" (show (ppLLVM (ppModule m))) + let m' = AST.fixupOpaquePtrs m + parsed <- printToTempFile "ll" (show (ppLLVM (ppModule m'))) -- stripComments _keep parsed if roundtrip then do - tmp2 <- printToTempFile "ast" (ppShow (normalizeModule m)) + tmp2 <- printToTempFile "ast" (ppShow (normalizeModule m')) return (parsed, Just tmp2) else return (parsed, Nothing) diff --git a/llvm-pretty b/llvm-pretty index f32f773d..a454fcbe 160000 --- a/llvm-pretty +++ b/llvm-pretty @@ -1 +1 @@ -Subproject commit f32f773dbc454fb0744d3d26947c210570abfdb2 +Subproject commit a454fcbe4192c07bcced2cf1384686dc7359a4a3 From 5d0aba1d52efd36f0d68b9beba73ae0eefdd6e6f Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 22:18:31 -0400 Subject: [PATCH 11/14] disasm-test: Rename callbr.ll to callbr.pre-llvm15.ll The syntax for inline assembly changed slightly in LLVM 15+, so let's pre-emptively move this test case to `pre-llvm15`. In a subsequent commit, we will add a newer `.ll` file that supports LLVM 15+. --- disasm-test/Main.hs | 1 + disasm-test/tests/{callbr.ll => callbr.pre-llvm15.ll} | 0 2 files changed, 1 insertion(+) rename disasm-test/tests/{callbr.ll => callbr.pre-llvm15.ll} (100%) diff --git a/disasm-test/Main.hs b/disasm-test/Main.hs index b967213e..82adbdb5 100644 --- a/disasm-test/Main.hs +++ b/disasm-test/Main.hs @@ -215,6 +215,7 @@ cube = TS.mkCUBE , "pre-llvm12" , "pre-llvm13" , "pre-llvm14" + , "pre-llvm15" ]) ] -- Somewhat unusually for tasty-sugar, we make the expectedSuffix the same diff --git a/disasm-test/tests/callbr.ll b/disasm-test/tests/callbr.pre-llvm15.ll similarity index 100% rename from disasm-test/tests/callbr.ll rename to disasm-test/tests/callbr.pre-llvm15.ll From ffd44fd2f23e6614aa642a1bc7f48a38aa32268e Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sat, 22 Apr 2023 22:19:47 -0400 Subject: [PATCH 12/14] disasm-test: Add callbr test output for LLVM 15+ --- disasm-test/tests/callbr.c | 18 ++++++++++++++ disasm-test/tests/callbr.ll | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 disasm-test/tests/callbr.c create mode 100644 disasm-test/tests/callbr.ll diff --git a/disasm-test/tests/callbr.c b/disasm-test/tests/callbr.c new file mode 100644 index 00000000..845fffe9 --- /dev/null +++ b/disasm-test/tests/callbr.c @@ -0,0 +1,18 @@ +// Adapted from the Clang test suite: +// (https://github.com/llvm/llvm-project/blob/32103608fc073700f04238872d8b22655ddec3fb/clang/test/CodeGen/asm-goto.c#L5-L20), +// which is licensed under the Apache License v2.0 + + +int main(void) { + int cond = 0; + asm volatile goto("testl %0, %0; jne %l1;" + : /* No outputs */ + : "r"(cond) + : /* No clobbers */ + : label_true, loop); + return 0; +loop: + return 0; +label_true: + return 1; +} diff --git a/disasm-test/tests/callbr.ll b/disasm-test/tests/callbr.ll new file mode 100644 index 00000000..86e28b48 --- /dev/null +++ b/disasm-test/tests/callbr.ll @@ -0,0 +1,47 @@ +; ModuleID = 'callbr.c' +source_filename = "callbr.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @main() #0 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + store i32 0, ptr %1, align 4 + store i32 0, ptr %2, align 4 + %3 = load i32, ptr %2, align 4 + callbr void asm sideeffect "testl $0, $0; jne ${1:l};", "r,!i,!i,~{dirflag},~{fpsr},~{flags}"(i32 %3) #1 + to label %4 [label %6, label %5], !srcloc !7 + +4: ; preds = %0 + store i32 0, ptr %1, align 4 + br label %7 + +5: ; preds = %0 + store i32 0, ptr %1, align 4 + br label %7 + +6: ; preds = %0 + store i32 1, ptr %1, align 4 + br label %7 + +7: ; preds = %6, %5, %4 + %8 = load i32, ptr %1, align 4 + ret i32 %8 +} + +attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} +!llvm.commandline = !{!6} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{!"clang version 15.0.6"} +!6 = !{!"/home/ryanglscott/Software/clang+llvm-15.0.6/bin/clang-15 -S -emit-llvm -frecord-command-line callbr.c -o callbr.ll"} +!7 = !{i64 272} From bb603a362ea098ac91d3a5adfe005796bb7f1de2 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Sun, 23 Apr 2023 10:12:53 -0400 Subject: [PATCH 13/14] disasm-test: Add opaque pointer test cases See #177. --- disasm-test/tests/opaque-atomicrmw.ll | 4 ++++ disasm-test/tests/opaque-atomicrmw.pre-llvm15.ll | 4 ++++ disasm-test/tests/opaque-call.ll | 11 +++++++++++ disasm-test/tests/opaque-call.pre-llvm15.ll | 4 ++++ disasm-test/tests/opaque-constant-getelementptr.ll | 9 +++++++++ .../tests/opaque-constant-getelementptr.pre-llvm15.ll | 4 ++++ disasm-test/tests/opaque-getelementptr.ll | 8 ++++++++ disasm-test/tests/opaque-getelementptr.pre-llvm15.ll | 4 ++++ 8 files changed, 48 insertions(+) create mode 100644 disasm-test/tests/opaque-atomicrmw.ll create mode 100644 disasm-test/tests/opaque-atomicrmw.pre-llvm15.ll create mode 100644 disasm-test/tests/opaque-call.ll create mode 100644 disasm-test/tests/opaque-call.pre-llvm15.ll create mode 100644 disasm-test/tests/opaque-constant-getelementptr.ll create mode 100644 disasm-test/tests/opaque-constant-getelementptr.pre-llvm15.ll create mode 100644 disasm-test/tests/opaque-getelementptr.ll create mode 100644 disasm-test/tests/opaque-getelementptr.pre-llvm15.ll diff --git a/disasm-test/tests/opaque-atomicrmw.ll b/disasm-test/tests/opaque-atomicrmw.ll new file mode 100644 index 00000000..27c27c59 --- /dev/null +++ b/disasm-test/tests/opaque-atomicrmw.ll @@ -0,0 +1,4 @@ +define void @atomicrmw(ptr %a, i32 %i) { + %b = atomicrmw add ptr %a, i32 %i acquire + ret void +} diff --git a/disasm-test/tests/opaque-atomicrmw.pre-llvm15.ll b/disasm-test/tests/opaque-atomicrmw.pre-llvm15.ll new file mode 100644 index 00000000..d7cb3b6c --- /dev/null +++ b/disasm-test/tests/opaque-atomicrmw.pre-llvm15.ll @@ -0,0 +1,4 @@ +SKIP_TEST + +This test case requires the use of opaque pointers, which are most easily +usable with LLVM 15 or later. diff --git a/disasm-test/tests/opaque-call.ll b/disasm-test/tests/opaque-call.ll new file mode 100644 index 00000000..8c323d32 --- /dev/null +++ b/disasm-test/tests/opaque-call.ll @@ -0,0 +1,11 @@ +define void @f(i32 %x) { + ret void +} + +define void @g() { + %p = alloca ptr + store ptr @f, ptr %p + %f = load ptr, ptr %p + call void (i32) %f(i32 42) + ret void +} diff --git a/disasm-test/tests/opaque-call.pre-llvm15.ll b/disasm-test/tests/opaque-call.pre-llvm15.ll new file mode 100644 index 00000000..d7cb3b6c --- /dev/null +++ b/disasm-test/tests/opaque-call.pre-llvm15.ll @@ -0,0 +1,4 @@ +SKIP_TEST + +This test case requires the use of opaque pointers, which are most easily +usable with LLVM 15 or later. diff --git a/disasm-test/tests/opaque-constant-getelementptr.ll b/disasm-test/tests/opaque-constant-getelementptr.ll new file mode 100644 index 00000000..7d3c5d5b --- /dev/null +++ b/disasm-test/tests/opaque-constant-getelementptr.ll @@ -0,0 +1,9 @@ +%struct.RT = type { i8, [10 x [20 x i32]], i8 } +%struct.ST = type { i32, double, %struct.RT } + +@.s = private constant %struct.ST zeroinitializer + +define ptr @foo() { +entry: + ret ptr getelementptr inbounds (%struct.ST, ptr @.s, i64 1, i32 2, i32 1, i64 5, i64 13) +} diff --git a/disasm-test/tests/opaque-constant-getelementptr.pre-llvm15.ll b/disasm-test/tests/opaque-constant-getelementptr.pre-llvm15.ll new file mode 100644 index 00000000..d7cb3b6c --- /dev/null +++ b/disasm-test/tests/opaque-constant-getelementptr.pre-llvm15.ll @@ -0,0 +1,4 @@ +SKIP_TEST + +This test case requires the use of opaque pointers, which are most easily +usable with LLVM 15 or later. diff --git a/disasm-test/tests/opaque-getelementptr.ll b/disasm-test/tests/opaque-getelementptr.ll new file mode 100644 index 00000000..b6739792 --- /dev/null +++ b/disasm-test/tests/opaque-getelementptr.ll @@ -0,0 +1,8 @@ +%struct.RT = type { i8, [10 x [20 x i32]], i8 } +%struct.ST = type { i32, double, %struct.RT } + +define ptr @foo(ptr %s) { +entry: + %arrayidx = getelementptr inbounds %struct.ST, ptr %s, i64 1, i32 2, i32 1, i64 5, i64 13 + ret ptr %arrayidx +} diff --git a/disasm-test/tests/opaque-getelementptr.pre-llvm15.ll b/disasm-test/tests/opaque-getelementptr.pre-llvm15.ll new file mode 100644 index 00000000..d7cb3b6c --- /dev/null +++ b/disasm-test/tests/opaque-getelementptr.pre-llvm15.ll @@ -0,0 +1,4 @@ +SKIP_TEST + +This test case requires the use of opaque pointers, which are most easily +usable with LLVM 15 or later. From 3a87e05220198281194ac173dcbcd958baad4ae7 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Mon, 8 May 2023 13:31:08 -0400 Subject: [PATCH 14/14] Allow versions-6.* Note that `versions` uses SemVer rather than the PVP, and as a result, it sometimes bumps the secondary version number even for non-breaking changes. As a result, I am guarding against the major version number rather than the secondary one. --- llvm-pretty-bc-parser.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm-pretty-bc-parser.cabal b/llvm-pretty-bc-parser.cabal index 06433b6c..fae43285 100644 --- a/llvm-pretty-bc-parser.cabal +++ b/llvm-pretty-bc-parser.cabal @@ -135,7 +135,7 @@ Test-suite disasm-test tasty-hunit, tasty-sugar >= 2.2 && < 2.3, text, - versions < 6, + versions < 7, llvm-pretty, llvm-pretty-bc-parser