Skip to content

Commit

Permalink
Merge pull request #177 from GaloisInc/strings2_ci_2201
Browse files Browse the repository at this point in the history
Updated strings support with recent CI testing and Z3 compatibility matrix.
  • Loading branch information
kquick authored Jan 11, 2022
2 parents dbec0ba + a566649 commit 537b609
Show file tree
Hide file tree
Showing 9 changed files with 284 additions and 275 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ views or policies of the Department of Defense or the U.S. Government.

# Solver Compatibility

| Feature | ABC | Boolector | CVC4 | Dreal | STP | Yices | Z3 |
|---------------|------|-----------|--------|-------|----------|----------|-------------------------------|
| Supported | yes | 3.2.2, ? | 1.8, ? | yes | 2.3.3, ? | 2.6.4, ? | 4.8.8 -- 4.8.14 |
| goal timeouts | | | | | | | yes except 4.8.12 and GHC < 9 |
| | | | | | | | |
| Feature | ABC | Boolector | CVC4 | Dreal | STP | Yices | Z3 |
|---------------------------------------|-----|-----------|--------|-------|----------|----------|-----------------|
| Supported | yes | 3.2.2, ? | 1.8, ? | yes | 2.3.3, ? | 2.6.4, ? | 4.8.8 -- 4.8.14 |
| goal timeouts | ? | yes | yes | ? | yes | yes | ! 4.8.12 |
| strings with unicode and escape codes | ? | ? | yes | ? | ? | ? | >= 4.8.12 |
122 changes: 80 additions & 42 deletions what4/src/What4/Protocol/SMTLib2.hs
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,6 @@ import Control.Applicative
import Control.Exception
import Control.Monad.State.Strict
import qualified Data.BitVector.Sized as BV
import qualified Data.Bits as Bits
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Char (digitToInt, isPrint, isAscii)
import Data.IORef
import Data.Map.Strict (Map)
Expand Down Expand Up @@ -207,54 +204,95 @@ arraySelect = SMT2.select
arrayStore :: Term -> Term -> Term -> Term
arrayStore = SMT2.store

byteStringTerm :: ByteString -> Term
byteStringTerm bs = SMT2.T ("\"" <> BS.foldr f "\"" bs)
------------------------------------------------------------------------------------
-- String Escaping functions
--
-- The following functions implement the escaping and
-- escape parsing rules from SMTLib 2.6. Documentation
-- regarding this format is pasted below from the
-- specification document.
--
-- The restriction to printable US ASCII characters in string constants is for
-- simplicity since that set is universally supported. Arbitrary Unicode characters
-- can be represented with _escape sequences_ which can have one of the following
-- forms
-- \ud₃d₂d₁d₀
-- \u{d₀}
-- \u{d₁d₀}
-- \u{d₂d₁d₀}
-- \u{d₃d₂d₁d₀}
-- \u{d₄d₃d₂d₁d₀}
-- where each dᵢ is a hexadecimal digit and d₄ is restricted to the range 0-2.
-- These are the **only escape sequences** in this theory. See later.
-- In a later version, the restrictions above on the digits may be extended
-- to allow characters from all 17 Unicode planes.
--
-- Observe that the first form, \ud₃d₂d₁d₀, has exactly 4 hexadecimal digit,
-- following the common use of this form in some programming languages.
-- Unicode characters outside the range covered by \ud₃d₂d₁d₀ can be
-- represented with the long form \u{d₄d₃d₂d₁d₀}.
--
-- Also observe that programming language-specific escape sequences, such as
-- \n, \b, \r and so on, are _not_ escape sequences in this theory as they
-- are not fully standard across languages.

-- | Apply the SMTLib2.6 string escaping rules to a string literal.
textToTerm :: Text -> Term
textToTerm bs = SMT2.T ("\"" <> Text.foldr f "\"" bs)
where
f w x
f c x
| '\"' == c = "\"\"" <> x
| isPrint c = Builder.singleton c <> x
| otherwise = "\\x" <> h1 <> h2 <> x
where
h1 = Builder.fromString (showHex (w `Bits.shiftR` 4) "")
h2 = Builder.fromString (showHex (w Bits..&. 0xF) "")

c :: Char
c = toEnum (fromEnum w)

| otherwise = "\\u{" <> Builder.fromString (showHex (fromEnum c) "}") <> x

unescapeText :: Text -> Maybe ByteString
-- | Parse SMTLIb2.6 escaping rules for strings.
--
-- Note! The escaping rule that uses the @\"\"@ sequence
-- to encode a double quote has already been resolved
-- by @parseSMTLIb2String@, so here we just need to
-- parse the @\\u@ escape forms.
unescapeText :: Text -> Maybe Text
unescapeText = go mempty
where
go bs t =
go str t =
case Text.uncons t of
Nothing -> Just bs
Nothing -> Just str
Just (c, t')
| not (isAscii c) -> Nothing
| c == '\\' -> readEscape bs t'
| otherwise -> continue bs c t'
| c == '\\' -> readEscape str t'
| otherwise -> continue str c t'

continue bs c t = go (BS.snoc bs (toEnum (fromEnum c))) t
continue str c t = go (Text.snoc str c) t

readEscape bs t =
readEscape str t =
case Text.uncons t of
Nothing -> Nothing
Just (c, t')
| c == 'a' -> continue bs '\a' t'
| c == 'b' -> continue bs '\b' t'
| c == 'e' -> continue bs '\x1B' t'
| c == 'f' -> continue bs '\f' t'
| c == 'n' -> continue bs '\n' t'
| c == 'r' -> continue bs '\r' t'
| c == 't' -> continue bs '\t' t'
| c == 'v' -> continue bs '\v' t'
| c == 'x' -> readHexEscape bs t'
| otherwise -> continue bs c t'

readHexEscape bs t =
case readHex (Text.unpack (Text.take 2 t)) of
(n, []):_ | 0 <= n && n < 256 -> go (BS.snoc bs (toEnum n)) (Text.drop 2 t)
-- Note: the \u forms are the _only_ escape forms
| c == 'u' -> readHexEscape str t'
| otherwise -> continue (Text.snoc str '\\') c t'

readHexEscape str t =
case Text.uncons t of
Just (c, t')
-- take until the closing brace
| c == '{'
, (ds, t'') <- Text.breakOn "}" t'
, Just ('}',t''') <- Text.uncons t''
-> readDigits str ds t'''

-- take exactly four digits
| (ds, t'') <- Text.splitAt 4 t'
, Text.length ds == 4
-> readDigits str ds t''

_ -> Nothing

readDigits str ds t =
case readHex (Text.unpack ds) of
(n, []):_ -> continue str (toEnum n) t
_ -> Nothing

-- | This class exists so that solvers supporting the SMTLib2 format can support
-- features that go slightly beyond the standard.
--
Expand Down Expand Up @@ -295,8 +333,8 @@ class Show a => SMTLib2Tweaks a where
smtlib2StringSort :: SMT2.Sort
smtlib2StringSort = SMT2.Sort "String"

smtlib2StringTerm :: ByteString -> Term
smtlib2StringTerm = byteStringTerm
smtlib2StringTerm :: Text -> Term
smtlib2StringTerm = textToTerm

smtlib2StringLength :: Term -> Term
smtlib2StringLength = SMT2.un_app "str.len"
Expand Down Expand Up @@ -368,7 +406,7 @@ asSMT2Type IntegerTypeMap = SMT2.intSort
asSMT2Type RealTypeMap = SMT2.realSort
asSMT2Type (BVTypeMap w) = SMT2.bvSort (natValue w)
asSMT2Type (FloatTypeMap fpp) = SMT2.Sort $ mkFloatSymbol "FloatingPoint" (asSMTFloatPrecision fpp)
asSMT2Type Char8TypeMap = smtlib2StringSort @a
asSMT2Type UnicodeTypeMap = smtlib2StringSort @a
asSMT2Type ComplexToStructTypeMap =
smtlib2StructSort @a [ SMT2.realSort, SMT2.realSort ]
asSMT2Type ComplexToArrayTypeMap =
Expand Down Expand Up @@ -636,7 +674,7 @@ instance SMTLib2Tweaks a => SMTWriter (Writer a) where
let resolveArg (var, Some tp) = (var, asSMT2Type @a tp)
in SMT2.defineFun f (resolveArg <$> args) (asSMT2Type @a return_type) e

stringTerm bs = smtlib2StringTerm @a bs
stringTerm str = smtlib2StringTerm @a str
stringLength x = smtlib2StringLength @a x
stringAppend xs = smtlib2StringAppend @a xs
stringContains x y = smtlib2StringContains @a x y
Expand Down Expand Up @@ -745,8 +783,8 @@ parseBVLitHelper (SApp ["_", SAtom (Text.unpack -> ('b' : 'v' : n_str)), SAtom (
-- BGS: Is this correct?
parseBVLitHelper _ = natBV 0 0

parseStringSolverValue :: MonadFail m => SExp -> m ByteString
parseStringSolverValue (SString t) | Just bs <- unescapeText t = return bs
parseStringSolverValue :: MonadFail m => SExp -> m Text
parseStringSolverValue (SString t) | Just t' <- unescapeText t = return t'
parseStringSolverValue x = fail ("Could not parse string solver value:\n " ++ show x)

parseFloatSolverValue :: MonadFail m => FloatPrecisionRepr fpp
Expand All @@ -759,7 +797,7 @@ parseFloatSolverValue (FloatingPointPrecisionRepr eb sb) s = do
(Just Refl, Just Refl) -> do
-- eb' + 1 ~ 1 + eb'
Refl <- return $ plusComm eb' (knownNat @1)
-- (eb' + 1) + sb' ~ eb' + (1 + sb')
-- (eb' + 1) + sb' ~ eb' + (1 + sb')
Refl <- return $ plusAssoc eb' (knownNat @1) sb'
return bv
where bv = BV.concat (addNat (knownNat @1) eb) sb' (BV.concat knownNat eb sgn expt) sig
Expand Down
43 changes: 21 additions & 22 deletions what4/src/What4/Protocol/SMTWriter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ import Control.Monad.State.Strict
import Control.Monad.Trans.Maybe
import qualified Data.BitVector.Sized as BV
import qualified Data.Bits as Bits
import Data.ByteString (ByteString)
import Data.IORef
import Data.Kind
import Data.List.NonEmpty (NonEmpty(..))
Expand Down Expand Up @@ -174,7 +173,7 @@ data TypeMap (tp::BaseType) where
RealTypeMap :: TypeMap BaseRealType
BVTypeMap :: (1 <= w) => !(NatRepr w) -> TypeMap (BaseBVType w)
FloatTypeMap :: !(FloatPrecisionRepr fpp) -> TypeMap (BaseFloatType fpp)
Char8TypeMap :: TypeMap (BaseStringType Char8)
UnicodeTypeMap :: TypeMap (BaseStringType Unicode)

-- A complex number mapped to an SMTLIB struct.
ComplexToStructTypeMap:: TypeMap BaseComplexType
Expand Down Expand Up @@ -211,7 +210,7 @@ instance Show (TypeMap a) where
show RealTypeMap = "RealTypeMap"
show (BVTypeMap n) = "BVTypeMap " ++ show n
show (FloatTypeMap x) = "FloatTypeMap " ++ show x
show Char8TypeMap = "Char8TypeMap"
show UnicodeTypeMap = "UnicodeTypeMap"
show (ComplexToStructTypeMap) = "ComplexToStructTypeMap"
show ComplexToArrayTypeMap = "ComplexToArrayTypeMap"
show (PrimArrayTypeMap ctx a) = "PrimArrayTypeMap " ++ showF ctx ++ " " ++ showF a
Expand All @@ -226,7 +225,7 @@ instance TestEquality TypeMap where
testEquality BoolTypeMap BoolTypeMap = Just Refl
testEquality IntegerTypeMap IntegerTypeMap = Just Refl
testEquality RealTypeMap RealTypeMap = Just Refl
testEquality Char8TypeMap Char8TypeMap = Just Refl
testEquality UnicodeTypeMap UnicodeTypeMap = Just Refl
testEquality (FloatTypeMap x) (FloatTypeMap y) = do
Refl <- testEquality x y
return Refl
Expand Down Expand Up @@ -931,7 +930,7 @@ class (SupportTermOps (Term h)) => SMTWriter h where
structProj :: Ctx.Assignment TypeMap args -> Ctx.Index args tp -> Term h -> Term h

-- | Produce a term representing a string literal
stringTerm :: ByteString -> Term h
stringTerm :: Text -> Term h

-- | Compute the length of a term
stringLength :: Term h -> Term h
Expand Down Expand Up @@ -1056,7 +1055,7 @@ declareTypes conn = \case
RealTypeMap -> return ()
BVTypeMap _ -> return ()
FloatTypeMap _ -> return ()
Char8TypeMap -> return ()
UnicodeTypeMap -> return ()
ComplexToStructTypeMap -> declareStructDatatype conn (Ctx.Empty Ctx.:> RealTypeMap Ctx.:> RealTypeMap)
ComplexToArrayTypeMap -> return ()
PrimArrayTypeMap args ret ->
Expand Down Expand Up @@ -1181,7 +1180,7 @@ typeMapFirstClass conn tp0 = do
BaseFloatRepr fpp -> Right $! FloatTypeMap fpp
BaseRealRepr -> Right RealTypeMap
BaseIntegerRepr -> Right IntegerTypeMap
BaseStringRepr Char8Repr -> Right Char8TypeMap
BaseStringRepr UnicodeRepr -> Right UnicodeTypeMap
BaseStringRepr si -> Left (StringTypeUnsupported (Some si))
BaseComplexRepr
| feat `hasProblemFeature` useStructs -> Right ComplexToStructTypeMap
Expand Down Expand Up @@ -1299,7 +1298,7 @@ addPartialSideCond _ t (BVTypeMap w) (Just (BVD.BVDBitwise rng)) = assertBitRang
when (hi < maxUnsigned w) $
addSideCondition "bv_bitrange" $ (bvOr t (bvTerm w (BV.mkBV w hi))) .== (bvTerm w (BV.mkBV w hi))

addPartialSideCond _ t (Char8TypeMap) (Just (StringAbs len)) =
addPartialSideCond _ t (UnicodeTypeMap) (Just (StringAbs len)) =
do case rangeLowBound len of
Inclusive lo ->
addSideCondition "string length low range" $
Expand Down Expand Up @@ -1758,9 +1757,9 @@ mkExpr t@(FloatExpr fpp f _) = do
return $ SMTExpr (FloatTypeMap fpp) $ floatTerm fpp f
mkExpr t@(StringExpr l _) =
case l of
Char8Literal bs -> do
UnicodeLiteral str -> do
checkStringSupport t
return $ SMTExpr Char8TypeMap $ stringTerm @h bs
return $ SMTExpr UnicodeTypeMap $ stringTerm @h str
_ -> do
conn <- asks scConn
theoryUnsupported conn ("strings " ++ show (stringLiteralInfo l)) t
Expand Down Expand Up @@ -2322,15 +2321,15 @@ appSMTExpr ae = do

StringLength xe -> do
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
freshBoundTerm IntegerTypeMap $ stringLength @h x
si -> fail ("Unsupported symbolic string length operation " ++ show si)

StringIndexOf xe ye ke ->
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
y <- mkBaseExpr ye
Expand All @@ -2340,17 +2339,17 @@ appSMTExpr ae = do

StringSubstring _ xe offe lene ->
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
off <- mkBaseExpr offe
len <- mkBaseExpr lene
freshBoundTerm Char8TypeMap $ stringSubstring @h x off len
freshBoundTerm UnicodeTypeMap $ stringSubstring @h x off len
si -> fail ("Unsupported symbolic string substring operation " ++ show si)

StringContains xe ye ->
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
y <- mkBaseExpr ye
Expand All @@ -2359,7 +2358,7 @@ appSMTExpr ae = do

StringIsPrefixOf xe ye ->
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
y <- mkBaseExpr ye
Expand All @@ -2368,7 +2367,7 @@ appSMTExpr ae = do

StringIsSuffixOf xe ye ->
case stringInfo xe of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
x <- mkBaseExpr xe
y <- mkBaseExpr ye
Expand All @@ -2377,12 +2376,12 @@ appSMTExpr ae = do

StringAppend si xes ->
case si of
Char8Repr -> do
UnicodeRepr -> do
checkStringSupport i
let f (SSeq.StringSeqLiteral l) = return $ stringTerm @h $ fromChar8Lit l
let f (SSeq.StringSeqLiteral l) = return $ stringTerm @h $ fromUnicodeLit l
f (SSeq.StringSeqTerm t) = mkBaseExpr t
xs <- mapM f $ SSeq.toList xes
freshBoundTerm Char8TypeMap $ stringAppend @h xs
freshBoundTerm UnicodeTypeMap $ stringAppend @h xs

_ -> fail ("Unsupported symbolic string append operation " ++ show si)

Expand Down Expand Up @@ -2965,7 +2964,7 @@ data SMTEvalFunctions h
-- and codomain are both bitvectors. If 'Nothing',
-- signifies that we should fall back to index-selection
-- representation of arrays.
, smtEvalString :: Term h -> IO ByteString
, smtEvalString :: Term h -> IO Text
-- ^ Given a SMT term representing as sequence of bytes,
-- return the value as a bytestring.
}
Expand Down Expand Up @@ -3017,7 +3016,7 @@ getSolverVal _ smtFns (BVTypeMap w) tm = smtEvalBV smtFns w tm
getSolverVal _ smtFns RealTypeMap tm = smtEvalReal smtFns tm
getSolverVal _ smtFns (FloatTypeMap fpp) tm =
bfFromBits (fppOpts fpp RNE) . BV.asUnsigned <$> smtEvalFloat smtFns fpp tm
getSolverVal _ smtFns Char8TypeMap tm = Char8Literal <$> smtEvalString smtFns tm
getSolverVal _ smtFns UnicodeTypeMap tm = UnicodeLiteral <$> smtEvalString smtFns tm
getSolverVal _ smtFns IntegerTypeMap tm = do
r <- smtEvalReal smtFns tm
when (denominator r /= 1) $ fail "Expected integer value."
Expand Down
2 changes: 1 addition & 1 deletion what4/src/What4/Solver/Yices.hs
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ yicesType IntegerTypeMap = intType
yicesType RealTypeMap = realType
yicesType (BVTypeMap w) = YicesType (app "bitvector" [fromString (show w)])
yicesType (FloatTypeMap _) = floatFail
yicesType Char8TypeMap = stringFail
yicesType UnicodeTypeMap = stringFail
yicesType ComplexToStructTypeMap = tupleType [realType, realType]
yicesType ComplexToArrayTypeMap = fnType [boolType] realType
yicesType (PrimArrayTypeMap i r) = fnType (toListFC yicesType i) (yicesType r)
Expand Down
Loading

0 comments on commit 537b609

Please sign in to comment.