diff --git a/saw-core/src/Verifier/SAW/Name.hs b/saw-core/src/Verifier/SAW/Name.hs index d979ea82d1..442dd129c1 100644 --- a/saw-core/src/Verifier/SAW/Name.hs +++ b/saw-core/src/Verifier/SAW/Name.hs @@ -174,7 +174,7 @@ data NameInfo | -- | This name was imported from some other programming language/scope ImportedName URI -- ^ An absolutely-qualified name, which is required to be unique - [Text] -- ^ A collection of aliases for this name. Sorter or "less-qualified" + [Text] -- ^ A collection of aliases for this name. Shorter or "less-qualified" -- aliases should be nearer the front of the list deriving (Eq,Ord,Show) @@ -249,13 +249,13 @@ data PrimName e = deriving (Show, Functor, Foldable, Traversable) instance Eq (PrimName e) where - x == y = primVarIndex x == primVarIndex y + x == y = primName x == primName y instance Ord (PrimName e) where - compare x y = compare (primVarIndex x) (primVarIndex y) + compare x y = compare (primName x) (primName y) instance Hashable (PrimName e) where - hashWithSalt x pn = hashWithSalt x (primVarIndex pn) + hashWithSalt x pn = hashWithSalt x (primName pn) primNameToExtCns :: PrimName e -> ExtCns e primNameToExtCns (PrimName varIdx nm tp) = EC varIdx (ModuleIdentifier nm) tp diff --git a/saw-core/src/Verifier/SAW/Rewriter.hs b/saw-core/src/Verifier/SAW/Rewriter.hs index dff781e9e4..4017917745 100644 --- a/saw-core/src/Verifier/SAW/Rewriter.hs +++ b/saw-core/src/Verifier/SAW/Rewriter.hs @@ -245,7 +245,7 @@ scMatch sc pat term = -- saves the names associated with those bound variables. match :: Int -> [(LocalName, Term)] -> Term -> Term -> MatchState -> MaybeT IO MatchState - match _ _ (STApp i fv _) (STApp j _ _) s + match _ _ (STApp i _ fv _) (STApp j _ _ _) s | fv == emptyBitSet && i == j = return s match depth env x y s@(MatchState m cs) = -- (lift $ putStrLn $ "matching (lhs): " ++ scPrettyTerm defaultPPOpts x) >> diff --git a/saw-core/src/Verifier/SAW/SharedTerm.hs b/saw-core/src/Verifier/SAW/SharedTerm.hs index 82fa91cae4..0e0eb1cc76 100644 --- a/saw-core/src/Verifier/SAW/SharedTerm.hs +++ b/saw-core/src/Verifier/SAW/SharedTerm.hs @@ -289,10 +289,11 @@ import Data.List (inits, find) import Data.Maybe import qualified Data.Foldable as Fold import Data.Foldable (foldl', foldlM, foldrM, maximum) +import Data.Hashable (Hashable(hash)) import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HMap -import Data.IntMap (IntMap) -import qualified Data.IntMap as IntMap +import Data.IntMap.Strict (IntMap) +import qualified Data.IntMap.Strict as IntMap import qualified Data.IntSet as IntSet import Data.IORef (IORef,newIORef,readIORef,modifyIORef',atomicModifyIORef',writeIORef) import Data.Map (Map) @@ -343,7 +344,7 @@ data TermFMap a } emptyTFM :: TermFMap a -emptyTFM = TermFMap IntMap.empty HMap.empty +emptyTFM = TermFMap mempty mempty lookupTFM :: TermF Term -> TermFMap a -> Maybe a lookupTFM tf tfm = @@ -634,18 +635,19 @@ emptyAppCache = emptyTFM -- | Return term for application using existing term in cache if it is available. getTerm :: AppCacheRef -> TermF Term -> IO Term -getTerm r a = - modifyMVar r $ \s -> do - case lookupTFM a s of - Just t -> return (s, t) +getTerm cache termF = + modifyMVar cache $ \s -> do + case lookupTFM termF s of + Just term -> return (s, term) Nothing -> do i <- getUniqueInt - let t = STApp { stAppIndex = i - , stAppFreeVars = freesTermF (fmap looseVars a) - , stAppTermF = a - } - let s' = insertTFM a t s - seq s' $ return (s', t) + let term = STApp { stAppIndex = i + , stAppHash = hash termF + , stAppFreeVars = freesTermF (fmap looseVars termF) + , stAppTermF = termF + } + s' = insertTFM termF term s + seq s' $ return (s', term) -------------------------------------------------------------------------------- diff --git a/saw-core/src/Verifier/SAW/Term/Functor.hs b/saw-core/src/Verifier/SAW/Term/Functor.hs index f926736883..581e1682ef 100644 --- a/saw-core/src/Verifier/SAW/Term/Functor.hs +++ b/saw-core/src/Verifier/SAW/Term/Functor.hs @@ -365,6 +365,10 @@ zipWithFlatTermF f = go -- Term Functor ---------------------------------------------------------------- +-- | A \"knot-tying\" structure for representing terms and term-like things. +-- Often, this appears in context as the type \"'TermF' 'Term'\", in which case +-- it represents a full 'Term' AST. The \"F\" stands for 'Functor', or +-- occasionally for \"Former\". data TermF e = FTermF !(FlatTermF e) -- ^ The atomic, or builtin, term constructs @@ -381,24 +385,77 @@ data TermF e -- The body and type should be closed terms. deriving (Eq, Ord, Show, Functor, Foldable, Traversable, Generic) +-- See the commentary on 'Hashable Term' for a note on uniqueness. instance Hashable e => Hashable (TermF e) -- automatically derived. +-- NB: we may someday wish to customize this instance, for a couple reasons. +-- +-- 1. Hash 'Constant's based on their definition, if it exists, rather than +-- always using both their type and definition (as the automatically derived +-- instance does). Their type, represented as an 'ExtCns', contains unavoidable +-- freshness derived from a global counter (via 'scFreshGlobalVar' as +-- initialized in 'Verifier.SAW.SharedTerm.mkSharedContext'), but their +-- definition does not necessarily contain the same freshness. +-- +-- 2. Improve the default, XOR-based hashing scheme to improve collision +-- resistance. A polynomial-based approach may be fruitful. For a constructor +-- with fields numbered 1..n, evaluate a polynomial along the lines of: +-- coeff(0) * salt ^ 0 + coeff(1) + salt ^ 1 + ... + coeff(n) * salt ^ n +-- where +-- coeff(0) = salt `hashWithSalt` +-- coeff(i) = salt `hashWithSalt` -- Term Datatype --------------------------------------------------------------- type TermIndex = Int -- Word64 +-- | For more information on the semantics of 'Term's, see the +-- [manual](https://saw.galois.com/manual.html). 'Term' and 'TermF' are split +-- into two structures to facilitate mutual structural recursion (sometimes +-- referred to as the ["knot-tying"](https://wiki.haskell.org/Tying_the_Knot) +-- pattern, sometimes referred to in terms of ["recursion +-- schemes"](https://blog.sumtypeofway.com/posts/introduction-to-recursion-schemes.html)) +-- and term object reuse via hash-consing. data Term = STApp + -- ^ This constructor \"wraps\" a 'TermF' 'Term', assigning it a + -- guaranteed-unique integer identifier and caching its likely-unique hash. + -- Most 'Term's are constructed via 'STApp'. When a fresh 'TermF' is evinced + -- in the course of a SAW invocation and needs to be lifted into a 'Term', + -- we can see if we've already created a 'Term' wrapper for an identical + -- 'TermF', and reuse it if so. The implementation of hash-consed 'Term' + -- construction exists in 'Verifier.SAW.SharedTerm', in particular in the + -- 'Verifier.SAW.SharedTerm.scTermF' field of the + -- t'Verifier.SAW.SharedTerm.SharedContext' object. { stAppIndex :: {-# UNPACK #-} !TermIndex - , stAppFreeVars :: !BitSet -- Free variables + -- ^ The UID associated with a 'Term'. It is guaranteed unique across a + -- universe of properly-constructed 'Term's within a single SAW + -- invocation. + , stAppHash :: {-# UNPACK #-} !Int + -- ^ The hash, according to 'hash', of the 'stAppTermF' field associated + -- with this 'Term'. This should be as unique as a hash can be, but is + -- not guaranteed unique as 'stAppIndex' is. + , stAppFreeVars :: !BitSet + -- ^ The free variables associated with the 'stAppTermF' field. , stAppTermF :: !(TermF Term) + -- ^ The underlying 'TermF' that this 'Term' wraps. This field "ties the + -- knot" of the 'Term'/'TermF' recursion scheme. } | Unshared !(TermF Term) + -- ^ Used for constructing 'Term's that don't need to be shared/reused. deriving (Show, Typeable) instance Hashable Term where - hashWithSalt salt STApp{ stAppIndex = i } = salt `combine` 0x00000000 `hashWithSalt` hash i + -- Why have 'Hashable' depend on the not-necessarily-unique hash instead of + -- the necessarily-unique index? Per #1830 (PR) and #1831 (issue), we want to + -- be able to derive a reference to terms based solely on their shape. Indices + -- have nothing to do with a term's shape - they're assigned sequentially when + -- building terms, according to the (arbitrary) order in which a term is + -- built. As for uniqueness, though hashing a term based on its subterms' + -- hashes introduces less randomness/freshness, it maintains plenty, and + -- provides benefits as described above. No code should ever rely on total + -- uniqueness of hashes, and terms are no exception. + hashWithSalt salt STApp{ stAppHash = h } = salt `combine` 0x00000000 `hashWithSalt` h hashWithSalt salt (Unshared t) = salt `combine` 0x55555555 `hashWithSalt` hash t @@ -408,6 +465,15 @@ combine :: Int -> Int -> Int combine h1 h2 = (h1 * 0x01000193) `xor` h2 instance Eq Term where + -- Note: we take some minor liberties with the contract of 'hashWithSalt' in + -- this implementation of 'Eq'. The contract states that if two values are + -- equal according to '==', then they must have the same hash. For terms + -- constructed by/within SAW, this will hold, because SAW's handling of index + -- generation and assignment ensures that equality of indices implies equality + -- of terms and term hashes (see 'Verifier.SAW.SharedTerm.getTerm'). However, + -- if terms are constructed outside this standard procedure or in a way that + -- does not respect index uniqueness rules, 'hashWithSalt''s contract could be + -- violated. (==) = alphaEquiv alphaEquiv :: Term -> Term -> Bool diff --git a/src/SAWScript/HeapsterBuiltins.hs b/src/SAWScript/HeapsterBuiltins.hs index 8b3b40db8d..1326c7f5a8 100644 --- a/src/SAWScript/HeapsterBuiltins.hs +++ b/src/SAWScript/HeapsterBuiltins.hs @@ -276,7 +276,7 @@ parseAndInsDef henv nm term_tp term_string = typed_term <- liftIO $ scTypeCheckCompleteError sc (Just mnm) un_term liftIO $ scCheckSubtype sc (Just mnm) typed_term term_tp case typedVal typed_term of - STApp _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) -> + STApp _ _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) -> return term_ident term -> do m <- liftIO $ scFindModule sc mnm