Skip to content

Commit

Permalink
Merge pull request #1830 from GaloisInc/persistent-term-hashing
Browse files Browse the repository at this point in the history
Persistent structure-based term identifiers
  • Loading branch information
mergify[bot] authored Mar 30, 2023
2 parents ecab2cd + 3745791 commit ab51c9b
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 21 deletions.
8 changes: 4 additions & 4 deletions saw-core/src/Verifier/SAW/Name.hs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ data NameInfo
| -- | This name was imported from some other programming language/scope
ImportedName
URI -- ^ An absolutely-qualified name, which is required to be unique
[Text] -- ^ A collection of aliases for this name. Sorter or "less-qualified"
[Text] -- ^ A collection of aliases for this name. Shorter or "less-qualified"
-- aliases should be nearer the front of the list

deriving (Eq,Ord,Show)
Expand Down Expand Up @@ -249,13 +249,13 @@ data PrimName e =
deriving (Show, Functor, Foldable, Traversable)

instance Eq (PrimName e) where
x == y = primVarIndex x == primVarIndex y
x == y = primName x == primName y

instance Ord (PrimName e) where
compare x y = compare (primVarIndex x) (primVarIndex y)
compare x y = compare (primName x) (primName y)

instance Hashable (PrimName e) where
hashWithSalt x pn = hashWithSalt x (primVarIndex pn)
hashWithSalt x pn = hashWithSalt x (primName pn)

primNameToExtCns :: PrimName e -> ExtCns e
primNameToExtCns (PrimName varIdx nm tp) = EC varIdx (ModuleIdentifier nm) tp
Expand Down
2 changes: 1 addition & 1 deletion saw-core/src/Verifier/SAW/Rewriter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ scMatch sc pat term =
-- saves the names associated with those bound variables.
match :: Int -> [(LocalName, Term)] -> Term -> Term -> MatchState ->
MaybeT IO MatchState
match _ _ (STApp i fv _) (STApp j _ _) s
match _ _ (STApp i _ fv _) (STApp j _ _ _) s
| fv == emptyBitSet && i == j = return s
match depth env x y s@(MatchState m cs) =
-- (lift $ putStrLn $ "matching (lhs): " ++ scPrettyTerm defaultPPOpts x) >>
Expand Down
28 changes: 15 additions & 13 deletions saw-core/src/Verifier/SAW/SharedTerm.hs
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,11 @@ import Data.List (inits, find)
import Data.Maybe
import qualified Data.Foldable as Fold
import Data.Foldable (foldl', foldlM, foldrM, maximum)
import Data.Hashable (Hashable(hash))
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HMap
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.IntSet as IntSet
import Data.IORef (IORef,newIORef,readIORef,modifyIORef',atomicModifyIORef',writeIORef)
import Data.Map (Map)
Expand Down Expand Up @@ -343,7 +344,7 @@ data TermFMap a
}

emptyTFM :: TermFMap a
emptyTFM = TermFMap IntMap.empty HMap.empty
emptyTFM = TermFMap mempty mempty

lookupTFM :: TermF Term -> TermFMap a -> Maybe a
lookupTFM tf tfm =
Expand Down Expand Up @@ -634,18 +635,19 @@ emptyAppCache = emptyTFM

-- | Return term for application using existing term in cache if it is available.
getTerm :: AppCacheRef -> TermF Term -> IO Term
getTerm r a =
modifyMVar r $ \s -> do
case lookupTFM a s of
Just t -> return (s, t)
getTerm cache termF =
modifyMVar cache $ \s -> do
case lookupTFM termF s of
Just term -> return (s, term)
Nothing -> do
i <- getUniqueInt
let t = STApp { stAppIndex = i
, stAppFreeVars = freesTermF (fmap looseVars a)
, stAppTermF = a
}
let s' = insertTFM a t s
seq s' $ return (s', t)
let term = STApp { stAppIndex = i
, stAppHash = hash termF
, stAppFreeVars = freesTermF (fmap looseVars termF)
, stAppTermF = termF
}
s' = insertTFM termF term s
seq s' $ return (s', term)


--------------------------------------------------------------------------------
Expand Down
70 changes: 68 additions & 2 deletions saw-core/src/Verifier/SAW/Term/Functor.hs
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ zipWithFlatTermF f = go

-- Term Functor ----------------------------------------------------------------

-- | A \"knot-tying\" structure for representing terms and term-like things.
-- Often, this appears in context as the type \"'TermF' 'Term'\", in which case
-- it represents a full 'Term' AST. The \"F\" stands for 'Functor', or
-- occasionally for \"Former\".
data TermF e
= FTermF !(FlatTermF e)
-- ^ The atomic, or builtin, term constructs
Expand All @@ -381,24 +385,77 @@ data TermF e
-- The body and type should be closed terms.
deriving (Eq, Ord, Show, Functor, Foldable, Traversable, Generic)

-- See the commentary on 'Hashable Term' for a note on uniqueness.
instance Hashable e => Hashable (TermF e) -- automatically derived.
-- NB: we may someday wish to customize this instance, for a couple reasons.
--
-- 1. Hash 'Constant's based on their definition, if it exists, rather than
-- always using both their type and definition (as the automatically derived
-- instance does). Their type, represented as an 'ExtCns', contains unavoidable
-- freshness derived from a global counter (via 'scFreshGlobalVar' as
-- initialized in 'Verifier.SAW.SharedTerm.mkSharedContext'), but their
-- definition does not necessarily contain the same freshness.
--
-- 2. Improve the default, XOR-based hashing scheme to improve collision
-- resistance. A polynomial-based approach may be fruitful. For a constructor
-- with fields numbered 1..n, evaluate a polynomial along the lines of:
-- coeff(0) * salt ^ 0 + coeff(1) + salt ^ 1 + ... + coeff(n) * salt ^ n
-- where
-- coeff(0) = salt `hashWithSalt` <custom per-constructor salt>
-- coeff(i) = salt `hashWithSalt` <field i>


-- Term Datatype ---------------------------------------------------------------

type TermIndex = Int -- Word64

-- | For more information on the semantics of 'Term's, see the
-- [manual](https://saw.galois.com/manual.html). 'Term' and 'TermF' are split
-- into two structures to facilitate mutual structural recursion (sometimes
-- referred to as the ["knot-tying"](https://wiki.haskell.org/Tying_the_Knot)
-- pattern, sometimes referred to in terms of ["recursion
-- schemes"](https://blog.sumtypeofway.com/posts/introduction-to-recursion-schemes.html))
-- and term object reuse via hash-consing.
data Term
= STApp
-- ^ This constructor \"wraps\" a 'TermF' 'Term', assigning it a
-- guaranteed-unique integer identifier and caching its likely-unique hash.
-- Most 'Term's are constructed via 'STApp'. When a fresh 'TermF' is evinced
-- in the course of a SAW invocation and needs to be lifted into a 'Term',
-- we can see if we've already created a 'Term' wrapper for an identical
-- 'TermF', and reuse it if so. The implementation of hash-consed 'Term'
-- construction exists in 'Verifier.SAW.SharedTerm', in particular in the
-- 'Verifier.SAW.SharedTerm.scTermF' field of the
-- t'Verifier.SAW.SharedTerm.SharedContext' object.
{ stAppIndex :: {-# UNPACK #-} !TermIndex
, stAppFreeVars :: !BitSet -- Free variables
-- ^ The UID associated with a 'Term'. It is guaranteed unique across a
-- universe of properly-constructed 'Term's within a single SAW
-- invocation.
, stAppHash :: {-# UNPACK #-} !Int
-- ^ The hash, according to 'hash', of the 'stAppTermF' field associated
-- with this 'Term'. This should be as unique as a hash can be, but is
-- not guaranteed unique as 'stAppIndex' is.
, stAppFreeVars :: !BitSet
-- ^ The free variables associated with the 'stAppTermF' field.
, stAppTermF :: !(TermF Term)
-- ^ The underlying 'TermF' that this 'Term' wraps. This field "ties the
-- knot" of the 'Term'/'TermF' recursion scheme.
}
| Unshared !(TermF Term)
-- ^ Used for constructing 'Term's that don't need to be shared/reused.
deriving (Show, Typeable)

instance Hashable Term where
hashWithSalt salt STApp{ stAppIndex = i } = salt `combine` 0x00000000 `hashWithSalt` hash i
-- Why have 'Hashable' depend on the not-necessarily-unique hash instead of
-- the necessarily-unique index? Per #1830 (PR) and #1831 (issue), we want to
-- be able to derive a reference to terms based solely on their shape. Indices
-- have nothing to do with a term's shape - they're assigned sequentially when
-- building terms, according to the (arbitrary) order in which a term is
-- built. As for uniqueness, though hashing a term based on its subterms'
-- hashes introduces less randomness/freshness, it maintains plenty, and
-- provides benefits as described above. No code should ever rely on total
-- uniqueness of hashes, and terms are no exception.
hashWithSalt salt STApp{ stAppHash = h } = salt `combine` 0x00000000 `hashWithSalt` h
hashWithSalt salt (Unshared t) = salt `combine` 0x55555555 `hashWithSalt` hash t


Expand All @@ -408,6 +465,15 @@ combine :: Int -> Int -> Int
combine h1 h2 = (h1 * 0x01000193) `xor` h2

instance Eq Term where
-- Note: we take some minor liberties with the contract of 'hashWithSalt' in
-- this implementation of 'Eq'. The contract states that if two values are
-- equal according to '==', then they must have the same hash. For terms
-- constructed by/within SAW, this will hold, because SAW's handling of index
-- generation and assignment ensures that equality of indices implies equality
-- of terms and term hashes (see 'Verifier.SAW.SharedTerm.getTerm'). However,
-- if terms are constructed outside this standard procedure or in a way that
-- does not respect index uniqueness rules, 'hashWithSalt''s contract could be
-- violated.
(==) = alphaEquiv

alphaEquiv :: Term -> Term -> Bool
Expand Down
2 changes: 1 addition & 1 deletion src/SAWScript/HeapsterBuiltins.hs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ parseAndInsDef henv nm term_tp term_string =
typed_term <- liftIO $ scTypeCheckCompleteError sc (Just mnm) un_term
liftIO $ scCheckSubtype sc (Just mnm) typed_term term_tp
case typedVal typed_term of
STApp _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
STApp _ _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
return term_ident
term -> do
m <- liftIO $ scFindModule sc mnm
Expand Down

0 comments on commit ab51c9b

Please sign in to comment.