Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persistent structure-based term identifiers #1830

Merged
merged 18 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions saw-core/src/Verifier/SAW/Name.hs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ data NameInfo
| -- | This name was imported from some other programming language/scope
ImportedName
URI -- ^ An absolutely-qualified name, which is required to be unique
[Text] -- ^ A collection of aliases for this name. Sorter or "less-qualified"
[Text] -- ^ A collection of aliases for this name. Shorter or "less-qualified"
-- aliases should be nearer the front of the list

deriving (Eq,Ord,Show)
Expand Down Expand Up @@ -249,13 +249,13 @@ data PrimName e =
deriving (Show, Functor, Foldable, Traversable)

instance Eq (PrimName e) where
x == y = primVarIndex x == primVarIndex y
x == y = primName x == primName y

instance Ord (PrimName e) where
compare x y = compare (primVarIndex x) (primVarIndex y)
compare x y = compare (primName x) (primName y)

instance Hashable (PrimName e) where
hashWithSalt x pn = hashWithSalt x (primVarIndex pn)
hashWithSalt x pn = hashWithSalt x (primName pn)

primNameToExtCns :: PrimName e -> ExtCns e
primNameToExtCns (PrimName varIdx nm tp) = EC varIdx (ModuleIdentifier nm) tp
Expand Down
2 changes: 1 addition & 1 deletion saw-core/src/Verifier/SAW/Rewriter.hs
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ scMatch sc pat term =
-- saves the names associated with those bound variables.
match :: Int -> [(LocalName, Term)] -> Term -> Term -> MatchState ->
MaybeT IO MatchState
match _ _ (STApp i fv _) (STApp j _ _) s
match _ _ (STApp i _ fv _) (STApp j _ _ _) s
| fv == emptyBitSet && i == j = return s
match depth env x y s@(MatchState m cs) =
-- (lift $ putStrLn $ "matching (lhs): " ++ scPrettyTerm defaultPPOpts x) >>
Expand Down
28 changes: 15 additions & 13 deletions saw-core/src/Verifier/SAW/SharedTerm.hs
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,11 @@ import Data.List (inits, find)
import Data.Maybe
import qualified Data.Foldable as Fold
import Data.Foldable (foldl', foldlM, foldrM, maximum)
import Data.Hashable (Hashable(hash))
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HMap
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import qualified Data.IntSet as IntSet
import Data.IORef (IORef,newIORef,readIORef,modifyIORef',atomicModifyIORef',writeIORef)
import Data.Map (Map)
Expand Down Expand Up @@ -343,7 +344,7 @@ data TermFMap a
}

emptyTFM :: TermFMap a
emptyTFM = TermFMap IntMap.empty HMap.empty
emptyTFM = TermFMap mempty mempty

lookupTFM :: TermF Term -> TermFMap a -> Maybe a
lookupTFM tf tfm =
Expand Down Expand Up @@ -634,18 +635,19 @@ emptyAppCache = emptyTFM

-- | Return term for application using existing term in cache if it is available.
getTerm :: AppCacheRef -> TermF Term -> IO Term
getTerm r a =
modifyMVar r $ \s -> do
case lookupTFM a s of
Just t -> return (s, t)
getTerm cache termF =
modifyMVar cache $ \s -> do
case lookupTFM termF s of
Just term -> return (s, term)
Nothing -> do
i <- getUniqueInt
let t = STApp { stAppIndex = i
, stAppFreeVars = freesTermF (fmap looseVars a)
, stAppTermF = a
}
let s' = insertTFM a t s
seq s' $ return (s', t)
let term = STApp { stAppIndex = i
, stAppHash = hash termF
, stAppFreeVars = freesTermF (fmap looseVars termF)
, stAppTermF = termF
}
s' = insertTFM termF term s
seq s' $ return (s', term)


--------------------------------------------------------------------------------
Expand Down
70 changes: 68 additions & 2 deletions saw-core/src/Verifier/SAW/Term/Functor.hs
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,10 @@ zipWithFlatTermF f = go

-- Term Functor ----------------------------------------------------------------

-- | A \"knot-tying\" structure for representing terms and term-like things.
-- Often, this appears in context as the type \"'TermF' 'Term'\", in which case
-- it represents a full 'Term' AST. The \"F\" stands for 'Functor', or
-- occasionally for \"Former\".
data TermF e
= FTermF !(FlatTermF e)
-- ^ The atomic, or builtin, term constructs
Expand All @@ -381,24 +385,77 @@ data TermF e
-- The body and type should be closed terms.
deriving (Eq, Ord, Show, Functor, Foldable, Traversable, Generic)

-- See the commentary on 'Hashable Term' for a note on uniqueness.
instance Hashable e => Hashable (TermF e) -- automatically derived.
-- NB: we may someday wish to customize this instance, for a couple reasons.
--
-- 1. Hash 'Constant's based on their definition, if it exists, rather than
-- always using both their type and definition (as the automatically derived
-- instance does). Their type, represented as an 'ExtCns', contains unavoidable
-- freshness derived from a global counter (via 'scFreshGlobalVar' as
-- initialized in 'Verifier.SAW.SharedTerm.mkSharedContext'), but their
-- definition does not necessarily contain the same freshness.
--
-- 2. Improve the default, XOR-based hashing scheme to improve collision
-- resistance. A polynomial-based approach may be fruitful. For a constructor
-- with fields numbered 1..n, evaluate a polynomial along the lines of:
-- coeff(0) * salt ^ 0 + coeff(1) + salt ^ 1 + ... + coeff(n) * salt ^ n
-- where
-- coeff(0) = salt `hashWithSalt` <custom per-constructor salt>
-- coeff(i) = salt `hashWithSalt` <field i>


-- Term Datatype ---------------------------------------------------------------

type TermIndex = Int -- Word64

-- | For more information on the semantics of 'Term's, see the
-- [manual](https://saw.galois.com/manual.html). 'Term' and 'TermF' are split
-- into two structures to facilitate mutual structural recursion (sometimes
-- referred to as the ["knot-tying"](https://wiki.haskell.org/Tying_the_Knot)
-- pattern, sometimes referred to in terms of ["recursion
-- schemes"](https://blog.sumtypeofway.com/posts/introduction-to-recursion-schemes.html))
-- and term object reuse via hash-consing.
data Term
= STApp
-- ^ This constructor \"wraps\" a 'TermF' 'Term', assigning it a
-- guaranteed-unique integer identifier and caching its likely-unique hash.
-- Most 'Term's are constructed via 'STApp'. When a fresh 'TermF' is evinced
-- in the course of a SAW invocation and needs to be lifted into a 'Term',
-- we can see if we've already created a 'Term' wrapper for an identical
-- 'TermF', and reuse it if so. The implementation of hash-consed 'Term'
-- construction exists in 'Verifier.SAW.SharedTerm', in particular in the
-- 'Verifier.SAW.SharedTerm.scTermF' field of the
-- t'Verifier.SAW.SharedTerm.SharedContext' object.
{ stAppIndex :: {-# UNPACK #-} !TermIndex
, stAppFreeVars :: !BitSet -- Free variables
-- ^ The UID associated with a 'Term'. It is guaranteed unique across a
-- universe of properly-constructed 'Term's within a single SAW
-- invocation.
, stAppHash :: {-# UNPACK #-} !Int
-- ^ The hash, according to 'hash', of the 'stAppTermF' field associated
-- with this 'Term'. This should be as unique as a hash can be, but is
-- not guaranteed unique as 'stAppIndex' is.
, stAppFreeVars :: !BitSet
-- ^ The free variables associated with the 'stAppTermF' field.
, stAppTermF :: !(TermF Term)
-- ^ The underlying 'TermF' that this 'Term' wraps. This field "ties the
-- knot" of the 'Term'/'TermF' recursion scheme.
}
| Unshared !(TermF Term)
-- ^ Used for constructing 'Term's that don't need to be shared/reused.
deriving (Show, Typeable)

instance Hashable Term where
hashWithSalt salt STApp{ stAppIndex = i } = salt `combine` 0x00000000 `hashWithSalt` hash i
-- Why have 'Hashable' depend on the not-necessarily-unique hash instead of
-- the necessarily-unique index? Per #1830 (PR) and #1831 (issue), we want to
-- be able to derive a reference to terms based solely on their shape. Indices
-- have nothing to do with a term's shape - they're assigned sequentially when
-- building terms, according to the (arbitrary) order in which a term is
-- built. As for uniqueness, though hashing a term based on its subterms'
-- hashes introduces less randomness/freshness, it maintains plenty, and
-- provides benefits as described above. No code should ever rely on total
-- uniqueness of hashes, and terms are no exception.
hashWithSalt salt STApp{ stAppHash = h } = salt `combine` 0x00000000 `hashWithSalt` h
hashWithSalt salt (Unshared t) = salt `combine` 0x55555555 `hashWithSalt` hash t


Expand All @@ -408,6 +465,15 @@ combine :: Int -> Int -> Int
combine h1 h2 = (h1 * 0x01000193) `xor` h2

instance Eq Term where
-- Note: we take some minor liberties with the contract of 'hashWithSalt' in
-- this implementation of 'Eq'. The contract states that if two values are
-- equal according to '==', then they must have the same hash. For terms
-- constructed by/within SAW, this will hold, because SAW's handling of index
-- generation and assignment ensures that equality of indices implies equality
-- of terms and term hashes (see 'Verifier.SAW.SharedTerm.getTerm'). However,
-- if terms are constructed outside this standard procedure or in a way that
-- does not respect index uniqueness rules, 'hashWithSalt''s contract could be
-- violated.
(==) = alphaEquiv

alphaEquiv :: Term -> Term -> Bool
Expand Down
2 changes: 1 addition & 1 deletion src/SAWScript/HeapsterBuiltins.hs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ parseAndInsDef henv nm term_tp term_string =
typed_term <- liftIO $ scTypeCheckCompleteError sc (Just mnm) un_term
liftIO $ scCheckSubtype sc (Just mnm) typed_term term_tp
case typedVal typed_term of
STApp _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
STApp _ _ _ (Constant (EC _ (ModuleIdentifier term_ident) _) _) ->
return term_ident
term -> do
m <- liftIO $ scFindModule sc mnm
Expand Down