Skip to content

Commit

Permalink
Experiment: dummy DB that maps (hashes of) contexts to presises
Browse files Browse the repository at this point in the history
  • Loading branch information
omelkonian committed Oct 26, 2023
1 parent 312b366 commit 457dfa7
Show file tree
Hide file tree
Showing 10 changed files with 291 additions and 67 deletions.
2 changes: 2 additions & 0 deletions .ghci
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@
:def! unimath (\_ -> return ":r\n :set args --no-terms -r -v agda2train:10 -ojson/ -i $HOME/agda-unimath/src $HOME/agda-unimath/src/everything.lagda.md\n main")
:def! typetopo (\_ -> return ":r\n :set args --no-terms -r -v agda2train:10 -ojson/ -i $HOME/TypeTopology/source $HOME/TypeTopology/source/index.lagda\n main")
:def! prelude (\_ -> return ":r\n :set args -r -v agda2train:10 -ojson/ -i $HOME/git/formal-prelude/ $HOME/git/formal-prelude/Prelude/Main.agda\n main")
:def! testDB (\x -> return $ ":r\n :set args add data/db.json test/golden/" <> x <> ".json\n main")
:def! testQueryDB (\x -> return $ ":r\n :set args query data/db.json " <> x <> ".json\n main")
7 changes: 5 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
# schedule: [{cron: '0 0 * * *'}]
pull_request:
push:
branches: [master]
branches: [master, db-example]

jobs:
cabal:
Expand Down Expand Up @@ -48,14 +48,17 @@ jobs:
# fingerprint doesn't match, so we don't need to start from scratch every
# time a dependency changes.
- uses: actions/cache@v3
if: github.ref == 'refs/heads/master'
name: Cache ~/.cabal/store
with:
path: ${{ steps.setup-haskell-cabal.outputs.cabal-store }}
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ hashFiles('cabal.project.freeze') }}
restore-keys: ${{ runner.os }}-${{ matrix.ghc }}-

- name: Test
run: make test
run: |
make test
make testDB
# - name: Free disk space
# if: ${{ matrix.stdlib }}
Expand Down
10 changes: 10 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ default: repl
repl:
cabal repl agda2train # e.g. `:set args -r -o json -itest test/First.agda ... main ... :r ... main`

replDB:
cabal repl agda2train-db

build:
cabal build

Expand All @@ -27,6 +30,13 @@ cleanTest:
make -C test clean
make -C test cleanGolden

# DB example

testDB:
cabal run agda2train-db -- add data/db.json test/golden/Test.PiVsFun.json
cabal run agda2train-db -- query data/db.json data/query.json \
| tail -n1 | cmp data/expected_response.txt

# Extracting training data from whole libraries

STDLIB?=$(HOME)/git/agda-stdlib
Expand Down
15 changes: 13 additions & 2 deletions agda2train.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ common globalOptions
, mtl >=2.2.1 && <2.4
, async >=2.2 && <2.3
, file-embed == 0.0.15.0
, aeson-pretty >= 0.8.9 && < 0.9
, bytestring >=0.10.8.1 && <0.13

library agda2train-lib
import: globalOptions
Expand All @@ -65,8 +67,17 @@ executable agda2train
build-depends:
agda2train-lib
, deepseq >=1.4.2.0 && <1.6
, bytestring >=0.10.8.1 && <0.13
, directory >=1.2.6.2 && <1.4
, filepath >=1.4.1.0 && <1.5
, unordered-containers >=0.2.9.0 && <0.3
, aeson-pretty >= 0.8.9 && < 0.9

executable agda2train-db
import: globalOptions
main-is: DB.hs
ghc-options:
-threaded -rtsopts -with-rtsopts=-N
-Wno-missing-home-modules
build-depends:
agda2train-lib
, unordered-containers >=0.2.9.0 && <0.3
, hashable
11 changes: 11 additions & 0 deletions data/db.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"-5242397938801254731": [
"Test.PiVsFun.refl<10>"
],
"4884008893817283726": [
"Test.PiVsFun.refl<10>"
],
"7116324300784873135": [
"Test.PiVsFun.refl<10>"
]
}
1 change: 1 addition & 0 deletions data/expected_response.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["Test.PiVsFun.refl<10>"]
72 changes: 72 additions & 0 deletions data/query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"scope": [
{
"name": "Test.PiVsFun.refl<10>",
"type": {
"tag": "Pi",
"name": "x",
"domain": {
"tag": "Sort",
"sort": "Set"
},
"codomain": {
"tag": "Pi",
"name": "y",
"domain": {
"tag": "Sort",
"sort": "Set"
},
"codomain": {
"tag": "Application",
"head": {
"tag": "ScopeReference",
"name": "Test.PiVsFun._≡_<4>"
},
"arguments": [
{
"tag": "DeBruijn",
"index": 1
},
{
"tag": "DeBruijn",
"index": 0
}
]
}
}
},
"definition": {
"tag": "Postulate"
}
}
],
"context": [
{
"tag": "Sort",
"name": "x",
"sort": "Set"
},
{
"tag": "Sort",
"name": "x",
"sort": "Set"
}
],
"goal": {
"tag": "Application",
"head": {
"tag": "ScopeReference",
"name": "Test.PiVsFun._≡_<4>"
},
"arguments": [
{
"tag": "DeBruijn",
"index": 1
},
{
"tag": "DeBruijn",
"index": 1
}
]
}
}
110 changes: 110 additions & 0 deletions src/DB.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{-# LANGUAGE TypeApplications #-}
-- | Store the JSON information in an SQL database for quick lookups.
--
-- NB: "training" this model by feeding samples merely consists of adding DB rows
module Main where

import GHC.Generics

import System.Environment ( getArgs )
import qualified Data.HashMap.Strict as M
import Data.Maybe ( fromMaybe )
import Control.Monad ( forM_ )
import Control.Arrow ( first )

import Data.Aeson
( ToJSON(..), genericToJSON
, FromJSON(..), genericParseJSON
, eitherDecodeFileStrict' )

import Data.Hashable ( Hashable, hash )

-- import Database.Persist.TH

import Agda.Utils.Either ( caseEitherM )

import ToTrain ( names )
import Output hiding ( ScopeEntry, ScopeEntry' )
import qualified Output as O

data ScopeEntry' = ScopeEntry
{ _type :: Type
, definition :: Definition
} deriving (Generic, Show, Eq, Hashable)
instance ToJSON ScopeEntry' where toJSON = genericToJSON jsonOpts
instance FromJSON ScopeEntry' where parseJSON = genericParseJSON jsonOpts
type ScopeEntry = Named ScopeEntry'
data Context = Context
{ scope :: [ScopeEntry]
, context :: [Type]
, goal :: Type
} deriving (Generic, Show, Eq, Hashable)
instance ToJSON Context where toJSON = genericToJSON jsonOpts
instance FromJSON Context where parseJSON = genericParseJSON jsonOpts
type Premises = [String]

type Key' = Context
type Key = Int -- hash of `Context`
type Value = Premises
type Database = M.HashMap Key Value

instance Hashable a => Hashable (Named a)
instance Hashable a => Hashable (Pretty a)
instance Hashable a => Hashable (Reduced a)
instance Hashable O.ScopeEntry'; instance Hashable Sample
instance Hashable Definition; instance Hashable Clause; instance Hashable Term

fileSamples :: FileData -> [(Key', Value)]
fileSamples (Named{item = TrainData{..}}) =
concatMap (map go . fromMaybe [] . holes . item) scopeLocal
where
usedIn :: Name -> Term -> Bool
usedIn n = \case
Pi _ (n' :~ ty) t -> n `usedIn` ty || ((n' /= n) && n `usedIn` t)
Lam (n' :~ t) -> (n' /= n) && n `usedIn` t
App hd ts -> case hd of {Ref n' -> n' == n; DB _ -> False}
|| any (n `usedIn`) ts
_ -> False

go :: Sample -> (Key', Value)
go Sample{..} = Context
{ scope = fmap bareScope
<$> scopeGlobal
<> onlyRelevant scopeLocal
<> onlyRelevant (fromMaybe [] scopePrivate)
, context = thing . item <$> thing (ctx)
, goal = original (thing goal)
} .-> premises
where (.->) = (,)
onlyRelevant = filter $ (`usedIn` original (thing term)) . name
bareScope :: O.ScopeEntry' -> ScopeEntry'
bareScope O.ScopeEntry{..} = ScopeEntry {_type = original (thing _type), definition = thing definition}

main :: IO ()
main = getArgs >>= \case
("add" : dbJson : jsonFns) -> forM_ jsonFns $ \jsonFn -> do
caseEitherM (eitherDecodeFileStrict' jsonFn) fail $ \fd -> do
-- putStrLn "** File Data" >> print fd
let samples = fileSamples fd
putStrLn "** Samples" -- >> print samples
forM_ samples $ \(k, _) -> do
putStrLn "------" >> print k
putStrLn "# " >> print (hash k)
let db = M.fromList (first hash <$> samples)
putStrLn "** Database" >> print db
putStrLn "serializing database into a .json file"
encodeFile dbJson db
("query" : dbJson : ctxJson : []) ->
caseEitherM (eitherDecodeFileStrict' @Database dbJson) fail $ \db -> do
putStrLn "** Database" >> print db
caseEitherM (eitherDecodeFileStrict' @Context ctxJson) fail $ \ctx -> do
putStrLn "** Context" >> print ctx
let hctx = hash ctx
putStrLn "# " >> print hctx
case M.lookup hctx db of
Just premises -> putStrLn "** Premises" >> print premises
Nothing -> putStrLn "NOT TRAINED ON THIS CONTEXT!"
args -> fail
$ "Usage: agda2train-db add <DB_JSON> <JSON>* \n"
<> " or agda2train-db query <CONTEXT_JSON>\n\n"
<> "User entered: agda2train-db " <> unwords args
30 changes: 0 additions & 30 deletions src/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ import qualified Data.List as L
import qualified Data.Set as S
import qualified Data.HashMap.Strict as HM
import qualified Data.ByteString.Lazy as BL
import Data.Aeson ( ToJSON )
import Data.Aeson.Encode.Pretty
( encodePretty', Config(..), defConfig, Indent(..), keyOrder )

import Control.Monad
import Control.Monad.IO.Class ( liftIO )
Expand Down Expand Up @@ -205,30 +202,3 @@ getOutDir opts = case outDir opts of

getOutFn :: Options -> String -> FilePath
getOutFn opts mn = getOutDir opts </> mn <> ".json"

-- * JSON encoding

-- | Uses "Aeson.Pretty" to order the JSON fields.
encode :: ToJSON a => a -> BL.ByteString
encode = encodePretty' $ defConfig
{ confIndent = Spaces 2
, confCompare = keyOrder
[ "pretty"
, "tag"
, "name"
, "original", "simplified", "reduced", "normalised"
, "telescope", "patterns", "fields"
, "domain", "codomain"
, "abstraction", "body"
, "sort", "level", "literal"
, "head", "arguments"
, "variants", "reference", "variant"
, "index"
, "scopeGlobal", "scopeLocal"
, "type", "definition", "holes"
, "ctx", "goal", "term", "premises"
]
}

encodeFile :: ToJSON a => FilePath -> a -> IO ()
encodeFile = \fn -> BL.writeFile fn . encode
Loading

0 comments on commit 457dfa7

Please sign in to comment.