diff --git a/CHANGES.md b/CHANGES.md index 152d833807..b0e36e2ec0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ## New Features +SAW now supports verifying Java code using JDK 9 or later. + When verifying Java code, the path to Java can be specified with the new `--java-bin-dirs`/`-b` command-line option. Alternatively, if `--java-bin-dirs` is not set, then SAW searches the `PATH` to find Java. diff --git a/saw-remote-api/src/SAWServer.hs b/saw-remote-api/src/SAWServer.hs index e937381740..6db38beaab 100644 --- a/saw-remote-api/src/SAWServer.hs +++ b/saw-remote-api/src/SAWServer.hs @@ -47,6 +47,7 @@ import Verifier.SAW.TypedTerm (TypedTerm, CryptolModule) import qualified SAWScript.Crucible.Common.MethodSpec as CMS (CrucibleMethodSpecIR) import qualified SAWScript.Crucible.LLVM.MethodSpecIR as CMS (SomeLLVM, LLVMModule) +import SAWScript.JavaCodebase (loadCodebase) import SAWScript.JavaExpr (JavaType(..)) import SAWScript.Options (defaultOptions) import SAWScript.Position (Pos(..)) @@ -167,7 +168,8 @@ initialState readFileFn = ss <- basic_ss sc let jarFiles = [] classPaths = [] - jcb <- JSS.loadCodebase jarFiles classPaths + javaBinDirs = [] + jcb <- loadCodebase jarFiles classPaths javaBinDirs let bic = BuiltinContext { biSharedContext = sc , biJavaCodebase = jcb , biBasicSS = ss diff --git a/saw-script.cabal b/saw-script.cabal index e9026d5f45..83aae22748 100644 --- a/saw-script.cabal +++ b/saw-script.cabal @@ -29,6 +29,7 @@ library base >= 4 , aig , array + , base16-bytestring >= 0.1 && < 1.1 , binary , bimap , bytestring @@ -41,6 +42,7 @@ library , crucible-jvm , crucible-llvm >= 0.2 , crucible-saw + , cryptohash-sha1 >= 0.11 && < 0.12 , deepseq , either , exceptions @@ -87,7 +89,6 @@ library , utf8-string , what4 >= 0.4 , vector - , xdg-basedir , GraphSCC , macaw-base , macaw-x86 @@ -117,6 +118,7 @@ library SAWScript.ImportAIG SAWScript.Interpreter SAWScript.JavaBuiltins + SAWScript.JavaCodebase SAWScript.JavaExpr SAWScript.JavaMethodSpec SAWScript.JavaMethodSpec.Evaluator diff --git a/src/SAWScript/Crucible/JVM/Builtins.hs b/src/SAWScript/Crucible/JVM/Builtins.hs index 03a75fce1d..267f2a14d9 100644 --- a/src/SAWScript/Crucible/JVM/Builtins.hs +++ b/src/SAWScript/Crucible/JVM/Builtins.hs @@ -61,10 +61,6 @@ import Data.Void (absurd) import Prettyprinter import System.IO --- jvm-verifier --- TODO: transition to Lang.JVM.Codebase from crucible-jvm -import qualified Verifier.Java.Codebase as CB - -- cryptol import qualified Cryptol.Eval.Type as Cryptol (evalValType) import qualified Cryptol.TypeCheck.Type as Cryptol @@ -112,6 +108,7 @@ import qualified SAWScript.Position as SS import SAWScript.Options import SAWScript.Crucible.JVM.BuiltinsJVM (prepareClassTopLevel) +import SAWScript.JavaCodebase (Codebase) import SAWScript.JavaExpr (JavaType(..)) import qualified SAWScript.Crucible.Common as Common @@ -143,7 +140,7 @@ ppAbortedResult _cc = Common.ppAbortedResult (\_gp -> mempty) -- number of classes we load is way too large, and the classes take a -- long time to parse and translate. -allClassRefs :: CB.Codebase -> J.ClassName -> IO (Set J.ClassName) +allClassRefs :: Codebase -> J.ClassName -> IO (Set J.ClassName) allClassRefs cb c0 = go seen0 [c0] where seen0 = Set.fromList CJ.initClasses diff --git a/src/SAWScript/Crucible/JVM/BuiltinsJVM.hs b/src/SAWScript/Crucible/JVM/BuiltinsJVM.hs index 0087ea477f..c62a6e8468 100644 --- a/src/SAWScript/Crucible/JVM/BuiltinsJVM.hs +++ b/src/SAWScript/Crucible/JVM/BuiltinsJVM.hs @@ -73,6 +73,7 @@ import Verifier.SAW.TypedTerm (TypedTerm(..), abstractTypedExts) -- saw-script import SAWScript.Builtins(fixPos) +import SAWScript.JavaCodebase (Codebase) import SAWScript.Value import SAWScript.Options(Options,simVerbose) import SAWScript.Crucible.LLVM.Builtins (setupArg, setupArgs, getGlobalPair, runCFG, baseCryptolType) @@ -93,8 +94,12 @@ import Debug.Trace -- | Use the Codebase implementation from the old Java static simulator -- instance IsCodebase JCB.Codebase where - lookupClass cb = J.lookupClass cb fixPos - findMethod cb = J.findMethod cb fixPos + lookupClass cb = J.lookupClassLegacy cb fixPos + findMethod cb = J.findMethodLegacy cb fixPos + +instance IsCodebase Codebase where + lookupClass cb = J.lookupClass cb fixPos + findMethod cb = J.findMethod cb fixPos ----------------------------------------------------------------------- -- | Make sure the class is in the database and allocate handles for its diff --git a/src/SAWScript/Crucible/JVM/MethodSpecIR.hs b/src/SAWScript/Crucible/JVM/MethodSpecIR.hs index fca3e0c6dd..1af48513e1 100644 --- a/src/SAWScript/Crucible/JVM/MethodSpecIR.hs +++ b/src/SAWScript/Crucible/JVM/MethodSpecIR.hs @@ -38,10 +38,6 @@ import qualified Lang.Crucible.FunctionHandle as Crucible (HandleAllocator) -- crucible-jvm import qualified Lang.Crucible.JVM as CJ --- jvm-verifier --- TODO: transition to Lang.JVM.Codebase from crucible-jvm -import qualified Verifier.Java.Codebase as CB - -- jvm-parser import qualified Language.JVM.Parser as J @@ -51,6 +47,7 @@ import Verifier.SAW.TypedTerm (TypedTerm) import SAWScript.Crucible.Common (Sym) import qualified SAWScript.Crucible.Common.MethodSpec as MS import qualified SAWScript.Crucible.Common.Setup.Type as Setup +import SAWScript.JavaCodebase (Codebase) -------------------------------------------------------------------------------- -- ** Language features @@ -180,12 +177,12 @@ instance PPL.Pretty JVMPointsTo where -------------------------------------------------------------------------------- -- *** JVMCrucibleContext -type instance MS.Codebase CJ.JVM = CB.Codebase +type instance MS.Codebase CJ.JVM = Codebase data JVMCrucibleContext = JVMCrucibleContext { _jccJVMClass :: J.Class - , _jccCodebase :: CB.Codebase + , _jccCodebase :: Codebase , _jccJVMContext :: CJ.JVMContext , _jccBackend :: Sym -- This is stored inside field _ctxSymInterface of Crucible.SimContext; why do we need another one? , _jccHandleAllocator :: Crucible.HandleAllocator @@ -198,7 +195,7 @@ type instance MS.CrucibleContext CJ.JVM = JVMCrucibleContext -------------------------------------------------------------------------------- initialDefCrucibleMethodSpecIR :: - CB.Codebase -> + Codebase -> J.ClassName -> J.Method -> ProgramLoc -> diff --git a/src/SAWScript/Interpreter.hs b/src/SAWScript/Interpreter.hs index a5197a70f0..f1fd859f74 100644 --- a/src/SAWScript/Interpreter.hs +++ b/src/SAWScript/Interpreter.hs @@ -51,6 +51,7 @@ import SAWScript.Builtins import SAWScript.Exceptions (failTypecheck) import qualified SAWScript.Import import SAWScript.JavaBuiltins +import SAWScript.JavaCodebase (loadCodebase) import SAWScript.JavaExpr import SAWScript.LLVMBuiltins import SAWScript.Options @@ -71,7 +72,6 @@ import Verifier.SAW.TypedAST import Verifier.SAW.TypedTerm import qualified Verifier.SAW.CryptolEnv as CEnv -import qualified Verifier.Java.Codebase as JCB import qualified Verifier.Java.SAWBackend as JavaSAW import qualified Verifier.SAW.Cryptol.Prelude as CryptolSAW @@ -421,7 +421,7 @@ buildTopLevelEnv proxy opts = simps <- scSimpset sc0 cryptolDefs [] convs let sc = rewritingSharedContext sc0 simps ss <- basic_ss sc - jcb <- JCB.loadCodebase (jarList opts) (classPath opts) + jcb <- loadCodebase (jarList opts) (classPath opts) (javaBinDirs opts) currDir <- getCurrentDirectory Crucible.withHandleAllocator $ \halloc -> do let ro0 = TopLevelRO diff --git a/src/SAWScript/JavaBuiltins.hs b/src/SAWScript/JavaBuiltins.hs index 4ff7cd93a9..8095fec3f0 100644 --- a/src/SAWScript/JavaBuiltins.hs +++ b/src/SAWScript/JavaBuiltins.hs @@ -31,8 +31,8 @@ import Prettyprinter import Language.JVM.Common -import Verifier.Java.Codebase hiding (lookupClass) -import Verifier.Java.Simulator as JSS hiding (lookupClass) +import Verifier.Java.Codebase hiding (lookupClass, Codebase) +import Verifier.Java.Simulator as JSS hiding (lookupClass, Codebase) import Verifier.Java.SAWBackend import Verifier.SAW.Cryptol (importType, emptyEnv, exportFirstOrderValue) @@ -46,6 +46,7 @@ import Verifier.SAW.CryptolEnv (schemaNoUser) import qualified SAWScript.CongruenceClosure as CC +import SAWScript.JavaCodebase (Codebase, legacyCodebaseHack) import SAWScript.JavaExpr import SAWScript.JavaMethodSpec import SAWScript.JavaMethodSpecIR @@ -64,7 +65,7 @@ import qualified Cryptol.Eval.Concrete as Cryptol (Concrete(..)) import qualified Cryptol.TypeCheck.AST as Cryptol import qualified Cryptol.Utils.PP as Cryptol (pretty) -loadJavaClass :: JSS.Codebase -> String -> IO Class +loadJavaClass :: Codebase -> String -> IO Class loadJavaClass cb = lookupClass cb fixPos . mkClassName . dotsToSlashes @@ -112,7 +113,7 @@ symexecJava cls mname inputs outputs satBranches = do " argument in method " ++ methodName meth pidx = fromIntegral . localIndexOfParameter meth withSAWBackend Nothing $ \sbe -> io $ do - runSimulator cb sbe defaultSEH (Just fl) $ do + runSimulator (legacyCodebaseHack cb) sbe defaultSEH (Just fl) $ do setVerbosity (simVerbose opts) assigns <- mapM mkAssign inputs let (argAssigns, otherAssigns) = partition (isArg meth . fst) assigns @@ -164,7 +165,7 @@ extractJava cls mname setup = do let fl = defaultSimFlags { alwaysBitBlastBranchTerms = jsSatBranches setupRes } meth = specMethod (jsSpec setupRes) - io $ runSimulator cb sbe defaultSEH (Just fl) $ do + io $ runSimulator (legacyCodebaseHack cb) sbe defaultSEH (Just fl) $ do setVerbosity (simVerbose opts) argTypes <- either fail return (getActualArgTypes setupRes) args <- mapM (freshJavaVal (Just argsRef) sc) argTypes @@ -247,8 +248,8 @@ verifyJava cls mname overrides setup = do liftIO $ bsCheckAliasTypes pos bs liftIO $ printOutLn opts Debug $ "Executing " ++ specName ms ++ " at PC " ++ show (bsLoc bs) ++ "." - -- runDefSimulator cb sbe $ do - runSimulator cb sbe defaultSEH (Just fl) $ do + -- runDefSimulator (legacyCodebaseHack cb) sbe $ do + runSimulator (legacyCodebaseHack cb) sbe defaultSEH (Just fl) $ do setVerbosity (simVerbose opts) let prover script vs n g = do let exts = getAllExts g @@ -381,7 +382,7 @@ checkCompatibleType _sc msg aty schema = ] parseJavaExpr' :: (MonadIO m) => - JSS.Codebase -> JSS.Class -> JSS.Method -> String + Codebase -> JSS.Class -> JSS.Method -> String -> m JavaExpr parseJavaExpr' cb cls meth name = liftIO (runExceptT (parseJavaExpr cb cls meth name) >>= either fail return) @@ -401,7 +402,7 @@ getJavaExpr ctx name = do , ftext "Maybe you're missing a `java_var` or `java_class_var`?" ] -typeJavaExpr :: JSS.Codebase -> String -> JavaType +typeJavaExpr :: Codebase -> String -> JavaType -> JavaSetup (JavaExpr, JavaActualType) typeJavaExpr cb name ty = do ms <- gets jsSpec diff --git a/src/SAWScript/JavaCodebase.hs b/src/SAWScript/JavaCodebase.hs new file mode 100644 index 0000000000..2eff57d6b9 --- /dev/null +++ b/src/SAWScript/JavaCodebase.hs @@ -0,0 +1,450 @@ +{- | +Module : SAWScript.JavaCodebase +Description : TODO RGS +License : BSD3 +Maintainer : atomb +Stability : provisional +-} + +{-# LANGUAGE NamedFieldPuns #-} +-- TODO RGS: This is a really ugly hack to work around the fact that we're +-- still in the process of migrating from jvm-verifier to crucible-jvm. +-- See #993. +module SAWScript.JavaCodebase + ( Codebase(..) + , legacyCodebaseHack + + , loadCodebase + , lookupClass + , supers + , tryLookupClass + ) where + +import Control.Monad +import Control.Monad.Trans.Maybe (MaybeT(..)) +import Crypto.Hash.SHA1 (hash) +import qualified Data.ByteString.Base16 as B16 +import qualified Data.ByteString.Char8 as B +import Data.Foldable (Foldable(..)) +import Data.IORef +import Data.List (isPrefixOf) +import qualified Data.Map as M +import Data.Maybe +import System.Directory +import System.Exit +import System.FilePath (pathSeparator, (<.>), ()) +import System.Process (readProcessWithExitCode) + +import Language.JVM.Common +import Language.JVM.JarReader +import Language.JVM.Parser + +import SAWScript.JavaTools + +import qualified Verifier.Java.Codebase as JSS + +{- Note [Loading classes from JIMAGE files] + +The Java standard library is packaged differently depending on which version of +the JDK is being used: + +* JDK 8 and earlier ship the standard library in /lib/rt.jar, so + loading the standard library's .class files is simply a matter of parsing + rt.jar. The `jvm-parser` library is used to parse rt.jar like any other JAR + file. +* JDK 9 and later abandon rt.jar and instead distribute the standard library as + a JIMAGE file. (For more on the JIMAGE file format and how it compares to + JAR, see https://stackoverflow.com/a/64202720.) In particular, the standard + library is now located in a JIMAGE file at /lib/modules. + +We need access to the standard library's .class files in order to verify even +the simplest Java programs, so figuring out how to load classes from JIMAGE +files is a requirement for using JDK 9 or later in SAW. Unlike JAR, however, +the JIMAGE file format is internal to the JDK and subject to change in the +future. As a result, parsing JIMAGE files directly, like `jvm-parser` does with +JAR files, is likely not a good idea. + +If parsing JIMAGE files directly isn't a viable option, what can be done? As +far as I can tell, there are only two reasonable ways of extracting .class +files from JIMAGE files (outside of using Java itself): + +* Use libjimage, a C++ library bundled with the JDK, to load classes. This is + what the JDK itself uses internally, and as a result, it's reasonably space- + and time-efficient to load a single class from a JIMAGE file. Unfortunately, + this is a non-starter for SAW due to potential linking issues that would + arise when combining GHC with C++ code. + See https://github.com/GaloisInc/crucible/issues/621. + +* Use the jimage utility, shipped alongside java in /bin, to extract + the contents of JIMAGE files. The `jimage extract` command can either be used + to extract everything in a JIMAGE file all at once, or it can be used to + extract one class at a time, like in this example: + + jimage extract --include regex:.*/java/lang/Class.class --dir + +Since libjimage is an option, that only leaves jimage. Unfortunately, jimage is +not without its flaws: + +* jimage is /slow/. It takes about about 1.5 seconds for jimage to extract the + standard `modules` file on a reasonably performant machine. Having an extra + 1.5 seconds of lag every time that SAW is invoked doesn't sound appealing. + +* What's worse, jimage is still unreasonably slow even if you only use it to + extract a single class from a JIMAGE file. It takes about 0.25 seconds to + extract java/lang/Class.class from the standard `modules`, which leads me to + suspect that jimage is doing a linear scan through every file in `modules`. + In any case, this is about 14× slower than loading a class file from a JAR, + and if SAW spends 0.25 seconds every time it loads any class from `modules`, + this would add up very quickly. + +What can we do about jimage's performance issues? One appealing alternative is +to use jlink, yet another utility that the JDK ships. If you give jlink a list +of Java modules, it will produce a minimal runtime environment that includes a +JIMAGE file that only contains the classes needed to support those modules, but +nothing else. In theory, this could cut down on the time it takes to extract +classes from a JIMAGE file, as the standard library's `modules` file is ~136 MB +when compressed (~493 MB when uncompressed), and most Java classes will only +use a small portion of that. + +Unfortunately, jlink is even slower than jimage. It takes jlink about 3 seconds +to create a runtime environment that only contains the `java.base` module, +which cancels out any time saved when extracting the JIMAGE file afterwards. + +Having exhausted all other possibilities, we have concluded that we simply +can't make JIMAGE extraction fast in the general case. We /can/, however, +amortize the cost of using JIMAGE files by caching the extracted contents. +This is exactly what SAW does. Any time that SAW tries to load a class +(e.g., java/lang/Class.class), it first checks to see if it is in a known cache +directory (e.g., ~/.cache/saw//java.base/java/lang/Class.class), +and if so, loads it like it would any other .class on the classpath. If not, +it will then consult the JAR path and the classpath. If it is not located by +that point, then SAW will attempt to extract it from the JIMAGE file, caching +the result for subsequent lookups. Importantly, we perform the JIMAGE +extraction step last, as it is far more time-consuming that trying to load +classes from the JAR path or classpath. + +There's still possibly some room for further optimization here. As mentioned +above, extracting one class at a time from a JIMAGE file takes about 0.25 +seconds, while extracting the entire JIMAGE file takes about 1.5 seconds. +Depending on how many classes a program uses, it may take less time overall +to just extract-and-cache everything from a JIMAGE file at once. The downside, +however, is that the cache would require much more space. A typical JDK +`modules` file takes up ~493 MB of classes when extracted, but most simple Java +programs will only use <1 MB of these classes. As a result, we opt for the +one-class-at-a-time approach, which seems better tuned for the sorts of +Java programs that SAW verifies in common cases. +-} + +data Codebase + = -- | What I eventually want to upstream into @crucible-jvm@. + ExperimentalCodebase ExperimentalCodebase + -- | A @jvm-verifier@ 'JSS.Codebase'. Only needed for legacy code paths. + | LegacyCodebase JSS.Codebase + +-- | Collection of classes loaded by JVM. +data CodebaseState = CodebaseState { + classMap :: M.Map ClassName Class + , subclassMap :: M.Map ClassName [Class] + -- ^ Maps class names to the list of classes that are direct subclasses, and + -- interfaces to list of classes that directly implement them. + } + +-- | Contains the path to the @jimage@ tool, the standard @modules@ JIMAGE +-- file, and the path where the extracted contents of @modules@ are cached. +-- +-- This is only used when SAW uses JDK 9 or later. +-- See Note [Loading classes from JIMAGE files]. +data JimagePaths = JimagePaths + { jimagePath :: FilePath + , standardModulesFilePath :: FilePath + , standardModulesCachePath :: FilePath + } + +data ExperimentalCodebase = Codebase + { jarReader :: JarReader + -- ^ Maps class names to lazily loaded classes in JARs + , classPaths :: [FilePath] + , jimagePaths :: Maybe JimagePaths + -- ^ The path to the @jimage@ tool and the paths it works on. + , stateRef :: IORef CodebaseState + } + +-- | TODO RGS: Sigh. Unfortunately, java_verify relies on the `jvm-verifier` +-- library's Codebase data type. We're in the process of migrating from +-- jvm-verifier to crucible-jvm (see #993), so in the meantime, this ugly hack +-- will allow java_verify to continue to work if using Java 8 or earlier (which +-- is when LegacyCodebase is used). +legacyCodebaseHack :: Codebase -> JSS.Codebase +legacyCodebaseHack (LegacyCodebase cb) = cb +legacyCodebaseHack ExperimentalCodebase{} = error "jvm-verifier does not support Java 9 or later" + +-- | Loads Java classes directly beneath given path. Also loads jar indices for +-- lazy class loading. +loadCodebase :: [FilePath] -> [FilePath] -> [FilePath] -> IO Codebase +loadCodebase jarFiles classPaths javaBinDirs = do + mbJavaPath <- findJavaIn javaBinDirs + case mbJavaPath of + Nothing -> legacyCodebase + Just javaPath -> do + javaMajorVersion <- findJavaMajorVersion javaPath + if javaMajorVersion >= 9 + then experimentalCodebase javaPath + else legacyCodebase + where + experimentalCodebase :: FilePath -> IO Codebase + experimentalCodebase javaPath = do + -- REVISIT: Currently, classes found in the classpath shadow those in the + -- jars. Pretty sure the -classpath argument to the vanilla jvm allows + -- mixture of jars and directories, and resolves names in the order in which + -- those elements are encountered. We probably want to do the same thing (and + -- be able to just provide one argument to loadCodebase), but not for the + -- beta. [js 04 Nov 2010] + -- + -- UPDATE: docs on class resolution: + -- http://docs.oracle.com/javase/8/docs/technotes/tools/findingclasses.html. + -- I don't see any mention of resolution order in case of name + -- conflicts. This Stack Overflow answer claims it's complicated in + -- general, but that it will be the first encountered, searching + -- left to right in the class path: + -- http://stackoverflow.com/a/9757708/470844. + -- + -- If we later want to make the resolution order be the + -- left-to-right-in-classpath order, then we can e.g. implement a + -- 'ClassPathLoader' which includes sequence of 'JarLoader' and + -- 'DirLoader' objects, which embed maps from class names to 'Class' + -- objects. A + -- + -- getClass :: String -> IO (Maybe Class) + -- + -- interface would be sufficient. If we want to avoid repeating the + -- lookup in many maps -- one for each classpath component -- we can + -- merge the maps as in the current 'JarReader' type, but I doubt + -- this would ever matter, performance wise. + jars <- newJarReader jarFiles + mbJimagePaths <- findJimagePaths javaPath javaBinDirs + let cb = CodebaseState { classMap = M.empty, subclassMap = M.empty } + cbRef <- newIORef cb + pure $ ExperimentalCodebase $ Codebase + { jarReader = jars + , classPaths = classPaths + , jimagePaths = mbJimagePaths + , stateRef = cbRef + } + + legacyCodebase :: IO Codebase + legacyCodebase = LegacyCodebase <$> JSS.loadCodebase jarFiles classPaths + +-- | Attempt to find an executable named @jimage@, either in the directories +-- supplied as arguments or on the @PATH@. If such an executable can be found, +-- then @Just@ a @JimagePaths@ is returned. Otherwise, @Nothing@ is returned. +-- +-- This is only used when SAW uses JDK 9 or later. +-- See Note [Loading classes from JIMAGE files]. +findJimagePaths :: FilePath + -> [FilePath] + -> IO (Maybe JimagePaths) +findJimagePaths javaPath javaBinDirs = runMaybeT $ do + jimagePath <- MaybeT $ findJimageIn javaBinDirs + javaHome <- MaybeT $ findJavaProperty javaPath "java.home" + MaybeT $ do + let standardModulesPath = javaHome "lib" "modules" + xdgCacheDir <- getXdgDirectory XdgCache "saw" + let standardModulesHash = B16.encode $ hash $ B.pack standardModulesPath + standardModulesCache = xdgCacheDir B.unpack standardModulesHash + createDirectoryIfMissing True standardModulesCache + pure $ Just $ JimagePaths + { jimagePath = jimagePath + , standardModulesFilePath = standardModulesPath + , standardModulesCachePath = standardModulesCache + } + +lookupClass :: Codebase -> ClassName -> IO Class +lookupClass (ExperimentalCodebase cb) = lookupClassExperimental cb +lookupClass (LegacyCodebase cb) = JSS.lookupClass cb + +-- | Returns class with given name in codebase or raises error if no class with +-- that name can be found. +-- +-- The components of class name @clNm@ should be slash separated, not +-- dot separated. E.g. the class @com.example.Class@ should be +-- @com/example/class@. +lookupClassExperimental :: ExperimentalCodebase -> ClassName -> IO Class +lookupClassExperimental cb clNm = do + maybeCl <- tryLookupClassExperimental cb clNm + case maybeCl of + Just cl -> return cl + Nothing -> error $ errorMsg + where + dotNm = slashesToDots (unClassName clNm) + isStandardLibClass = "java.lang" `isPrefixOf` dotNm + errorMsg = unlines $ + if isStandardLibClass then + [ "Cannot find class " ++ dotNm ++ " in codebase." + , "" + , "You probably forgot to specify the location of the" + , "Java standard libraries JAR using the '-j' flag to saw or jss. The" + , " standard libraries JAR is called 'classes.jar' on OS X systems and" + , "'rt.jar' on Windows and Linux systems. Its location can be found by" + , "running 'java -verbose 2>&1 | grep Opened', assuming you're using" + , "a Sun Java." + ] + else + [ "Cannot find class " ++ dotNm ++ " in codebase." + , "" + , "You can specify the location of classes you depend on using" + , "the '-c' flag to specify non-jar classpaths and the '-j' flag" + , "to specify the location of JAR files." + ] + +-- | Produces the superclass hierarchy of the given class. Ordered from subclass +-- to base class, starting with the given class. +supers :: Codebase -> Class -> IO [Class] +supers (ExperimentalCodebase cb) cl = do + starClosureM (maybe (return []) (fmap (:[]) . lookupClassExperimental cb) . superClass) cl + where + -- The monadic variant of starClosure + starClosureM :: Monad m => (a -> m [a]) -> a -> m [a] + starClosureM fn a = + return ((a:) . concat) `ap` (mapM (starClosureM fn) =<< fn a) +supers (LegacyCodebase cb) cl = JSS.supers cb cl + +tryLookupClass :: Codebase -> ClassName -> IO (Maybe Class) +tryLookupClass (ExperimentalCodebase cb) = tryLookupClassExperimental cb +tryLookupClass (LegacyCodebase cb) = JSS.tryLookupClass cb + +-- | Returns class with given name in codebase or returns nothing if no class with +-- that name can be found. +tryLookupClassExperimental :: ExperimentalCodebase -> ClassName -> IO (Maybe Class) +tryLookupClassExperimental (Codebase{jarReader, classPaths, jimagePaths, stateRef}) clNm = do + cb <- readIORef stateRef + case M.lookup clNm (classMap cb) of + Just cl -> return (Just cl) + Nothing -> do + -- We search for .class files in the following order: + -- + -- 1. The cache of files extracted from the `modules` JIMAGE file + -- 2. JAR files + -- 3. Classpath directories + -- 4. The `modules` JIMAGE file (which requires extraction using + -- the @jimage@ tool) + -- + -- Note that: + -- + -- * We search the JIMAGE cache first, as this likely contains classes + -- from the standard library. (It's unclear if Java itself also searches + -- here first, but it seems plausible.) + -- + -- * We bias our search to JARs before classpath directories, + -- as mentioned above in 'loadCodebase'. + -- + -- * We extract classes from the `modules` JIMAGE file last, as that is + -- the most expensive step. See Note [Loading classes from JIMAGE files]. + let mcls = [ loadClassFromStandardModulesCache clNm jimagePaths + , loadClassFromJar clNm jarReader ] ++ + map (loadClassFromDir clNm) classPaths ++ + [ loadClassFromStandardModulesFile clNm jimagePaths ] + mcl <- foldl1 firstSuccess mcls + case mcl of + Just cl -> do + writeIORef stateRef $! addClass cl cb + return $ Just cl + Nothing -> return Nothing + where + -- Search for a .class file in the cache of files extracted from the `modules` JIMAGE file. + -- If it is not found, this does /not/ extract it from a JIMAGE file—see + -- loadClassFromStandardModulesFile. + loadClassFromStandardModulesCache :: ClassName -> Maybe JimagePaths -> IO (Maybe Class) + loadClassFromStandardModulesCache clNme mbJimagePaths = + case mbJimagePaths of + Nothing -> pure Nothing + Just JimagePaths{standardModulesCachePath} -> + loadClassFromParentDir clNme standardModulesCachePath + + -- Attempt to extract a .class file from the `modules` JIMAGE file, cache it for subsequent + -- lookups, and load the resulting .class. + loadClassFromStandardModulesFile :: ClassName -> Maybe JimagePaths -> IO (Maybe Class) + loadClassFromStandardModulesFile clNme mbJimagePaths = + case mbJimagePaths of + Nothing -> pure Nothing + Just JimagePaths{jimagePath, standardModulesFilePath, standardModulesCachePath} -> do + jimageExtract jimagePath clNme standardModulesFilePath standardModulesCachePath + -- After extracting a single class from a JIMAGE file, why do we then have to search + -- every directory under the cached path again? It's because `jimage extract` doesn't + -- tell you the path of the thing it just extracted. Sigh. We can't reasonably infer + -- what this path is either, as it will start with a module name, which isn't + -- something that is contained in a ClassName. + loadClassFromParentDir clNme standardModulesCachePath + + -- Given @parentDir@, search through its subdirectories to find a class, + -- loading it if it is found. + loadClassFromParentDir :: ClassName -> FilePath -> IO (Maybe Class) + loadClassFromParentDir clNme parentDir = do + childDirBaseNames <- listDirectory parentDir + let childDirs = map (parentDir ) childDirBaseNames + foldl' firstSuccess (pure Nothing) $ map (loadClassFromDir clNme) childDirs + + -- Extract a single class from a JIMAGE file and cache it. + jimageExtract :: FilePath -> ClassName -> FilePath -> FilePath -> IO () + jimageExtract jimagePath clNme standardModulesFilePath standardModulesCachePath = do + -- TODO RGS: Is it worth factoring out this process-related code? See my comments elsewhere + -- in SAWScript.JavaTools. + (ec, stdOut, stdErr) <- readProcessWithExitCode jimagePath + [ "extract" + , "--include", "regex:.*/" ++ unClassName clNme ++ ".class" + , "--dir", standardModulesCachePath + , standardModulesFilePath + ] "" + when (ec /= ExitSuccess) $ + fail $ unlines + [ jimagePath ++ " returned non-zero exit code: " ++ show ec + , "Standard output:" + , stdOut + , "Standard error:" + , stdErr + ] + + -- | Attempt to load a class by searching under directory @dir@, which + -- is assumed to be a classpath component. If class @C1. ... .Cn@ is + -- available under @dir@, then it must be located at + -- @dir/C1/.../Cn.class@. + -- http://docs.oracle.com/javase/8/docs/technotes/tools/findingclasses.html#userclass + loadClassFromDir :: ClassName -> FilePath -> IO (Maybe Class) + loadClassFromDir clNme dir = do + exists <- doesFileExist file + if exists + then Just <$> loadClass file + else return Nothing + where + file = dir classNameToClassFilePath clNme + -- | Turn a @com/example/Class@-style classname into a + -- @"com" "example" "Class.class"@-style platform dependent + -- relative class-file path. + -- + -- TODO: move this to 'jvm-parser.git:Language.JVM.Common'? + classNameToClassFilePath :: ClassName -> FilePath + classNameToClassFilePath clName = + map (\c -> if c == '/' then pathSeparator else c) (unClassName clName) <.> "class" + + -- | Register a class with the given codebase + addClass :: Class -> CodebaseState -> CodebaseState + addClass cl (CodebaseState cMap scMap) = + CodebaseState (M.insert (className cl) cl cMap) + (foldr addToSuperclass scMap + (maybeToList (superClass cl)++classInterfaces cl)) + where addToSuperclass super m = + M.alter (\subclasses -> case subclasses of + Just list -> Just (cl : list) + Nothing -> Just [cl]) + super + m + + -- | Combine two @IO (Maybe a)@ computations lazily, choosing the + -- first to succeed (i.e. return 'Just'). + firstSuccess :: IO (Maybe a) -> IO (Maybe a) -> IO (Maybe a) + -- This seems like it would be a common pattern, although I can't + -- find it in the standard libraries. + firstSuccess ima1 ima2 = do + ma1 <- ima1 + case ma1 of + Nothing -> ima2 + Just _ -> return ma1 diff --git a/src/SAWScript/JavaExpr.hs b/src/SAWScript/JavaExpr.hs index 5389b9f6a7..337a34068a 100644 --- a/src/SAWScript/JavaExpr.hs +++ b/src/SAWScript/JavaExpr.hs @@ -72,7 +72,7 @@ import qualified Data.Text as Text import qualified Data.Vector as V import Text.Read hiding (lift) -import Verifier.Java.Codebase as JSS +import Verifier.Java.Codebase as JSS hiding (Codebase) import Verifier.Java.SAWBackend hiding (basic_ss) import Verifier.SAW.Cryptol @@ -81,6 +81,7 @@ import Verifier.SAW.SharedTerm import Verifier.SAW.TypedAST (toShortName) import qualified SAWScript.CongruenceClosure as CC +import SAWScript.JavaCodebase as TODO import SAWScript.Position import SAWScript.Utils @@ -302,7 +303,7 @@ ppActualType (ClassInstance x) = JSS.slashesToDots (JSS.unClassName (JSS.classNa ppActualType (ArrayInstance l tp) = show tp ++ "[" ++ show l ++ "]" ppActualType (PrimitiveType tp) = show tp -parseJavaExpr :: JSS.Codebase -> JSS.Class -> JSS.Method -> String +parseJavaExpr :: Codebase -> JSS.Class -> JSS.Method -> String -> ExceptT String IO JavaExpr parseJavaExpr cb cls meth estr = parseParts eparts where parseParts :: [String] -> ExceptT String IO JavaExpr @@ -348,7 +349,7 @@ parseJavaExpr cb cls meth estr = parseParts eparts " referenced by name, but no debug info available" parseParts (fname:rest) = do let cname = JSS.mkClassName (intercalate "/" (reverse rest)) - mc <- lift $ tryLookupClass cb cname + mc <- lift $ TODO.tryLookupClass cb cname case (filter ((== fname) . fieldName) . filter fieldIsStatic . classFields) <$> mc of Just [fld] -> do let fid = FieldId cname fname (fieldType fld) diff --git a/src/SAWScript/JavaMethodSpec.hs b/src/SAWScript/JavaMethodSpec.hs index 706d5e5e35..b4acd660e0 100644 --- a/src/SAWScript/JavaMethodSpec.hs +++ b/src/SAWScript/JavaMethodSpec.hs @@ -58,6 +58,7 @@ import qualified Text.PrettyPrint.HughesPJ as PP import Language.JVM.Common (ppFldId) import qualified SAWScript.CongruenceClosure as CC +import SAWScript.JavaCodebase (Codebase) import SAWScript.JavaExpr as TC import SAWScript.Options hiding (Verbosity) import qualified SAWScript.Options as Opts @@ -72,7 +73,7 @@ import SAWScript.VerificationCheck import Data.JVM.Symbolic.AST (entryBlock) -import Verifier.Java.Simulator hiding (asBool, State, InvalidType) +import Verifier.Java.Simulator hiding (asBool, State, InvalidType, Codebase) import Verifier.Java.SAWBackend hiding (basic_ss) import Verifier.SAW.Prelude diff --git a/src/SAWScript/JavaMethodSpecIR.hs b/src/SAWScript/JavaMethodSpecIR.hs index f43548ed59..c6c932b6ad 100644 --- a/src/SAWScript/JavaMethodSpecIR.hs +++ b/src/SAWScript/JavaMethodSpecIR.hs @@ -79,6 +79,7 @@ import Verifier.SAW.Term.Pretty (SawDoc, ppTerm, defaultPPOpts) import qualified SAWScript.CongruenceClosure as CC import SAWScript.CongruenceClosure (CCSet) +import SAWScript.JavaCodebase import SAWScript.JavaExpr import SAWScript.Position (Pos(..)) import SAWScript.Utils @@ -347,12 +348,12 @@ bsAddAssumption :: LogicExpr -> BehaviorSpec -> BehaviorSpec bsAddAssumption le bs = bs { bsAssumptions = le : bsAssumptions bs } -initMethodSpec :: Pos -> JSS.Codebase +initMethodSpec :: Pos -> Codebase -> JSS.Class -> String -> IO JavaMethodSpecIR initMethodSpec pos cb thisClass mname = do (methodClass,method) <- findMethod cb pos mname thisClass - superClasses <- JSS.supers cb thisClass + superClasses <- supers cb thisClass let this = thisJavaExpr thisClass initTypeMap | JSS.methodIsStatic method = Map.empty | otherwise = Map.singleton this (ClassInstance thisClass) @@ -387,7 +388,7 @@ data JavaMethodSpecIR = MSIR { -- | The position of the specification for error reporting purposes. specPos :: Pos -- | The codebase containing the method being specified. - , specCodebase :: JSS.Codebase + , specCodebase :: Codebase -- | Class used for this instance. , specThisClass :: JSS.Class -- | Class where method is defined. diff --git a/src/SAWScript/JavaTools.hs b/src/SAWScript/JavaTools.hs index 337ba3b682..4f25578c15 100644 --- a/src/SAWScript/JavaTools.hs +++ b/src/SAWScript/JavaTools.hs @@ -7,7 +7,7 @@ Stability : provisional -} module SAWScript.JavaTools - ( findJavaIn + ( findJavaIn, findJimageIn, findJavaToolIn , findJavaProperty , findJavaMajorVersion ) where @@ -18,16 +18,34 @@ import System.Directory import SAWScript.ProcessUtils --- | @'findJavaIn' javaBinDirs@ searches for an executable named @java@ in the --- directories in @javaBinDirs@. If @javaBinDirs@ is empty, then the @PATH@ is --- searched. +-- | @'findJavaIn' searchPaths@ searches for an executable named @java@ among +-- the directories in @searchPaths@. If @searchPaths@ is empty, then the @PATH@ +-- is searched. -- -- If the search finds at least one executable, then @Just@ the first result is -- returned. If no executables are found, then @Nothing@ is returned. findJavaIn :: [FilePath] -> IO (Maybe FilePath) -findJavaIn javaBinDirs - | null javaBinDirs = findExecutable "java" - | otherwise = listToMaybe <$> findExecutablesInDirectories javaBinDirs "java" +findJavaIn = findJavaToolIn "java" + +-- | @'findJavaIn' searchPaths@ searches for an executable named @java@ among +-- the directories in @searchPaths@. If @searchPaths@ is empty, then the @PATH@ +-- is searched. +-- +-- If the search finds at least one executable, then @Just@ the first result is +-- returned. If no executables are found, then @Nothing@ is returned. +findJimageIn :: [FilePath] -> IO (Maybe FilePath) +findJimageIn = findJavaToolIn "jimage" + +-- | @'findJavaToolIn' toolName searchPaths@ searches for an executable named +-- @toolName@ among the directories in @searchPaths@. If @searchPaths@ is +-- empty, then the @PATH@ is searched. +-- +-- If the search finds at least one executable, then @Just@ the first result is +-- returned. If no executables are found, then @Nothing@ is returned. +findJavaToolIn :: FilePath -> [FilePath] -> IO (Maybe FilePath) +findJavaToolIn toolName searchPaths + | null searchPaths = findExecutable toolName + | otherwise = listToMaybe <$> findExecutablesInDirectories searchPaths toolName -- | @'findJavaProperty' javaPath prop@ invokes @javaPath@ to dump its system -- properties (see ) diff --git a/src/SAWScript/Options.hs b/src/SAWScript/Options.hs index 4005b3f498..ccbdbcf1b5 100644 --- a/src/SAWScript/Options.hs +++ b/src/SAWScript/Options.hs @@ -207,8 +207,12 @@ processEnv opts = do -- --java-bin-dirs or PATH, see the Haddocks for findJavaIn), then use that -- to detect the path to Java's standard rt.jar file and add it to the -- jarList on Java 8 or earlier. (Later versions of Java do not use - -- rt.jar—see #861.) If Java's path is not specified, return the Options - -- unchanged. + -- rt.jar—see Note [Loading classes from JIMAGE files] in + -- SAWScript.JavaCodebase.) If Java's path is not specified, return the + -- Options unchanged. + -- + -- TODO RGS: Update the reference to SAWScript.JavaCodebase if/when it + -- merges into crucible-jvm. addJavaBinDirInducedOpts :: Options -> IO Options addJavaBinDirInducedOpts os@Options{javaBinDirs} = do mbJavaPath <- findJavaIn javaBinDirs diff --git a/src/SAWScript/Utils.hs b/src/SAWScript/Utils.hs index 7d5d9392e5..5bc9744c7c 100644 --- a/src/SAWScript/Utils.hs +++ b/src/SAWScript/Utils.hs @@ -40,6 +40,7 @@ import Numeric(showFFloat) import qualified Verifier.Java.Codebase as JSS +import SAWScript.JavaCodebase (Codebase(..), tryLookupClass) import SAWScript.Options import SAWScript.Position @@ -109,9 +110,9 @@ showDuration n = printf "%02d:%s" m (show s) -- | Atempt to find class with given name, or throw ExecException if no class -- with that name exists. Class name should be in slash-separated form. -lookupClass :: JSS.Codebase -> Pos -> JSS.ClassName -> IO JSS.Class +lookupClass :: Codebase -> Pos -> JSS.ClassName -> IO JSS.Class lookupClass cb site nm = do - maybeCl <- JSS.tryLookupClass cb nm + maybeCl <- tryLookupClass cb nm case maybeCl of Nothing -> do let msg = ftext ("The Java class " ++ JSS.slashesToDots (JSS.unClassName nm) ++ " could not be found.") @@ -123,9 +124,15 @@ lookupClass cb site nm = do in throwIOExecException site msg res Just cl -> return cl +-- TODO RGS: This is a really ugly hack to work around the fact that we're +-- still in the process of migrating from jvm-verifier to crucible-jvm. +-- See #993. +lookupClassLegacy :: JSS.Codebase -> Pos -> JSS.ClassName -> IO JSS.Class +lookupClassLegacy cb = lookupClass $ LegacyCodebase cb + -- | Returns method with given name in this class or one of its subclasses. -- Throws an ExecException if method could not be found or is ambiguous. -findMethod :: JSS.Codebase -> Pos -> String -> JSS.Class -> IO (JSS.Class, JSS.Method) +findMethod :: Codebase -> Pos -> String -> JSS.Class -> IO (JSS.Class, JSS.Method) findMethod cb site nm initClass = impl [] initClass where javaClassName = JSS.slashesToDots (JSS.unClassName (JSS.className initClass)) methodType m = (JSS.methodParameterTypes m, JSS.methodReturnType m) @@ -154,6 +161,12 @@ findMethod cb site nm initClass = impl [] initClass res = "Please disambiguate method name." in throwIOExecException site (ftext msg) res +-- TODO RGS: This is a really ugly hack to work around the fact that we're +-- still in the process of migrating from jvm-verifier to crucible-jvm. +-- See #993. +findMethodLegacy :: JSS.Codebase -> Pos -> String -> JSS.Class -> IO (JSS.Class, JSS.Method) +findMethodLegacy cb = findMethod $ LegacyCodebase cb + throwFieldNotFound :: JSS.Type -> String -> [String] -> ExceptT String IO a throwFieldNotFound tp fieldName names = throwE msg where @@ -162,7 +175,7 @@ throwFieldNotFound tp fieldName names = throwE msg fieldName ++ "." ++ "\nAvailable fields:\n" ++ unlines names -findField :: JSS.Codebase -> Pos -> JSS.Type -> String -> ExceptT String IO JSS.FieldId +findField :: Codebase -> Pos -> JSS.Type -> String -> ExceptT String IO JSS.FieldId findField _ _ tp@(JSS.ArrayType _) nm = throwFieldNotFound tp nm [] findField cb site tp@(JSS.ClassType clName) nm = impl [] =<< lift (lookupClass cb site clName) where diff --git a/src/SAWScript/Value.hs b/src/SAWScript/Value.hs index 057ed898f1..6b4f7c92e4 100644 --- a/src/SAWScript/Value.hs +++ b/src/SAWScript/Value.hs @@ -71,6 +71,7 @@ import qualified SAWScript.Crucible.JVM.MethodSpecIR () import qualified Verifier.Java.Codebase as JSS import qualified Text.LLVM.AST as LLVM (Type) import qualified Text.LLVM.PP as LLVM (ppType) +import SAWScript.JavaCodebase (Codebase) import SAWScript.JavaExpr (JavaType(..)) import SAWScript.JavaPretty (prettyClass) import SAWScript.Options (Options(printOutFn),printOutLn,Verbosity) @@ -167,7 +168,7 @@ data SAW_CFG where JVM_CFG :: Crucible.AnyCFG JVM -> SAW_CFG data BuiltinContext = BuiltinContext { biSharedContext :: SharedContext - , biJavaCodebase :: JSS.Codebase + , biJavaCodebase :: Codebase , biBasicSS :: Simpset } deriving Generic @@ -383,7 +384,7 @@ data PrimitiveLifecycle data TopLevelRO = TopLevelRO { roSharedContext :: SharedContext - , roJavaCodebase :: JSS.Codebase + , roJavaCodebase :: Codebase , roOptions :: Options , roHandleAlloc :: Crucible.HandleAllocator , roPosition :: SS.Pos @@ -443,7 +444,7 @@ getPosition = TopLevel (asks roPosition) getSharedContext :: TopLevel SharedContext getSharedContext = TopLevel (asks roSharedContext) -getJavaCodebase :: TopLevel JSS.Codebase +getJavaCodebase :: TopLevel Codebase getJavaCodebase = TopLevel (asks roJavaCodebase) getOptions :: TopLevel Options