FileExists: set one watcher instead of thousands (haskell/ghcide#831)

michaelpj · web-flow · commit 76db051c1c15 · 2020-09-27T21:13:40.000+01:00
* FileExists: set one watcher instead of thousands

This prevents us from sending thousands of notifications to the client
on startup, which can lock up some clients like emacs. Instead we send
precisely one.

This has some consequences for the behaviour of the fast file existence
lookup, which I've noted in the code, alongside a description of how it
works (I spent a while figuring it out, I thought I might as well write
it down).

Fixes haskell/ghcide#776.

* Use fast rules only if it matches our watcher spec
diff --git a/ghcide/ghcide.cabal b/ghcide/ghcide.cabal
@@ -49,6 +49,7 @@ library
         fuzzy,
         filepath,
         fingertree,
+        Glob,
         haddock-library >= 1.8,
         hashable,
         haskell-lsp-types == 0.22.*,
diff --git a/ghcide/shell.nix b/ghcide/shell.nix
@@ -46,6 +46,8 @@ let defaultCompiler = "ghc" + lib.replaceStrings ["."] [""] haskellPackages.ghc.
           diagrams-svg
           extra
           fuzzy
+          fingertree
+          Glob
           ghc-check
           gitrev
           happy
diff --git a/ghcide/src/Development/IDE/Core/FileExists.hs b/ghcide/src/Development/IDE/Core/FileExists.hs
@@ -5,36 +5,75 @@ module Development.IDE.Core.FileExists
   ( fileExistsRules
   , modifyFileExists
   , getFileExists
+  , watchedGlobs
   )
 where
 
 import           Control.Concurrent.Extra
 import           Control.Exception
 import           Control.Monad.Extra
-import qualified Data.Aeson                    as A
 import           Data.Binary
 import qualified Data.ByteString               as BS
-import Data.HashMap.Strict (HashMap)
+import           Data.HashMap.Strict (HashMap)
 import qualified Data.HashMap.Strict as HashMap
 import           Data.Maybe
-import qualified Data.Text                     as T
 import           Development.IDE.Core.FileStore
 import           Development.IDE.Core.IdeConfiguration
 import           Development.IDE.Core.Shake
 import           Development.IDE.Types.Location
-import           Development.IDE.Types.Logger
+import           Development.IDE.Types.Options
 import           Development.Shake
 import           Development.Shake.Classes
 import           GHC.Generics
-import           Language.Haskell.LSP.Messages
-import           Language.Haskell.LSP.Types
 import           Language.Haskell.LSP.Types.Capabilities
 import qualified System.Directory as Dir
+import qualified System.FilePath.Glob as Glob
 
--- | A map for tracking the file existence
+{- Note [File existence cache and LSP file watchers]
+Some LSP servers provide the ability to register file watches with the client, which will then notify
+us of file changes. Some clients can do this more efficiently than us, or generally it's a tricky
+problem
+
+Here we use this to maintain a quick lookup cache of file existence. How this works is:
+- On startup, if the client supports it we ask it to watch some files (see below).
+- When those files are created or deleted (we can also see change events, but we don't
+care since we're only caching existence here) we get a notification from the client.
+- The notification handler calls 'modifyFileExists' to update our cache.
+
+This means that the cache will only ever work for the files we have set up a watcher for.
+So we pick the set that we mostly care about and which are likely to change existence
+most often: the source files of the project (as determined by the source extensions
+we're configured to care about).
+
+For all other files we fall back to the slow path.
+
+There are a few failure modes to think about:
+
+1. The client doesn't send us the notifications we asked for.
+
+There's not much we can do in this case: the whole point is to rely on the client so
+we don't do the checking ourselves. If the client lets us down, we will just be wrong.
+
+2. Races between registering watchers, getting notifications, and file changes.
+
+If a file changes status between us asking for notifications and the client actually
+setting up the notifications, we might not get told about it. But this is a relatively
+small race window around startup, so we just don't worry about it.
+
+3. Using the fast path for files that we aren't watching.
+
+In this case we will fall back to the slow path, but cache that result forever (since
+it won't get invalidated by a client notification). To prevent this we guard the
+fast path by a check that the path also matches our watching patterns.
+-}
+
+-- See Note [File existence cache and LSP file watchers]
+-- | A map for tracking the file existence.
+-- If a path maps to 'True' then it exists; if it maps to 'False' then it doesn't exist'; and
+-- if it's not in the map then we don't know.
 type FileExistsMap = (HashMap NormalizedFilePath Bool)
 
--- | A wrapper around a mutable 'FileExistsMap'
+-- | A wrapper around a mutable 'FileExistsState'
 newtype FileExistsMapVar = FileExistsMapVar (Var FileExistsMap)
 
 instance IsIdeGlobal FileExistsMapVar
@@ -45,22 +84,16 @@ getFileExistsMapUntracked = do
   FileExistsMapVar v <- getIdeGlobalAction
   liftIO $ readVar v
 
--- | Modify the global store of file exists
-modifyFileExistsAction :: (FileExistsMap -> IO FileExistsMap) -> Action ()
-modifyFileExistsAction f = do
-  FileExistsMapVar var <- getIdeGlobalAction
-  liftIO $ modifyVar_ var f
-
--- | Modify the global store of file exists
+-- | Modify the global store of file exists.
 modifyFileExists :: IdeState -> [(NormalizedFilePath, Bool)] -> IO ()
 modifyFileExists state changes = do
   FileExistsMapVar var <- getIdeGlobalState state
   changesMap           <- evaluate $ HashMap.fromList changes
-
   -- Masked to ensure that the previous values are flushed together with the map update
   mask $ \_ -> do
     -- update the map
     modifyVar_ var $ evaluate . HashMap.union changesMap
+    -- See Note [Invalidating file existence results]
     -- flush previous values
     mapM_ (deleteValue state GetFileExists . fst) changes
 
@@ -87,86 +120,101 @@ instance Binary   GetFileExists
 getFileExists :: NormalizedFilePath -> Action Bool
 getFileExists fp = use_ GetFileExists fp
 
+{- Note [Which files should we watch?]
+The watcher system gives us a lot of flexibility: we can set multiple watchers, and they can all watch on glob
+patterns.
+
+We used to have a quite precise system, where we would register a watcher for a single file path only (and always)
+when we actually looked to see if it existed. The downside of this is that it sends a *lot* of notifications
+to the client (thousands on a large project), and this could lock up some clients like emacs
+(https://github.com/emacs-lsp/lsp-mode/issues/2165).
+
+Now we take the opposite approach: we register a single, quite general watcher that looks for all files
+with a predefined set of extensions. The consequences are:
+- The client will have to watch more files. This is usually not too bad, since the pattern is a single glob,
+and the clients typically call out to an optimized implementation of file watching that understands globs.
+- The client will send us a lot more notifications. This isn't too bad in practice, since although
+we're watching a lot of files in principle, they don't get created or destroyed that often.
+- We won't ever hit the fast lookup path for files which aren't in our watch pattern, since the only way
+files get into our map is when the client sends us a notification about them because we're watching them.
+This is fine so long as we're watching the files we check most often, i.e. source files.
+-}
+
+-- | The list of file globs that we ask the client to watch.
+watchedGlobs :: IdeOptions -> [String]
+watchedGlobs opts = [ "**/*." ++ extIncBoot | ext <- optExtensions opts, extIncBoot <- [ext, ext ++ "-boot"]]
+
 -- | Installs the 'getFileExists' rules.
 --   Provides a fast implementation if client supports dynamic watched files.
 --   Creates a global state as a side effect in that case.
-fileExistsRules :: IO LspId -> ClientCapabilities -> VFSHandle -> Rules ()
-fileExistsRules getLspId ClientCapabilities{_workspace} vfs = do
+fileExistsRules :: ClientCapabilities -> VFSHandle -> Rules ()
+fileExistsRules ClientCapabilities{_workspace} vfs = do
   -- Create the global always, although it should only be used if we have fast rules.
   -- But there's a chance someone will send unexpected notifications anyway,
   -- e.g. https://github.com/digital-asset/ghcide/issues/599
   addIdeGlobal . FileExistsMapVar =<< liftIO (newVar [])
+
+  extras <- getShakeExtrasRules
+  opts <- liftIO $ getIdeOptionsIO extras
+  let globs = watchedGlobs opts
+
   case () of
     _ | Just WorkspaceClientCapabilities{_didChangeWatchedFiles} <- _workspace
       , Just DidChangeWatchedFilesClientCapabilities{_dynamicRegistration} <- _didChangeWatchedFiles
       , Just True <- _dynamicRegistration
-        -> fileExistsRulesFast getLspId vfs
-      | otherwise -> do
-        logger <- logger <$> getShakeExtrasRules
-        liftIO $ logDebug logger "Warning: Client does not support watched files. Falling back to OS polling"
-        fileExistsRulesSlow vfs
-
---   Requires an lsp client that provides WatchedFiles notifications.
-fileExistsRulesFast :: IO LspId -> VFSHandle -> Rules ()
-fileExistsRulesFast getLspId vfs =
-  defineEarlyCutoff $ \GetFileExists file -> do
-    isWf <- isWorkspaceFile file
-    if isWf
-        then fileExistsFast getLspId vfs file
-        else fileExistsSlow vfs file
-
-fileExistsFast :: IO LspId -> VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool))
-fileExistsFast getLspId vfs file = do
-    fileExistsMap <- getFileExistsMapUntracked
-    let mbFilesWatched = HashMap.lookup file fileExistsMap
-    case mbFilesWatched of
-      Just fv -> pure (summarizeExists fv, ([], Just fv))
-      Nothing -> do
-        exist                   <- liftIO $ getFileExistsVFS vfs file
-        ShakeExtras { eventer } <- getShakeExtras
-
-        -- add a listener for VFS Create/Delete file events,
-        -- taking the FileExistsMap lock to prevent race conditions
-        -- that would lead to multiple listeners for the same path
-        modifyFileExistsAction $ \x -> do
-          case HashMap.alterF (,Just exist) file x of
-            (Nothing, x') -> do
-            -- if the listener addition fails, we never recover. This is a bug.
-              addListener eventer file
-              return x'
-            (Just _, _) ->
-              -- if the key was already there, do nothing
-              return x
-
-        pure (summarizeExists exist, ([], Just exist))
- where
-  addListener eventer fp = do
-    reqId <- getLspId
-    let
-      req = RequestMessage "2.0" reqId ClientRegisterCapability regParams
-      fpAsId       = T.pack $ fromNormalizedFilePath fp
-      regParams    = RegistrationParams (List [registration])
-      registration = Registration fpAsId
-                                  WorkspaceDidChangeWatchedFiles
-                                  (Just (A.toJSON regOptions))
-      regOptions =
-        DidChangeWatchedFilesRegistrationOptions { _watchers = List [watcher] }
-      watchKind = WatchKind { _watchCreate = True, _watchChange = False, _watchDelete = True}
-      watcher = FileSystemWatcher { _globPattern = fromNormalizedFilePath fp
-                                  , _kind        = Just watchKind
-                                  }
-
-    eventer $ ReqRegisterCapability req
+        -> fileExistsRulesFast globs vfs
+      | otherwise -> fileExistsRulesSlow vfs
+
+-- Requires an lsp client that provides WatchedFiles notifications, but assumes that this has already been checked.
+fileExistsRulesFast :: [String] -> VFSHandle -> Rules ()
+fileExistsRulesFast globs vfs =
+    let patterns = fmap Glob.compile globs
+        fpMatches fp = any (\p -> Glob.match p fp) patterns
+    in defineEarlyCutoff $ \GetFileExists file -> do
+        isWf <- isWorkspaceFile file
+        if isWf && fpMatches (fromNormalizedFilePath file)
+            then fileExistsFast vfs file
+            else fileExistsSlow vfs file
+
+{- Note [Invalidating file existence results]
+We have two mechanisms for getting file existence information:
+- The file existence cache
+- The VFS lookup
+
+Both of these affect the results of the 'GetFileExists' rule, so we need to make sure it
+is invalidated properly when things change.
+
+For the file existence cache, we manually flush the results of 'GetFileExists' when we
+modify it (i.e. when a notification comes from the client). This is faster than using
+'alwaysRerun' in the 'fileExistsFast', and we need it to be as fast as possible.
+
+For the VFS lookup, however, we won't get prompted to flush the result, so instead
+we use 'alwaysRerun'.
+-}
+
+fileExistsFast :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool))
+fileExistsFast vfs file = do
+    -- Could in principle use 'alwaysRerun' here, but it's too slwo, See Note [Invalidating file existence results]
+    mp <- getFileExistsMapUntracked
+
+    let mbFilesWatched = HashMap.lookup file mp
+    exist <- case mbFilesWatched of
+      Just exist -> pure exist
+      -- We don't know about it: use the slow route.
+      -- Note that we do *not* call 'fileExistsSlow', as that would trigger 'alwaysRerun'.
+      Nothing -> liftIO $ getFileExistsVFS vfs file
+    pure (summarizeExists exist, ([], Just exist))
 
 summarizeExists :: Bool -> Maybe BS.ByteString
 summarizeExists x = Just $ if x then BS.singleton 1 else BS.empty
 
-fileExistsRulesSlow:: VFSHandle -> Rules ()
+fileExistsRulesSlow :: VFSHandle -> Rules ()
 fileExistsRulesSlow vfs =
   defineEarlyCutoff $ \GetFileExists file -> fileExistsSlow vfs file
 
 fileExistsSlow :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS.ByteString, ([a], Maybe Bool))
 fileExistsSlow vfs file = do
+    -- See Note [Invalidating file existence results]
     alwaysRerun
     exist <- liftIO $ getFileExistsVFS vfs file
     pure (summarizeExists exist, ([], Just exist))
diff --git a/ghcide/src/Development/IDE/Core/Service.hs b/ghcide/src/Development/IDE/Core/Service.hs
@@ -68,7 +68,7 @@ initialise caps mainRule getLspId toDiags wProg wIndefProg logger debouncer opti
             addIdeGlobal $ GlobalIdeOptions options
             fileStoreRules vfs
             ofInterestRules
-            fileExistsRules getLspId caps vfs
+            fileExistsRules caps vfs
             mainRule
 
 writeProfile :: IdeState -> FilePath -> IO ()
diff --git a/ghcide/src/Development/IDE/LSP/LanguageServer.hs b/ghcide/src/Development/IDE/LSP/LanguageServer.hs
@@ -214,8 +214,7 @@ initHandler _ ide params = do
 --   Set them to avoid a warning in VS Code output.
 setHandlersIgnore :: PartialHandlers config
 setHandlersIgnore = PartialHandlers $ \_ x -> return x
-    {LSP.initializedHandler = none
-    ,LSP.responseHandler = none
+    {LSP.responseHandler = none
     }
     where none = Just $ const $ return ()
 
diff --git a/ghcide/src/Development/IDE/LSP/Notifications.hs b/ghcide/src/Development/IDE/LSP/Notifications.hs
@@ -12,6 +12,8 @@ import           Development.IDE.LSP.Server
 import qualified Language.Haskell.LSP.Core        as LSP
 import           Language.Haskell.LSP.Types
 import qualified Language.Haskell.LSP.Types       as LSP
+import qualified Language.Haskell.LSP.Messages    as LSP
+import qualified Language.Haskell.LSP.Types.Capabilities as LSP
 
 import           Development.IDE.Core.IdeConfiguration
 import           Development.IDE.Core.Service
@@ -21,14 +23,15 @@ import           Development.IDE.Types.Logger
 import           Development.IDE.Types.Options
 
 import           Control.Monad.Extra
+import qualified Data.Aeson                       as A
 import           Data.Foldable                    as F
 import           Data.Maybe
 import qualified Data.HashMap.Strict              as M
 import qualified Data.HashSet                     as S
 import qualified Data.Text                        as Text
 
 import           Development.IDE.Core.FileStore   (setSomethingModified, setFileModified, typecheckParents)
-import           Development.IDE.Core.FileExists  (modifyFileExists)
+import           Development.IDE.Core.FileExists  (modifyFileExists, watchedGlobs)
 import           Development.IDE.Core.OfInterest
 
 
@@ -72,6 +75,8 @@ setHandlersNotifications = PartialHandlers $ \WithMessage{..} x -> return x
                 logInfo (ideLogger ide) $ "Closed text document: " <> getUri _uri
     ,LSP.didChangeWatchedFilesNotificationHandler = withNotification (LSP.didChangeWatchedFilesNotificationHandler x) $
         \_ ide (DidChangeWatchedFilesParams fileEvents) -> do
+            -- See Note [File existence cache and LSP file watchers] which explains why we get these notifications and
+            -- what we do with them
             let events =
                     mapMaybe
                         (\(FileEvent uri ev) ->
@@ -98,4 +103,45 @@ setHandlersNotifications = PartialHandlers $ \WithMessage{..} x -> return x
             logInfo (ideLogger ide) $ "Configuration changed: " <> msg
             modifyClientSettings ide (const $ Just cfg)
             setSomethingModified ide
+
+    -- Initialized handler, good time to dynamically register capabilities
+    ,LSP.initializedHandler = withNotification (LSP.initializedHandler x) $ \lsp@LSP.LspFuncs{..} ide _ -> do
+        let watchSupported = case () of
+              _ | LSP.ClientCapabilities{_workspace} <- clientCapabilities
+                , Just LSP.WorkspaceClientCapabilities{_didChangeWatchedFiles} <- _workspace
+                , Just LSP.DidChangeWatchedFilesClientCapabilities{_dynamicRegistration} <- _didChangeWatchedFiles
+                , Just True <- _dynamicRegistration
+                  -> True
+                | otherwise -> False
+
+        if watchSupported
+        then registerWatcher lsp ide
+        else logDebug (ideLogger ide) "Warning: Client does not support watched files. Falling back to OS polling"
+
     }
+    where
+        registerWatcher LSP.LspFuncs{..} ide = do
+            lspId <- getNextReqId
+            opts <- getIdeOptionsIO $ shakeExtras ide
+            let
+              req = RequestMessage "2.0" lspId ClientRegisterCapability regParams
+              regParams    = RegistrationParams (List [registration])
+              -- The registration ID is arbitrary and is only used in case we want to deregister (which we won't).
+              -- We could also use something like a random UUID, as some other servers do, but this works for
+              -- our purposes.
+              registration = Registration "globalFileWatches"
+                                          WorkspaceDidChangeWatchedFiles
+                                          (Just (A.toJSON regOptions))
+              regOptions =
+                DidChangeWatchedFilesRegistrationOptions { _watchers = List watchers }
+              -- See Note [File existence cache and LSP file watchers] for why this exists, and the choice of watch kind
+              watchKind = WatchKind { _watchCreate = True, _watchChange = False, _watchDelete = True}
+              -- See Note [Which files should we watch?] for an explanation of why the pattern is the way that it is
+              -- The patterns will be something like "**/.hs", i.e. "any number of directory segments,
+              -- followed by a file with an extension 'hs'.
+              watcher glob = FileSystemWatcher { _globPattern = glob, _kind = Just watchKind }
+              -- We use multiple watchers instead of one using '{}' because lsp-test doesn't
+              -- support that: https://github.com/bubba/lsp-test/issues/77
+              watchers = [ watcher glob | glob <- watchedGlobs opts ]
+
+            sendFunc $ LSP.ReqRegisterCapability req
diff --git a/ghcide/test/exe/Main.hs b/ghcide/test/exe/Main.hs

Original file line number	Diff line number	Diff line change
`@@ -214,8 +214,7 @@ initHandler _ ide params = do`
`214`	`214`	`-- Set them to avoid a warning in VS Code output.`
`215`	`215`	`setHandlersIgnore :: PartialHandlers config`
`216`	`216`	`setHandlersIgnore = PartialHandlers $ \_ x -> return x`
`217`		`- {LSP.initializedHandler = none`
`218`		`- ,LSP.responseHandler = none`
	`217`	`+ {LSP.responseHandler = none`
`219`	`218`	`}`
`220`	`219`	`where none = Just $ const $ return ()`
`221`	`220`