@@ -5,36 +5,75 @@ module Development.IDE.Core.FileExists
5
5
( fileExistsRules
6
6
, modifyFileExists
7
7
, getFileExists
8
+ , watchedGlobs
8
9
)
9
10
where
10
11
11
12
import Control.Concurrent.Extra
12
13
import Control.Exception
13
14
import Control.Monad.Extra
14
- import qualified Data.Aeson as A
15
15
import Data.Binary
16
16
import qualified Data.ByteString as BS
17
- import Data.HashMap.Strict (HashMap )
17
+ import Data.HashMap.Strict (HashMap )
18
18
import qualified Data.HashMap.Strict as HashMap
19
19
import Data.Maybe
20
- import qualified Data.Text as T
21
20
import Development.IDE.Core.FileStore
22
21
import Development.IDE.Core.IdeConfiguration
23
22
import Development.IDE.Core.Shake
24
23
import Development.IDE.Types.Location
25
- import Development.IDE.Types.Logger
24
+ import Development.IDE.Types.Options
26
25
import Development.Shake
27
26
import Development.Shake.Classes
28
27
import GHC.Generics
29
- import Language.Haskell.LSP.Messages
30
- import Language.Haskell.LSP.Types
31
28
import Language.Haskell.LSP.Types.Capabilities
32
29
import qualified System.Directory as Dir
30
+ import qualified System.FilePath.Glob as Glob
33
31
34
- -- | A map for tracking the file existence
32
+ {- Note [File existence cache and LSP file watchers]
33
+ Some LSP servers provide the ability to register file watches with the client, which will then notify
34
+ us of file changes. Some clients can do this more efficiently than us, or generally it's a tricky
35
+ problem
36
+
37
+ Here we use this to maintain a quick lookup cache of file existence. How this works is:
38
+ - On startup, if the client supports it we ask it to watch some files (see below).
39
+ - When those files are created or deleted (we can also see change events, but we don't
40
+ care since we're only caching existence here) we get a notification from the client.
41
+ - The notification handler calls 'modifyFileExists' to update our cache.
42
+
43
+ This means that the cache will only ever work for the files we have set up a watcher for.
44
+ So we pick the set that we mostly care about and which are likely to change existence
45
+ most often: the source files of the project (as determined by the source extensions
46
+ we're configured to care about).
47
+
48
+ For all other files we fall back to the slow path.
49
+
50
+ There are a few failure modes to think about:
51
+
52
+ 1. The client doesn't send us the notifications we asked for.
53
+
54
+ There's not much we can do in this case: the whole point is to rely on the client so
55
+ we don't do the checking ourselves. If the client lets us down, we will just be wrong.
56
+
57
+ 2. Races between registering watchers, getting notifications, and file changes.
58
+
59
+ If a file changes status between us asking for notifications and the client actually
60
+ setting up the notifications, we might not get told about it. But this is a relatively
61
+ small race window around startup, so we just don't worry about it.
62
+
63
+ 3. Using the fast path for files that we aren't watching.
64
+
65
+ In this case we will fall back to the slow path, but cache that result forever (since
66
+ it won't get invalidated by a client notification). To prevent this we guard the
67
+ fast path by a check that the path also matches our watching patterns.
68
+ -}
69
+
70
+ -- See Note [File existence cache and LSP file watchers]
71
+ -- | A map for tracking the file existence.
72
+ -- If a path maps to 'True' then it exists; if it maps to 'False' then it doesn't exist'; and
73
+ -- if it's not in the map then we don't know.
35
74
type FileExistsMap = (HashMap NormalizedFilePath Bool )
36
75
37
- -- | A wrapper around a mutable 'FileExistsMap '
76
+ -- | A wrapper around a mutable 'FileExistsState '
38
77
newtype FileExistsMapVar = FileExistsMapVar (Var FileExistsMap )
39
78
40
79
instance IsIdeGlobal FileExistsMapVar
@@ -45,22 +84,16 @@ getFileExistsMapUntracked = do
45
84
FileExistsMapVar v <- getIdeGlobalAction
46
85
liftIO $ readVar v
47
86
48
- -- | Modify the global store of file exists
49
- modifyFileExistsAction :: (FileExistsMap -> IO FileExistsMap ) -> Action ()
50
- modifyFileExistsAction f = do
51
- FileExistsMapVar var <- getIdeGlobalAction
52
- liftIO $ modifyVar_ var f
53
-
54
- -- | Modify the global store of file exists
87
+ -- | Modify the global store of file exists.
55
88
modifyFileExists :: IdeState -> [(NormalizedFilePath , Bool )] -> IO ()
56
89
modifyFileExists state changes = do
57
90
FileExistsMapVar var <- getIdeGlobalState state
58
91
changesMap <- evaluate $ HashMap. fromList changes
59
-
60
92
-- Masked to ensure that the previous values are flushed together with the map update
61
93
mask $ \ _ -> do
62
94
-- update the map
63
95
modifyVar_ var $ evaluate . HashMap. union changesMap
96
+ -- See Note [Invalidating file existence results]
64
97
-- flush previous values
65
98
mapM_ (deleteValue state GetFileExists . fst ) changes
66
99
@@ -87,86 +120,101 @@ instance Binary GetFileExists
87
120
getFileExists :: NormalizedFilePath -> Action Bool
88
121
getFileExists fp = use_ GetFileExists fp
89
122
123
+ {- Note [Which files should we watch?]
124
+ The watcher system gives us a lot of flexibility: we can set multiple watchers, and they can all watch on glob
125
+ patterns.
126
+
127
+ We used to have a quite precise system, where we would register a watcher for a single file path only (and always)
128
+ when we actually looked to see if it existed. The downside of this is that it sends a *lot* of notifications
129
+ to the client (thousands on a large project), and this could lock up some clients like emacs
130
+ (https://github.com/emacs-lsp/lsp-mode/issues/2165).
131
+
132
+ Now we take the opposite approach: we register a single, quite general watcher that looks for all files
133
+ with a predefined set of extensions. The consequences are:
134
+ - The client will have to watch more files. This is usually not too bad, since the pattern is a single glob,
135
+ and the clients typically call out to an optimized implementation of file watching that understands globs.
136
+ - The client will send us a lot more notifications. This isn't too bad in practice, since although
137
+ we're watching a lot of files in principle, they don't get created or destroyed that often.
138
+ - We won't ever hit the fast lookup path for files which aren't in our watch pattern, since the only way
139
+ files get into our map is when the client sends us a notification about them because we're watching them.
140
+ This is fine so long as we're watching the files we check most often, i.e. source files.
141
+ -}
142
+
143
+ -- | The list of file globs that we ask the client to watch.
144
+ watchedGlobs :: IdeOptions -> [String ]
145
+ watchedGlobs opts = [ " **/*." ++ extIncBoot | ext <- optExtensions opts, extIncBoot <- [ext, ext ++ " -boot" ]]
146
+
90
147
-- | Installs the 'getFileExists' rules.
91
148
-- Provides a fast implementation if client supports dynamic watched files.
92
149
-- Creates a global state as a side effect in that case.
93
- fileExistsRules :: IO LspId -> ClientCapabilities -> VFSHandle -> Rules ()
94
- fileExistsRules getLspId ClientCapabilities {_workspace} vfs = do
150
+ fileExistsRules :: ClientCapabilities -> VFSHandle -> Rules ()
151
+ fileExistsRules ClientCapabilities {_workspace} vfs = do
95
152
-- Create the global always, although it should only be used if we have fast rules.
96
153
-- But there's a chance someone will send unexpected notifications anyway,
97
154
-- e.g. https://github.com/digital-asset/ghcide/issues/599
98
155
addIdeGlobal . FileExistsMapVar =<< liftIO (newVar [] )
156
+
157
+ extras <- getShakeExtrasRules
158
+ opts <- liftIO $ getIdeOptionsIO extras
159
+ let globs = watchedGlobs opts
160
+
99
161
case () of
100
162
_ | Just WorkspaceClientCapabilities {_didChangeWatchedFiles} <- _workspace
101
163
, Just DidChangeWatchedFilesClientCapabilities {_dynamicRegistration} <- _didChangeWatchedFiles
102
164
, Just True <- _dynamicRegistration
103
- -> fileExistsRulesFast getLspId vfs
104
- | otherwise -> do
105
- logger <- logger <$> getShakeExtrasRules
106
- liftIO $ logDebug logger " Warning: Client does not support watched files. Falling back to OS polling"
107
- fileExistsRulesSlow vfs
108
-
109
- -- Requires an lsp client that provides WatchedFiles notifications.
110
- fileExistsRulesFast :: IO LspId -> VFSHandle -> Rules ()
111
- fileExistsRulesFast getLspId vfs =
112
- defineEarlyCutoff $ \ GetFileExists file -> do
113
- isWf <- isWorkspaceFile file
114
- if isWf
115
- then fileExistsFast getLspId vfs file
116
- else fileExistsSlow vfs file
117
-
118
- fileExistsFast :: IO LspId -> VFSHandle -> NormalizedFilePath -> Action (Maybe BS. ByteString , ([a ], Maybe Bool ))
119
- fileExistsFast getLspId vfs file = do
120
- fileExistsMap <- getFileExistsMapUntracked
121
- let mbFilesWatched = HashMap. lookup file fileExistsMap
122
- case mbFilesWatched of
123
- Just fv -> pure (summarizeExists fv, ([] , Just fv))
124
- Nothing -> do
125
- exist <- liftIO $ getFileExistsVFS vfs file
126
- ShakeExtras { eventer } <- getShakeExtras
127
-
128
- -- add a listener for VFS Create/Delete file events,
129
- -- taking the FileExistsMap lock to prevent race conditions
130
- -- that would lead to multiple listeners for the same path
131
- modifyFileExistsAction $ \ x -> do
132
- case HashMap. alterF (,Just exist) file x of
133
- (Nothing , x') -> do
134
- -- if the listener addition fails, we never recover. This is a bug.
135
- addListener eventer file
136
- return x'
137
- (Just _, _) ->
138
- -- if the key was already there, do nothing
139
- return x
140
-
141
- pure (summarizeExists exist, ([] , Just exist))
142
- where
143
- addListener eventer fp = do
144
- reqId <- getLspId
145
- let
146
- req = RequestMessage " 2.0" reqId ClientRegisterCapability regParams
147
- fpAsId = T. pack $ fromNormalizedFilePath fp
148
- regParams = RegistrationParams (List [registration])
149
- registration = Registration fpAsId
150
- WorkspaceDidChangeWatchedFiles
151
- (Just (A. toJSON regOptions))
152
- regOptions =
153
- DidChangeWatchedFilesRegistrationOptions { _watchers = List [watcher] }
154
- watchKind = WatchKind { _watchCreate = True , _watchChange = False , _watchDelete = True }
155
- watcher = FileSystemWatcher { _globPattern = fromNormalizedFilePath fp
156
- , _kind = Just watchKind
157
- }
158
-
159
- eventer $ ReqRegisterCapability req
165
+ -> fileExistsRulesFast globs vfs
166
+ | otherwise -> fileExistsRulesSlow vfs
167
+
168
+ -- Requires an lsp client that provides WatchedFiles notifications, but assumes that this has already been checked.
169
+ fileExistsRulesFast :: [String ] -> VFSHandle -> Rules ()
170
+ fileExistsRulesFast globs vfs =
171
+ let patterns = fmap Glob. compile globs
172
+ fpMatches fp = any (\ p -> Glob. match p fp) patterns
173
+ in defineEarlyCutoff $ \ GetFileExists file -> do
174
+ isWf <- isWorkspaceFile file
175
+ if isWf && fpMatches (fromNormalizedFilePath file)
176
+ then fileExistsFast vfs file
177
+ else fileExistsSlow vfs file
178
+
179
+ {- Note [Invalidating file existence results]
180
+ We have two mechanisms for getting file existence information:
181
+ - The file existence cache
182
+ - The VFS lookup
183
+
184
+ Both of these affect the results of the 'GetFileExists' rule, so we need to make sure it
185
+ is invalidated properly when things change.
186
+
187
+ For the file existence cache, we manually flush the results of 'GetFileExists' when we
188
+ modify it (i.e. when a notification comes from the client). This is faster than using
189
+ 'alwaysRerun' in the 'fileExistsFast', and we need it to be as fast as possible.
190
+
191
+ For the VFS lookup, however, we won't get prompted to flush the result, so instead
192
+ we use 'alwaysRerun'.
193
+ -}
194
+
195
+ fileExistsFast :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS. ByteString , ([a ], Maybe Bool ))
196
+ fileExistsFast vfs file = do
197
+ -- Could in principle use 'alwaysRerun' here, but it's too slwo, See Note [Invalidating file existence results]
198
+ mp <- getFileExistsMapUntracked
199
+
200
+ let mbFilesWatched = HashMap. lookup file mp
201
+ exist <- case mbFilesWatched of
202
+ Just exist -> pure exist
203
+ -- We don't know about it: use the slow route.
204
+ -- Note that we do *not* call 'fileExistsSlow', as that would trigger 'alwaysRerun'.
205
+ Nothing -> liftIO $ getFileExistsVFS vfs file
206
+ pure (summarizeExists exist, ([] , Just exist))
160
207
161
208
summarizeExists :: Bool -> Maybe BS. ByteString
162
209
summarizeExists x = Just $ if x then BS. singleton 1 else BS. empty
163
210
164
- fileExistsRulesSlow :: VFSHandle -> Rules ()
211
+ fileExistsRulesSlow :: VFSHandle -> Rules ()
165
212
fileExistsRulesSlow vfs =
166
213
defineEarlyCutoff $ \ GetFileExists file -> fileExistsSlow vfs file
167
214
168
215
fileExistsSlow :: VFSHandle -> NormalizedFilePath -> Action (Maybe BS. ByteString , ([a ], Maybe Bool ))
169
216
fileExistsSlow vfs file = do
217
+ -- See Note [Invalidating file existence results]
170
218
alwaysRerun
171
219
exist <- liftIO $ getFileExistsVFS vfs file
172
220
pure (summarizeExists exist, ([] , Just exist))
0 commit comments