diff --git a/Sources/SemanticIndex/CheckedIndex.swift b/Sources/SemanticIndex/CheckedIndex.swift index ea74f8f0d..022a4112b 100644 --- a/Sources/SemanticIndex/CheckedIndex.swift +++ b/Sources/SemanticIndex/CheckedIndex.swift @@ -122,7 +122,7 @@ public final class CheckedIndex { public func mainFilesContainingFile(uri: DocumentURI, crossLanguage: Bool = false) -> [DocumentURI] { return index.mainFilesContainingFile(path: uri.pseudoPath, crossLanguage: crossLanguage).compactMap { let url = URL(fileURLWithPath: $0) - guard checker.indexHasUpToDateUnit(for: url, index: self.index) else { + guard checker.indexHasUpToDateUnit(for: url, mainFile: nil, index: self.index) else { return nil } return DocumentURI(url) @@ -137,8 +137,12 @@ public final class CheckedIndex { /// Return `true` if a unit file has been indexed for the given file path after its last modification date. /// /// This means that at least a single build configuration of this file has been indexed since its last modification. - public func hasUpToDateUnit(for url: URL) -> Bool { - return checker.indexHasUpToDateUnit(for: url, index: index) + /// + /// If `mainFile` is passed, then `url` is a header file that won't have a unit associated with it. `mainFile` is + /// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit + /// for `mainFile` is newer than the mtime of the header file at `url`. + public func hasUpToDateUnit(for url: URL, mainFile: URL? = nil) -> Bool { + return checker.indexHasUpToDateUnit(for: url, mainFile: mainFile, index: index) } /// Returns true if the file at the given URL has a different content in the document manager than on-disk. This is @@ -254,7 +258,11 @@ private struct IndexOutOfDateChecker { /// Return `true` if a unit file has been indexed for the given file path after its last modification date. /// /// This means that at least a single build configuration of this file has been indexed since its last modification. - mutating func indexHasUpToDateUnit(for filePath: URL, index: IndexStoreDB) -> Bool { + /// + /// If `mainFile` is passed, then `filePath` is a header file that won't have a unit associated with it. `mainFile` is + /// assumed to be a file that imports `url`. To check that `url` has an up-to-date unit, check that the latest unit + /// for `mainFile` is newer than the mtime of the header file at `url`. + mutating func indexHasUpToDateUnit(for filePath: URL, mainFile: URL?, index: IndexStoreDB) -> Bool { switch checkLevel { case .inMemoryModifiedFiles(let documentManager): if fileHasInMemoryModifications(filePath, documentManager: documentManager) { @@ -265,7 +273,7 @@ private struct IndexOutOfDateChecker { // If there are no in-memory modifications check if there are on-disk modifications. fallthrough case .modifiedFiles: - guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: filePath.path) else { + guard let lastUnitDate = index.dateOfLatestUnitFor(filePath: (mainFile ?? filePath).path) else { return false } do { diff --git a/Sources/SemanticIndex/SemanticIndexManager.swift b/Sources/SemanticIndex/SemanticIndexManager.swift index 7f0e59405..9718714e4 100644 --- a/Sources/SemanticIndex/SemanticIndexManager.swift +++ b/Sources/SemanticIndex/SemanticIndexManager.swift @@ -115,7 +115,7 @@ public final actor SemanticIndexManager { /// Returns immediately after scheduling that task. /// /// Indexing is being performed with a low priority. - public func scheduleBackgroundIndex(files: some Collection) async { + private func scheduleBackgroundIndex(files: some Collection) async { await self.index(files: files, priority: .low) } @@ -127,7 +127,7 @@ public final actor SemanticIndexManager { generateBuildGraphTask = Task(priority: .low) { await orLog("Generating build graph") { try await self.buildSystemManager.generateBuildGraph() } let index = index.checked(for: .modifiedFiles) - let filesToIndex = await self.buildSystemManager.sourceFiles().map(\.uri) + let filesToIndex = await self.buildSystemManager.sourceFiles().lazy.map(\.uri) .filter { uri in guard let url = uri.fileURL else { // The URI is not a file, so there's nothing we can index. @@ -185,31 +185,41 @@ public final actor SemanticIndexManager { } public func filesDidChange(_ events: [FileEvent]) async { - let filesToReIndex = await filesToReIndex(changedFiles: events.map(\.uri)) - + // We only re-index the files that were changed and don't re-index any of their dependencies. See the + // `Documentation/Files_To_Reindex.md` file. + let changedFiles = events.map(\.uri) // Reset the index status for these files so they get re-indexed by `index(files:priority:)` - for uri in filesToReIndex { + for uri in changedFiles { indexStatus[uri] = nil } - await scheduleBackgroundIndex(files: filesToReIndex) + await scheduleBackgroundIndex(files: changedFiles) } - /// Returns the files that should be re-indexed if the given files have been modified. + /// Returns the files that should be indexed to get up-to-date index information for the given files. /// - /// - SeeAlso: The `Documentation/Files_To_Reindex.md` file. - private func filesToReIndex(changedFiles: [DocumentURI]) async -> Set { + /// If `files` contains a header file, this will return a `FileToIndex` that re-indexes a main file which includes the + /// header file to update the header file's index. + private func filesToIndex(toCover files: some Collection) async -> [FileToIndex] { let sourceFiles = Set(await buildSystemManager.sourceFiles().map(\.uri)) - let filesToReIndex = await changedFiles.asyncMap { (uri) -> [DocumentURI] in + let filesToReIndex = await files.asyncCompactMap { (uri) -> FileToIndex? in if sourceFiles.contains(uri) { - // If this is a source file, re-index it - return [uri] + // If this is a source file, just index it. + return FileToIndex(uri: uri, mainFile: nil) + } + // Otherwise, see if it is a header file. If so, index a main file that that imports it to update header file's + // index. + // Deterministically pick a main file. This ensures that we always pick the same main file for a header. This way, + // if we request the same header to be indexed twice, we'll pick the same unit file the second time around, + // realize that its timestamp is later than the modification date of the header and we don't need to re-index. + let mainFile = index.checked(for: .deletedFiles) + .mainFilesContainingFile(uri: uri, crossLanguage: false) + .sorted(by: { $0.stringValue < $1.stringValue }).first + guard let mainFile else { + return nil } - // Otherwise, see if it is a header file. If so, re-index all the files that import it. - // We don't want to re-index `.swift` files that depend on a header file, similar to how we don't re-index a Swift - // module if one of its dependent modules has changed. - return index.checked(for: .deletedFiles).mainFilesContainingFile(uri: uri, crossLanguage: false) - }.flatMap { $0 } - return Set(filesToReIndex) + return FileToIndex(uri: uri, mainFile: mainFile) + } + return filesToReIndex } // MARK: - Helper functions @@ -227,24 +237,25 @@ public final actor SemanticIndexManager { } /// Update the index store for the given files, assuming that their targets have already been prepared. - private func updateIndexStore(for files: [DocumentURI], priority: TaskPriority?) async { + private func updateIndexStore(for files: [FileToIndex], priority: TaskPriority?) async { let taskDescription = AnyIndexTaskDescription( UpdateIndexStoreTaskDescription( filesToIndex: Set(files), - buildSystemManager: self.buildSystemManager + buildSystemManager: self.buildSystemManager, + index: index ) ) let updateIndexStoreTask = await self.indexTaskScheduler.schedule(priority: priority, taskDescription) { newState in switch newState { case .executing: for file in files { - if case .scheduled(let task) = self.indexStatus[file] { - self.indexStatus[file] = .executing(task) + if case .scheduled(let task) = self.indexStatus[file.uri] { + self.indexStatus[file.uri] = .executing(task) } else { logger.fault( """ - Index status of \(file) is in an unexpected state \ - '\(self.indexStatus[file]?.description ?? "", privacy: .public)' when update index store task \ + Index status of \(file.uri) is in an unexpected state \ + '\(self.indexStatus[file.uri]?.description ?? "", privacy: .public)' when update index store task \ started executing """ ) @@ -252,13 +263,13 @@ public final actor SemanticIndexManager { } case .cancelledToBeRescheduled: for file in files { - if case .executing(let task) = self.indexStatus[file] { - self.indexStatus[file] = .scheduled(task) + if case .executing(let task) = self.indexStatus[file.uri] { + self.indexStatus[file.uri] = .scheduled(task) } else { logger.fault( """ - Index status of \(file) is in an unexpected state \ - '\(self.indexStatus[file]?.description ?? "", privacy: .public)' when update index store task \ + Index status of \(file.uri) is in an unexpected state \ + '\(self.indexStatus[file.uri]?.description ?? "", privacy: .public)' when update index store task \ is cancelled to be rescheduled. """ ) @@ -266,7 +277,7 @@ public final actor SemanticIndexManager { } case .finished: for file in files { - self.indexStatus[file] = .upToDate + self.indexStatus[file.uri] = .upToDate } self.indexTaskDidFinish() } @@ -279,20 +290,20 @@ public final actor SemanticIndexManager { /// The returned task finishes when all files are indexed. @discardableResult private func index(files: some Collection, priority: TaskPriority?) async -> Task { - let outOfDateFiles = files.filter { - if case .upToDate = indexStatus[$0] { + let outOfDateFiles = await filesToIndex(toCover: files).filter { + if case .upToDate = indexStatus[$0.uri] { return false } return true } - .sorted(by: { $0.stringValue < $1.stringValue }) // sort files to get deterministic indexing order + .sorted(by: { $0.uri.stringValue < $1.uri.stringValue }) // sort files to get deterministic indexing order // Sort the targets in topological order so that low-level targets get built before high-level targets, allowing us // to index the low-level targets ASAP. - var filesByTarget: [ConfiguredTarget: [DocumentURI]] = [:] + var filesByTarget: [ConfiguredTarget: [FileToIndex]] = [:] for file in outOfDateFiles { - guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file) else { - logger.error("Not indexing \(file.forLogging) because the target could not be determined") + guard let target = await buildSystemManager.canonicalConfiguredTarget(for: file.uri) else { + logger.error("Not indexing \(file.uri.forLogging) because the target could not be determined") continue } filesByTarget[target, default: []].append(file) @@ -349,7 +360,7 @@ public final actor SemanticIndexManager { // setting it to `.scheduled` because we don't have an `await` call between the creation of `indexTask` and // this loop, so we still have exclusive access to the `SemanticIndexManager` actor and hence `updateIndexStore` // can't execute until we have set all index statuses to `.scheduled`. - indexStatus[file] = .scheduled(indexTask) + indexStatus[file.uri] = .scheduled(indexTask) } indexTasksWereScheduled(filesToIndex.count) } diff --git a/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift b/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift index abc7dd7f2..bd4c5b749 100644 --- a/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift +++ b/Sources/SemanticIndex/UpdateIndexStoreTaskDescription.swift @@ -22,6 +22,16 @@ import class TSCBasic.Process private nonisolated(unsafe) var updateIndexStoreIDForLogging = AtomicUInt32(initialValue: 1) +/// Information about a file that should be indexed. +/// +/// The URI of the file whose index should be updated. This could be a header file that can't actually be indexed on +/// its own. In that case `mainFile` is the file that should be indexed, which will effectively update the index of +/// `uri`. +struct FileToIndex: Hashable { + let uri: DocumentURI + let mainFile: DocumentURI? +} + /// Describes a task to index a set of source files. /// /// This task description can be scheduled in a `TaskScheduler`. @@ -30,11 +40,15 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription { public let id = updateIndexStoreIDForLogging.fetchAndIncrement() /// The files that should be indexed. - private let filesToIndex: Set + private let filesToIndex: Set /// The build system manager that is used to get the toolchain and build settings for the files to index. private let buildSystemManager: BuildSystemManager + /// A reference to the underlying index store. Used to check if the index is already up-to-date for a file, in which + /// case we don't need to index it again. + private let index: UncheckedIndex + /// The task is idempotent because indexing the same file twice produces the same result as indexing it once. public var isIdempotent: Bool { true } @@ -49,11 +63,13 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription { } init( - filesToIndex: Set, - buildSystemManager: BuildSystemManager + filesToIndex: Set, + buildSystemManager: BuildSystemManager, + index: UncheckedIndex ) { self.filesToIndex = filesToIndex self.buildSystemManager = buildSystemManager + self.index = index } public func execute() async { @@ -66,12 +82,12 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription { ) { let startDate = Date() - let filesToIndexDescription = filesToIndex.map { $0.fileURL?.lastPathComponent ?? $0.stringValue } + let filesToIndexDescription = filesToIndex.map { $0.uri.fileURL?.lastPathComponent ?? $0.uri.stringValue } .joined(separator: ", ") logger.log( "Starting updating index store with priority \(Task.currentPriority.rawValue, privacy: .public): \(filesToIndexDescription)" ) - let filesToIndex = filesToIndex.sorted(by: { $0.stringValue < $1.stringValue }) + let filesToIndex = filesToIndex.sorted(by: { $0.uri.stringValue < $1.uri.stringValue }) // TODO (indexing): Once swiftc supports it, we should group files by target and index files within the same // target together in one swiftc invocation. // https://github.com/apple/sourcekit-lsp/issues/1268 @@ -104,44 +120,60 @@ public struct UpdateIndexStoreTaskDescription: IndexTaskDescription { } } - private func updateIndexStoreForSingleFile(_ uri: DocumentURI) async { - guard let language = await buildSystemManager.defaultLanguage(for: uri) else { - logger.error("Not indexing \(uri.forLogging) because its language could not be determined") + private func updateIndexStoreForSingleFile(_ fileToIndex: FileToIndex) async { + let mainFileUri = fileToIndex.mainFile ?? fileToIndex.uri + guard let fileToIndexUrl = fileToIndex.uri.fileURL else { + // The URI is not a file, so there's nothing we can index. + return + } + guard + !index.checked(for: .modifiedFiles).hasUpToDateUnit(for: fileToIndexUrl, mainFile: fileToIndex.mainFile?.fileURL) + else { + logger.debug("Not indexing \(fileToIndex.uri.forLogging) because index has an up-to-date unit") + // We consider a file's index up-to-date if we have any up-to-date unit. Changing build settings does not + // invalidate the up-to-date status of the index. + return + } + if let mainFile = fileToIndex.mainFile { + logger.log("Updating index store of \(fileToIndex.uri.forLogging) using main file \(mainFile.forLogging)") + } + guard let language = await buildSystemManager.defaultLanguage(for: mainFileUri) else { + logger.error("Not indexing \(fileToIndex.uri.forLogging) because its language could not be determined") return } let buildSettings = await buildSystemManager.buildSettingsInferredFromMainFile( - for: uri, + for: mainFileUri, language: language, logBuildSettings: false ) guard let buildSettings else { - logger.error("Not indexing \(uri.forLogging) because it has no compiler arguments") + logger.error("Not indexing \(fileToIndex.uri.forLogging) because it has no compiler arguments") return } - guard let toolchain = await buildSystemManager.toolchain(for: uri, language) else { + guard let toolchain = await buildSystemManager.toolchain(for: mainFileUri, language) else { logger.error( - "Not updating index store for \(uri.forLogging) because no toolchain could be determined for the document" + "Not updating index store for \(mainFileUri.forLogging) because no toolchain could be determined for the document" ) return } switch language { case .swift: do { - try await updateIndexStore(forSwiftFile: uri, buildSettings: buildSettings, toolchain: toolchain) + try await updateIndexStore(forSwiftFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain) } catch { - logger.error("Updating index store for \(uri) failed: \(error.forLogging)") - BuildSettingsLogger.log(settings: buildSettings, for: uri) + logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)") + BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri) } case .c, .cpp, .objective_c, .objective_cpp: do { - try await updateIndexStore(forClangFile: uri, buildSettings: buildSettings, toolchain: toolchain) + try await updateIndexStore(forClangFile: mainFileUri, buildSettings: buildSettings, toolchain: toolchain) } catch { - logger.error("Updating index store for \(uri) failed: \(error.forLogging)") - BuildSettingsLogger.log(settings: buildSettings, for: uri) + logger.error("Updating index store for \(fileToIndex.uri) failed: \(error.forLogging)") + BuildSettingsLogger.log(settings: buildSettings, for: mainFileUri) } default: logger.error( - "Not updating index store for \(uri) because it is a language that is not supported by background indexing" + "Not updating index store for \(fileToIndex.uri) because it is a language that is not supported by background indexing" ) } } diff --git a/Tests/SourceKitLSPTests/BackgroundIndexingTests.swift b/Tests/SourceKitLSPTests/BackgroundIndexingTests.swift index 218a2c7ea..75aa8806e 100644 --- a/Tests/SourceKitLSPTests/BackgroundIndexingTests.swift +++ b/Tests/SourceKitLSPTests/BackgroundIndexingTests.swift @@ -440,7 +440,7 @@ final class BackgroundIndexingTests: XCTestCase { ) } - func testBackgroundIndexingReindexesMainFilesWhenHeaderIsModified() async throws { + func testBackgroundIndexingReindexesHeader() async throws { let project = try await SwiftPMTestProject( files: [ "MyLibrary/include/Header.h": """ @@ -448,21 +448,6 @@ final class BackgroundIndexingTests: XCTestCase { """, "MyFile.c": """ #include "Header.h" - - void 2️⃣test() { - #ifdef MY_FLAG - 3️⃣someFunc(); - #endif - } - """, - "MyOtherFile.c": """ - #include "Header.h" - - void 4️⃣otherTest() { - #ifdef MY_FLAG - 5️⃣someFunc(); - #endif - } """, ], build: true, @@ -482,13 +467,14 @@ final class BackgroundIndexingTests: XCTestCase { let headerNewMarkedContents = """ void someFunc(); - #define MY_FLAG 1 + void 2️⃣test() { + 3️⃣someFunc(); + }; """ - - let headerFileUrl = try XCTUnwrap(uri.fileURL) + let newPositions = DocumentPositions(markedText: headerNewMarkedContents) try extractMarkers(headerNewMarkedContents).textWithoutMarkers.write( - to: headerFileUrl, + to: try XCTUnwrap(uri.fileURL), atomically: true, encoding: .utf8 ) @@ -502,130 +488,21 @@ final class BackgroundIndexingTests: XCTestCase { XCTAssertEqual( callsAfterEdit, [ - CallHierarchyIncomingCall( - from: CallHierarchyItem( - name: "otherTest", - kind: .function, - tags: nil, - uri: try project.uri(for: "MyOtherFile.c"), - range: try project.range(from: "4️⃣", to: "4️⃣", in: "MyOtherFile.c"), - selectionRange: try project.range(from: "4️⃣", to: "4️⃣", in: "MyOtherFile.c"), - data: .dictionary([ - "usr": .string("c:@F@otherTest"), - "uri": .string(try project.uri(for: "MyOtherFile.c").stringValue), - ]) - ), - fromRanges: [try project.range(from: "5️⃣", to: "5️⃣", in: "MyOtherFile.c")] - ), - CallHierarchyIncomingCall( - from: CallHierarchyItem( - name: "test", - kind: .function, - tags: nil, - uri: try project.uri(for: "MyFile.c"), - range: try project.range(from: "2️⃣", to: "2️⃣", in: "MyFile.c"), - selectionRange: try project.range(from: "2️⃣", to: "2️⃣", in: "MyFile.c"), - data: .dictionary([ - "usr": .string("c:@F@test"), - "uri": .string(try project.uri(for: "MyFile.c").stringValue), - ]) - ), - fromRanges: [try project.range(from: "3️⃣", to: "3️⃣", in: "MyFile.c")] - ), - ] - ) - } - - func testBackgroundIndexingReindexesMainFilesWhenTransitiveHeaderIsModified() async throws { - let project = try await SwiftPMTestProject( - files: [ - "MyLibrary/include/OtherHeader.h": "", - "MyLibrary/include/Header.h": """ - #include "OtherHeader.h" - void 1️⃣someFunc(); - """, - "MyFile.c": """ - #include "Header.h" - - void 2️⃣test() { - #ifdef MY_FLAG - 3️⃣someFunc(); - #endif - } - """, - "MyOtherFile.c": """ - #include "Header.h" - - void 4️⃣otherTest() { - #ifdef MY_FLAG - 5️⃣someFunc(); - #endif - } - """, - ], - build: true, - serverOptions: backgroundIndexingOptions - ) - - let (uri, positions) = try project.openDocument("Header.h", language: .c) - let prepare = try await project.testClient.send( - CallHierarchyPrepareRequest(textDocument: TextDocumentIdentifier(uri), position: positions["1️⃣"]) - ) - - let callsBeforeEdit = try await project.testClient.send( - CallHierarchyIncomingCallsRequest(item: try XCTUnwrap(prepare?.only)) - ) - XCTAssertEqual(callsBeforeEdit, []) - - let otherHeaderFileUrl = try XCTUnwrap(project.uri(for: "OtherHeader.h").fileURL) - - try "#define MY_FLAG 1".write( - to: otherHeaderFileUrl, - atomically: true, - encoding: .utf8 - ) - - project.testClient.send( - DidChangeWatchedFilesNotification(changes: [FileEvent(uri: DocumentURI(otherHeaderFileUrl), type: .changed)]) - ) - _ = try await project.testClient.send(PollIndexRequest()) - - let callsAfterEdit = try await project.testClient.send( - CallHierarchyIncomingCallsRequest(item: try XCTUnwrap(prepare?.only)) - ) - XCTAssertEqual( - callsAfterEdit, - [ - CallHierarchyIncomingCall( - from: CallHierarchyItem( - name: "otherTest", - kind: .function, - tags: nil, - uri: try project.uri(for: "MyOtherFile.c"), - range: try project.range(from: "4️⃣", to: "4️⃣", in: "MyOtherFile.c"), - selectionRange: try project.range(from: "4️⃣", to: "4️⃣", in: "MyOtherFile.c"), - data: .dictionary([ - "usr": .string("c:@F@otherTest"), - "uri": .string(try project.uri(for: "MyOtherFile.c").stringValue), - ]) - ), - fromRanges: [try project.range(from: "5️⃣", to: "5️⃣", in: "MyOtherFile.c")] - ), CallHierarchyIncomingCall( from: CallHierarchyItem( name: "test", kind: .function, tags: nil, - uri: try project.uri(for: "MyFile.c"), - range: try project.range(from: "2️⃣", to: "2️⃣", in: "MyFile.c"), - selectionRange: try project.range(from: "2️⃣", to: "2️⃣", in: "MyFile.c"), + uri: uri, + range: Range(newPositions["2️⃣"]), + selectionRange: Range(newPositions["2️⃣"]), data: .dictionary([ "usr": .string("c:@F@test"), - "uri": .string(try project.uri(for: "MyFile.c").stringValue), + "uri": .string(uri.stringValue), ]) ), - fromRanges: [try project.range(from: "3️⃣", to: "3️⃣", in: "MyFile.c")] - ), + fromRanges: [Range(newPositions["3️⃣"])] + ) ] ) }