@@ -4,14 +4,15 @@ use std::{path::PathBuf, time::Instant};
44
55use ide:: {
66 AnalysisHost , LineCol , Moniker , MonikerDescriptorKind , MonikerIdentifier , MonikerResult ,
7- StaticIndex , StaticIndexedFile , SymbolInformationKind , TextRange , TokenId , TokenStaticData ,
8- VendoredLibrariesConfig ,
7+ RootDatabase , StaticIndex , StaticIndexedFile , SymbolInformationKind , TextRange , TokenId ,
8+ TokenStaticData , VendoredLibrariesConfig ,
99} ;
1010use ide_db:: LineIndexDatabase ;
1111use load_cargo:: { load_workspace_at, LoadCargoConfig , ProcMacroServerChoice } ;
1212use rustc_hash:: { FxHashMap , FxHashSet } ;
13- use scip:: types as scip_types;
13+ use scip:: types:: { self as scip_types, SymbolInformation } ;
1414use tracing:: error;
15+ use vfs:: FileId ;
1516
1617use crate :: {
1718 cli:: flags,
@@ -84,26 +85,40 @@ impl flags::Scip {
8485 text_document_encoding : scip_types:: TextEncoding :: UTF8 . into ( ) ,
8586 special_fields : Default :: default ( ) ,
8687 } ;
88+
8789 let mut documents = Vec :: new ( ) ;
8890
91+ // All TokenIds where an Occurrence has been emitted that references a symbol.
92+ let mut token_ids_referenced: FxHashSet < TokenId > = FxHashSet :: default ( ) ;
93+ // All TokenIds where the SymbolInformation has been written to the document.
8994 let mut token_ids_emitted: FxHashSet < TokenId > = FxHashSet :: default ( ) ;
90- let mut global_symbols_emitted: FxHashSet < String > = FxHashSet :: default ( ) ;
91- let mut duplicate_symbols: Vec < ( String , String ) > = Vec :: new ( ) ;
95+ // All FileIds emitted as documents.
96+ let mut file_ids_emitted: FxHashSet < FileId > = FxHashSet :: default ( ) ;
97+
98+ // All non-local symbols encountered, for detecting duplicate symbol errors.
99+ let mut nonlocal_symbols_emitted: FxHashSet < String > = FxHashSet :: default ( ) ;
100+ // List of (source_location, symbol) for duplicate symbol errors to report.
101+ let mut duplicate_symbol_errors: Vec < ( String , String ) > = Vec :: new ( ) ;
102+ // This is called after definitions have been deduplicated by token_ids_emitted. The purpose
103+ // is to detect reuse of symbol names because this causes ambiguity about their meaning.
104+ let mut record_error_if_symbol_already_used =
105+ |symbol : String , relative_path : & str , line_index : & LineIndex , text_range : TextRange | {
106+ let is_local = symbol. starts_with ( "local " ) ;
107+ if !is_local && !nonlocal_symbols_emitted. insert ( symbol. clone ( ) ) {
108+ let source_location =
109+ text_range_to_string ( relative_path, line_index, text_range) ;
110+ duplicate_symbol_errors. push ( ( source_location, symbol) ) ;
111+ }
112+ } ;
113+
114+ // Generates symbols from token monikers.
92115 let mut symbol_generator = SymbolGenerator :: new ( ) ;
93116
94117 for StaticIndexedFile { file_id, tokens, .. } in si. files {
95118 symbol_generator. clear_document_local_state ( ) ;
96119
97- let relative_path = match get_relative_filepath ( & vfs, & root, file_id) {
98- Some ( relative_path) => relative_path,
99- None => continue ,
100- } ;
101-
102- let line_index = LineIndex {
103- index : db. line_index ( file_id) ,
104- encoding : PositionEncoding :: Utf8 ,
105- endings : LineEndings :: Unix ,
106- } ;
120+ let Some ( relative_path) = get_relative_filepath ( & vfs, & root, file_id) else { continue } ;
121+ let line_index = get_line_index ( db, file_id) ;
107122
108123 let mut occurrences = Vec :: new ( ) ;
109124 let mut symbols = Vec :: new ( ) ;
@@ -120,54 +135,45 @@ impl flags::Scip {
120135 ( "" . to_owned ( ) , None )
121136 } ;
122137
123- if !symbol. is_empty ( ) && token_ids_emitted. insert ( id) {
124- if !symbol. starts_with ( "local " )
125- && !global_symbols_emitted. insert ( symbol. clone ( ) )
126- {
127- let source_location =
128- text_range_to_string ( relative_path. as_str ( ) , & line_index, text_range) ;
129- duplicate_symbols. push ( ( source_location, symbol. clone ( ) ) ) ;
138+ if !symbol. is_empty ( ) {
139+ let is_defined_in_this_document = match token. definition {
140+ Some ( def) => def. file_id == file_id,
141+ _ => false ,
142+ } ;
143+ if is_defined_in_this_document {
144+ if token_ids_emitted. insert ( id) {
145+ // token_ids_emitted does deduplication. This checks that this results
146+ // in unique emitted symbols, as otherwise references are ambiguous.
147+ record_error_if_symbol_already_used (
148+ symbol. clone ( ) ,
149+ relative_path. as_str ( ) ,
150+ & line_index,
151+ text_range,
152+ ) ;
153+ symbols. push ( compute_symbol_info (
154+ relative_path. clone ( ) ,
155+ symbol. clone ( ) ,
156+ enclosing_symbol,
157+ token,
158+ ) ) ;
159+ }
130160 } else {
131- let documentation = match & token. documentation {
132- Some ( doc) => vec ! [ doc. as_str( ) . to_owned( ) ] ,
133- None => vec ! [ ] ,
134- } ;
135-
136- let position_encoding =
137- scip_types:: PositionEncoding :: UTF8CodeUnitOffsetFromLineStart . into ( ) ;
138- let signature_documentation =
139- token. signature . clone ( ) . map ( |text| scip_types:: Document {
140- relative_path : relative_path. clone ( ) ,
141- language : "rust" . to_owned ( ) ,
142- text,
143- position_encoding,
144- ..Default :: default ( )
145- } ) ;
146- let symbol_info = scip_types:: SymbolInformation {
147- symbol : symbol. clone ( ) ,
148- documentation,
149- relationships : Vec :: new ( ) ,
150- special_fields : Default :: default ( ) ,
151- kind : symbol_kind ( token. kind ) . into ( ) ,
152- display_name : token. display_name . clone ( ) . unwrap_or_default ( ) ,
153- signature_documentation : signature_documentation. into ( ) ,
154- enclosing_symbol : enclosing_symbol. unwrap_or_default ( ) ,
155- } ;
156-
157- symbols. push ( symbol_info)
161+ token_ids_referenced. insert ( id) ;
158162 }
159163 }
160164
161165 // If the range of the def and the range of the token are the same, this must be the definition.
162166 // they also must be in the same file. See https://github.com/rust-lang/rust-analyzer/pull/17988
163- let mut symbol_roles = Default :: default ( ) ;
164- match token. definition {
165- Some ( def) if def. file_id == file_id && def. range == text_range => {
166- symbol_roles |= scip_types:: SymbolRole :: Definition as i32 ;
167- }
168- _ => { }
167+ let is_definition = match token. definition {
168+ Some ( def) => def. file_id == file_id && def. range == text_range,
169+ _ => false ,
169170 } ;
170171
172+ let mut symbol_roles = Default :: default ( ) ;
173+ if is_definition {
174+ symbol_roles |= scip_types:: SymbolRole :: Definition as i32 ;
175+ }
176+
171177 occurrences. push ( scip_types:: Occurrence {
172178 range : text_range_to_scip_range ( & line_index, text_range) ,
173179 symbol,
@@ -195,18 +201,61 @@ impl flags::Scip {
195201 position_encoding,
196202 special_fields : Default :: default ( ) ,
197203 } ) ;
204+ if !file_ids_emitted. insert ( file_id) {
205+ panic ! ( "Invariant violation: file emitted multiple times." ) ;
206+ }
207+ }
208+
209+ // Collect all symbols referenced by the files but not defined within them.
210+ let mut external_symbols = Vec :: new ( ) ;
211+ for id in token_ids_referenced. difference ( & token_ids_emitted) {
212+ let id = * id;
213+ let token = si. tokens . get ( id) . unwrap ( ) ;
214+
215+ let Some ( definition) = token. definition else {
216+ break ;
217+ } ;
218+
219+ let file_id = definition. file_id ;
220+ let Some ( relative_path) = get_relative_filepath ( & vfs, & root, file_id) else { continue } ;
221+ let line_index = get_line_index ( db, file_id) ;
222+ let text_range = definition. range ;
223+ if file_ids_emitted. contains ( & file_id) {
224+ tracing:: error!(
225+ "Bug: definition at {} should have been in an SCIP document but was not." ,
226+ text_range_to_string( relative_path. as_str( ) , & line_index, text_range)
227+ ) ;
228+ continue ;
229+ }
230+
231+ let TokenSymbols { symbol, enclosing_symbol } = symbol_generator
232+ . token_symbols ( id, token)
233+ . expect ( "To have been referenced, the symbol must be in the cache." ) ;
234+
235+ record_error_if_symbol_already_used (
236+ symbol. clone ( ) ,
237+ relative_path. as_str ( ) ,
238+ & line_index,
239+ text_range,
240+ ) ;
241+ external_symbols. push ( compute_symbol_info (
242+ relative_path. clone ( ) ,
243+ symbol. clone ( ) ,
244+ enclosing_symbol,
245+ token,
246+ ) ) ;
198247 }
199248
200249 let index = scip_types:: Index {
201250 metadata : Some ( metadata) . into ( ) ,
202251 documents,
203- external_symbols : Vec :: new ( ) ,
252+ external_symbols,
204253 special_fields : Default :: default ( ) ,
205254 } ;
206255
207- if !duplicate_symbols . is_empty ( ) {
256+ if !duplicate_symbol_errors . is_empty ( ) {
208257 eprintln ! ( "{}" , DUPLICATE_SYMBOLS_MESSAGE ) ;
209- for ( source_location, symbol) in duplicate_symbols {
258+ for ( source_location, symbol) in duplicate_symbol_errors {
210259 eprintln ! ( "{}" , source_location) ;
211260 eprintln ! ( " Duplicate symbol: {}" , symbol) ;
212261 eprintln ! ( ) ;
@@ -239,6 +288,37 @@ Known cases that can cause this:
239288Duplicate symbols encountered:
240289" ;
241290
291+ fn compute_symbol_info (
292+ relative_path : String ,
293+ symbol : String ,
294+ enclosing_symbol : Option < String > ,
295+ token : & TokenStaticData ,
296+ ) -> SymbolInformation {
297+ let documentation = match & token. documentation {
298+ Some ( doc) => vec ! [ doc. as_str( ) . to_owned( ) ] ,
299+ None => vec ! [ ] ,
300+ } ;
301+
302+ let position_encoding = scip_types:: PositionEncoding :: UTF8CodeUnitOffsetFromLineStart . into ( ) ;
303+ let signature_documentation = token. signature . clone ( ) . map ( |text| scip_types:: Document {
304+ relative_path,
305+ language : "rust" . to_owned ( ) ,
306+ text,
307+ position_encoding,
308+ ..Default :: default ( )
309+ } ) ;
310+ scip_types:: SymbolInformation {
311+ symbol,
312+ documentation,
313+ relationships : Vec :: new ( ) ,
314+ special_fields : Default :: default ( ) ,
315+ kind : symbol_kind ( token. kind ) . into ( ) ,
316+ display_name : token. display_name . clone ( ) . unwrap_or_default ( ) ,
317+ signature_documentation : signature_documentation. into ( ) ,
318+ enclosing_symbol : enclosing_symbol. unwrap_or_default ( ) ,
319+ }
320+ }
321+
242322fn get_relative_filepath (
243323 vfs : & vfs:: Vfs ,
244324 rootpath : & vfs:: AbsPathBuf ,
@@ -247,6 +327,14 @@ fn get_relative_filepath(
247327 Some ( vfs. file_path ( file_id) . as_path ( ) ?. strip_prefix ( rootpath) ?. as_str ( ) . to_owned ( ) )
248328}
249329
330+ fn get_line_index ( db : & RootDatabase , file_id : FileId ) -> LineIndex {
331+ LineIndex {
332+ index : db. line_index ( file_id) ,
333+ encoding : PositionEncoding :: Utf8 ,
334+ endings : LineEndings :: Unix ,
335+ }
336+ }
337+
250338// SCIP ranges have a (very significant) optimization: a range that starts and ends on the same
251339// line is encoded as just the vector [start_line, start_col, end_col].
252340//
0 commit comments