From 01bae1d7799abfbf363ca3f80416f12740296b9d Mon Sep 17 00:00:00 2001 From: Ziwei Wang Date: Wed, 27 Sep 2023 17:54:38 -0400 Subject: [PATCH 1/4] Feat: Implement basic semantic highlighting for functions and variables 1. Match function and variable declarations 2. Match function and variable references 3. References are matched only if they are declared first 4. Currently it doesn't take scope into account; tokens are generated as long as they are matched by the regex --- .../BitBakeDocumentSemanticTokensProvider.ts | 170 ++++++++++++++++++ client/src/extension.ts | 6 +- 2 files changed, 175 insertions(+), 1 deletion(-) create mode 100644 client/src/BitBakeDocumentSemanticTokensProvider.ts diff --git a/client/src/BitBakeDocumentSemanticTokensProvider.ts b/client/src/BitBakeDocumentSemanticTokensProvider.ts new file mode 100644 index 00000000..01b28b54 --- /dev/null +++ b/client/src/BitBakeDocumentSemanticTokensProvider.ts @@ -0,0 +1,170 @@ +import { SemanticTokensLegend, SemanticTokensBuilder } from 'vscode' +import type { TextDocument, CancellationToken, SemanticTokens, DocumentSemanticTokensProvider } from 'vscode' + +const tokenTypes = new Map() +const tokenModifiers = new Map() + +interface IParsedToken { + line: number + startCharacter: number + length: number + tokenType: string + tokenModifiers: string[] +} + +const generateSemanticTokensLegend = (): SemanticTokensLegend => { + const tokenTypesLegend = [ + 'function', 'variable' + ] + tokenTypesLegend.forEach((tokenType, index) => tokenTypes.set(tokenType, index)) + + const tokenModifiersLegend = [ + 'declaration' + ] + tokenModifiersLegend.forEach((tokenModifier, index) => tokenModifiers.set(tokenModifier, index)) + + return new SemanticTokensLegend(tokenTypesLegend, tokenModifiersLegend) +} + +export const legend = generateSemanticTokensLegend() + +export class BitBakeDocumentSemanticTokensProvider implements DocumentSemanticTokensProvider { + async provideDocumentSemanticTokens (document: TextDocument, token: 
CancellationToken): Promise { + const allTokens = this._parseText(document.getText()) + const builder = new SemanticTokensBuilder(legend) + allTokens.forEach((token) => { + builder.push(token.line, token.startCharacter, token.length, this._encodeTokenType(token.tokenType), this._encodeTokenModifiers(token.tokenModifiers)) + }) + return builder.build() + } + + // Check node_modules/@types/vscode/index.d.ts for more encoding details + private _encodeTokenType (tokenType: string): number { + if (tokenTypes.has(tokenType)) { + return Number(tokenTypes.get(tokenType)) + } else if (tokenType === 'notInLegend') { + return tokenTypes.size + 2 + } + return 0 + } + + private _encodeTokenModifiers (strTokenModifiers: string[]): number { + let result = 0 + for (let i = 0; i < strTokenModifiers.length; i++) { + const tokenModifier = strTokenModifiers[i] + if (tokenModifiers.has(tokenModifier)) { + result = result | (1 << Number(tokenModifiers.get(tokenModifier))) + } else if (tokenModifier === 'notInLegend') { + result = result | (1 << tokenModifiers.size + 2) + } + } + return result + } + + private _parseText (text: string): IParsedToken[] { + /** + * This function traverse the document twice. In the first traverse, the function and variable declarations will be matched and stored in an array which will be used to construct another array of regex for the second traverse. In the second traverse, the array of regex is used to match the function and variable references. 
+ */ + + // TODO: Share tokens with other handlers such as diagnosis provider + const resultTokens: IParsedToken[] = [] + const lines = text.split(/\r\n|\r|\n/) + let declaredFunctions: string[] = [] + let declaredVariables: string[] = [] + + const functionStartsWithDefRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ + const functionStartsWithPythonRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ + const shellFunctionsRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/ + const anonymousFunctionsRegex = /(\bpython)(?=\s*\(.*\)\s*\{)/ + const variableDeclarationRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/ + + // First traverse: Match functions and variables declarations + for (let i = 0; i < lines.length; i++) { + const line = lines[i] + + const matchFunctionStartsWithDef = functionStartsWithDefRegex.exec(line) + const matchFunctionStartsWithPython = functionStartsWithPythonRegex.exec(line) + const matchShellFunction = shellFunctionsRegex.exec(line) + const matchVariableDeclaration = variableDeclarationRegex.exec(line) + const matchAnonymousFunction = anonymousFunctionsRegex.exec(line) + + if (matchAnonymousFunction !== null) { + // Avoid mistakening anonymous functions as shell function declaration + continue + } + if (matchFunctionStartsWithDef?.groups?.name.length !== undefined) { + resultTokens.push({ + line: i, + startCharacter: matchFunctionStartsWithDef.index, + length: matchFunctionStartsWithDef?.groups?.name.length, + tokenType: 'function', + tokenModifiers: ['declaration'] + }) + declaredFunctions.push(matchFunctionStartsWithDef?.groups?.name) + } else if (matchFunctionStartsWithPython?.groups?.name.length !== undefined) { + resultTokens.push({ + line: i, + startCharacter: matchFunctionStartsWithPython.index, + length: matchFunctionStartsWithPython?.groups?.name.length, + tokenType: 'function', + tokenModifiers: ['declaration'] + }) + + 
declaredFunctions.push(matchFunctionStartsWithPython?.groups?.name) + } else if (matchShellFunction?.groups?.name.length !== undefined) { + resultTokens.push({ + line: i, + startCharacter: matchShellFunction.index, + length: matchShellFunction.groups?.name.length, + tokenType: 'function', + tokenModifiers: ['declaration'] + }) + + declaredFunctions.push(matchShellFunction.groups?.name) + } else if (matchVariableDeclaration?.groups?.name.length !== undefined) { + resultTokens.push({ + line: i, + startCharacter: matchVariableDeclaration.index, + length: matchVariableDeclaration.groups?.name.length, + tokenType: 'variable', + tokenModifiers: ['declaration'] + }) + + declaredVariables.push(matchVariableDeclaration.groups?.name) + } + } + // Remove duplicates + declaredFunctions = [...new Set(declaredFunctions)] + declaredVariables = [...new Set(declaredVariables)] + // Sort the regex from longest to shortest for precise matching + declaredFunctions.sort((prev, next) => next.length - prev.length) + declaredVariables.sort((prev, next) => next.length - prev.length) + + const declaredObjectsRegex: Array<{ regex: RegExp, type: string }> = [] + + declaredFunctions.forEach(declaredFunction => { + declaredObjectsRegex.push({ regex: new RegExp(`(? { + declaredObjectsRegex.push({ regex: new RegExp(`(? 
{ console.log('Congratulations, your extension "BitBake" is now active!') @@ -48,6 +50,8 @@ export async function activate (context: ExtensionContext): Promise { // Start the client and launch the server await client.start() + + context.subscriptions.push(languages.registerDocumentSemanticTokensProvider({ language: 'bitbake', scheme: 'file' }, new BitBakeDocumentSemanticTokensProvider(), legend)) } export function deactivate (): Thenable | undefined { From fa1660cfc55103146fb41ebddd10542a9117be84 Mon Sep 17 00:00:00 2001 From: Ziwei Wang Date: Fri, 29 Sep 2023 09:48:33 -0400 Subject: [PATCH 2/4] Chore: Remove hyphen as a possible first character in function or variable names --- client/src/BitBakeDocumentSemanticTokensProvider.ts | 8 ++++---- client/syntaxes/bitbake.tmLanguage.json | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/client/src/BitBakeDocumentSemanticTokensProvider.ts b/client/src/BitBakeDocumentSemanticTokensProvider.ts index 01b28b54..7f486e74 100644 --- a/client/src/BitBakeDocumentSemanticTokensProvider.ts +++ b/client/src/BitBakeDocumentSemanticTokensProvider.ts @@ -72,11 +72,11 @@ export class BitBakeDocumentSemanticTokensProvider implements DocumentSemanticTo let declaredFunctions: string[] = [] let declaredVariables: string[] = [] - const functionStartsWithDefRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ - const functionStartsWithPythonRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ - const shellFunctionsRegex = /(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/ + const functionStartsWithDefRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ + const functionStartsWithPythonRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ + const shellFunctionsRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/ const anonymousFunctionsRegex = /(\bpython)(?=\s*\(.*\)\s*\{)/ - const variableDeclarationRegex = 
/(?[a-zA-Z0-9_-][\w-]*)(?:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/ + const variableDeclarationRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/ // First traverse: Match functions and variables declarations for (let i = 0; i < lines.length; i++) { diff --git a/client/syntaxes/bitbake.tmLanguage.json b/client/syntaxes/bitbake.tmLanguage.json index 40b8cc41..57614634 100644 --- a/client/syntaxes/bitbake.tmLanguage.json +++ b/client/syntaxes/bitbake.tmLanguage.json @@ -139,7 +139,7 @@ } }, "functions": { - "begin": "\\s*([a-zA-Z0-9_-][\\w-]*)(:(append|prepend|remove))?(?=\\s*\\()", + "begin": "\\s*([a-zA-Z0-9_][\\w-]*)(:(append|prepend|remove))?(?=\\s*\\()", "beginCaptures": { "1": { "name": "entity.name.function.python.bb" @@ -207,7 +207,7 @@ ] }, "variable-name": { - "match": "([a-zA-Z0-9_-][\\w-]*)(?!\\s*\\()", + "match": "([a-zA-Z0-9_][\\w-]*)(?!\\s*\\()", "captures": { "1": { "name": "variable.other.names.bb" From c06560a66a1a532c426bad7c45c79d68a5ef096f Mon Sep 17 00:00:00 2001 From: Ziwei Wang Date: Fri, 29 Sep 2023 11:07:35 -0400 Subject: [PATCH 3/4] Chore: Update TextMate grammars 1. Remove digits as a possible first character, and hyphens, from function and variable names 2. 
Enable grammars for numeric and square brackets in variable expansion syntax --- client/syntaxes/bitbake.tmLanguage.json | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/client/syntaxes/bitbake.tmLanguage.json b/client/syntaxes/bitbake.tmLanguage.json index 57614634..8bbd9b45 100644 --- a/client/syntaxes/bitbake.tmLanguage.json +++ b/client/syntaxes/bitbake.tmLanguage.json @@ -139,7 +139,7 @@ } }, "functions": { - "begin": "\\s*([a-zA-Z0-9_][\\w-]*)(:(append|prepend|remove))?(?=\\s*\\()", + "begin": "\\s*([a-zA-Z_][\\w_]*)(:(append|prepend|remove))?(?=\\s*\\()", "beginCaptures": { "1": { "name": "entity.name.function.python.bb" @@ -189,6 +189,17 @@ { "include": "#keywords" }, + { + "match": "(\\[)", + "name": "support.function.bracket-open.bb" + }, + { + "match": "(\\])", + "name": "support.function.bracket-close.bb" + }, + { + "include": "#numeric" + }, { "include": "#functions" }, @@ -207,7 +218,7 @@ ] }, "variable-name": { - "match": "([a-zA-Z0-9_][\\w-]*)(?!\\s*\\()", + "match": "([a-zA-Z_][\\w_]*)", "captures": { "1": { "name": "variable.other.names.bb" From 449a2efe775bf8dd8e36c597a2b8b3cb610d83d9 Mon Sep 17 00:00:00 2001 From: Ziwei Wang Date: Fri, 29 Sep 2023 11:28:32 -0400 Subject: [PATCH 4/4] Chore: Remove digits as a possible first character, and hyphens, from function and variable names --- client/src/BitBakeDocumentSemanticTokensProvider.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/client/src/BitBakeDocumentSemanticTokensProvider.ts b/client/src/BitBakeDocumentSemanticTokensProvider.ts index 7f486e74..90f73414 100644 --- a/client/src/BitBakeDocumentSemanticTokensProvider.ts +++ b/client/src/BitBakeDocumentSemanticTokensProvider.ts @@ -66,17 +66,16 @@ export class BitBakeDocumentSemanticTokensProvider implements DocumentSemanticTo * This function traverse the document twice. 
In the first traverse, the function and variable declarations will be matched and stored in an array which will be used to construct another array of regex for the second traverse. In the second traverse, the array of regex is used to match the function and variable references. */ - // TODO: Share tokens with other handlers such as diagnosis provider const resultTokens: IParsedToken[] = [] const lines = text.split(/\r\n|\r|\n/) let declaredFunctions: string[] = [] let declaredVariables: string[] = [] - const functionStartsWithDefRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ - const functionStartsWithPythonRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\()/ - const shellFunctionsRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/ + const functionStartsWithDefRegex = /(?[a-zA-Z_][\w_]*)(?:(append|prepend|remove))?(?=\s*\()/ + const functionStartsWithPythonRegex = /(?[a-zA-Z_][\w_]*)(?:(append|prepend|remove))?(?=\s*\()/ + const shellFunctionsRegex = /(?[a-zA-Z_][\w_]*)(?:(append|prepend|remove))?(?=\s*\(.*\)\s*\{)/ const anonymousFunctionsRegex = /(\bpython)(?=\s*\(.*\)\s*\{)/ - const variableDeclarationRegex = /(?[a-zA-Z0-9_][\w-]*)(?:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/ + const variableDeclarationRegex = /(?[a-zA-Z_][\w_]*)(?:(append|prepend|remove))?(?=\s*=\s*)(?!\s*\()/ // First traverse: Match functions and variables declarations for (let i = 0; i < lines.length; i++) {