Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lexical classification for template strings #1744

Closed
wants to merge 7 commits into from
Closed
93 changes: 84 additions & 9 deletions src/services/services.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,9 @@ module ts {
InMultiLineCommentTrivia,
InSingleQuoteStringLiteral,
InDoubleQuoteStringLiteral,
InTemplateHeadOrNoSubstitutionTemplate,
InTemplateMiddleOrTail,
InTemplateSubstitutionPosition,
}

export enum TokenClass {
Expand All @@ -1476,7 +1479,7 @@ module ts {
}

export interface Classifier {
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult;
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): ClassificationResult;
}

export interface DocumentRegistry {
Expand Down Expand Up @@ -5638,6 +5641,10 @@ module ts {
noRegexTable[SyntaxKind.TrueKeyword] = true;
noRegexTable[SyntaxKind.FalseKeyword] = true;

// Just a stack of TemplateHeads and OpenCurlyBraces, used
// to perform rudimentary classification on templates.
var templateStack: SyntaxKind[] = [];

function isAccessibilityModifier(kind: SyntaxKind) {
switch (kind) {
case SyntaxKind.PublicKeyword:
Expand Down Expand Up @@ -5671,13 +5678,19 @@ module ts {
// if there are more cases we want the classifier to be better at.
return true;
}

// 'classifyKeywordsInGenerics' should be 'true' when a syntactic classifier is not present.
function getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): ClassificationResult {

// If there is a syntactic classifier ('syntacticClassifierAbsent' is false),
// we will be more conservative in order to avoid conflicting with the syntactic classifier.
function getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): ClassificationResult {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment on this function, syntacticClassifierAbsent is now more interesting. you want to add comments on what it does.

var offset = 0;
var token = SyntaxKind.Unknown;
var lastNonTriviaToken = SyntaxKind.Unknown;

// Empty out the template stack for reuse.
while (templateStack.length > 0) {
templateStack.pop();
}

// If we're in a string literal, then prepend: "\
// (and a newline). That way when we lex we'll think we're still in a string literal.
//
Expand All @@ -5696,6 +5709,17 @@ module ts {
text = "/*\n" + text;
offset = 3;
break;
case EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate:
text = "`\n" + text;
offset = 2;
break;
case EndOfLineState.InTemplateMiddleOrTail:
text = "}\n" + text;
offset = 2;
// fallthrough
case EndOfLineState.InTemplateSubstitutionPosition:
templateStack.push(SyntaxKind.TemplateHead);
break;
}

scanner.setText(text);
Expand Down Expand Up @@ -5757,16 +5781,49 @@ module ts {
angleBracketStack--;
}
else if (token === SyntaxKind.AnyKeyword ||
token === SyntaxKind.StringKeyword ||
token === SyntaxKind.NumberKeyword ||
token === SyntaxKind.BooleanKeyword) {
if (angleBracketStack > 0 && !classifyKeywordsInGenerics) {
token === SyntaxKind.StringKeyword ||
token === SyntaxKind.NumberKeyword ||
token === SyntaxKind.BooleanKeyword) {
if (angleBracketStack > 0 && !syntacticClassifierAbsent) {
// If it looks like we're could be in something generic, don't classify this
// as a keyword. We may just get overwritten by the syntactic classifier,
// causing a noisy experience for the user.
token = SyntaxKind.Identifier;
}
}
else if (token === SyntaxKind.TemplateHead) {
templateStack.push(token);
}
else if (token === SyntaxKind.OpenBraceToken) {
// If we don't have anything on the template stack,
// then we aren't trying to keep track of a previously scanned template head.
if (templateStack.length > 0) {
templateStack.push(token);
}
}
else if (token === SyntaxKind.CloseBraceToken) {
// If we don't have anything on the template stack,
// then we aren't trying to keep track of a previously scanned template head.
if (templateStack.length > 0) {
var lastTemplateStackToken = lastOrUndefined(templateStack);

if (lastTemplateStackToken === SyntaxKind.TemplateHead) {
token = scanner.reScanTemplateToken();

// Only pop on a TemplateTail; a TemplateMiddle indicates there is more for us.
if (token === SyntaxKind.TemplateTail) {
templateStack.pop();
}
else {
Debug.assert(token === SyntaxKind.TemplateMiddle, "Should have been a template middle. Was " + token);
}
}
else {
Debug.assert(lastTemplateStackToken === SyntaxKind.OpenBraceToken, "Should have been an open brace. Was: " + token);
templateStack.pop();
}
}
}

lastNonTriviaToken = token;
}
Expand All @@ -5781,7 +5838,6 @@ module ts {
var start = scanner.getTokenPos();
var end = scanner.getTextPos();

// add the token
addResult(end - start, classFromKind(token));

if (end >= text.length) {
Expand Down Expand Up @@ -5811,6 +5867,22 @@ module ts {
result.finalLexState = EndOfLineState.InMultiLineCommentTrivia;
}
}
else if (isTemplateLiteralKind(token)) {
if (scanner.isUnterminated()) {
if (token === SyntaxKind.TemplateTail) {
result.finalLexState = EndOfLineState.InTemplateMiddleOrTail;
}
else if (token === SyntaxKind.NoSubstitutionTemplateLiteral) {
result.finalLexState = EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate;
}
else {
Debug.fail("Only 'NoSubstitutionTemplateLiteral's and 'TemplateTail's can be unterminated; got SyntaxKind #" + token);
}
}
}
else if (templateStack.length > 0 && lastOrUndefined(templateStack) === SyntaxKind.TemplateHead) {
result.finalLexState = EndOfLineState.InTemplateSubstitutionPosition;
}
}
}

Expand Down Expand Up @@ -5913,6 +5985,9 @@ module ts {
return TokenClass.Whitespace;
case SyntaxKind.Identifier:
default:
if (isTemplateLiteralKind(token)) {
return TokenClass.StringLiteral; // maybe make a TemplateLiteral
}
return TokenClass.Identifier;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/services/shims.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ module ts {
}

export interface ClassifierShim extends Shim {
getClassificationsForLine(text: string, lexState: EndOfLineState, classifyKeywordsInGenerics?: boolean): string;
getClassificationsForLine(text: string, lexState: EndOfLineState, syntacticClassifierAbsent?: boolean): string;
}

export interface CoreServicesShim extends Shim {
Expand Down
143 changes: 128 additions & 15 deletions tests/cases/unittests/services/colorization.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
/// <reference path="..\..\..\..\src\harness\harnessLanguageService.ts" />

interface Classification {
position: number;
length: number;
class: ts.TokenClass;
position: number;
}

interface ClassiferResult {
Expand All @@ -15,6 +15,7 @@ interface ClassiferResult {
interface ClassificationEntry {
value: any;
class: ts.TokenClass;
position?: number;
}

describe('Colorization', function () {
Expand Down Expand Up @@ -60,16 +61,23 @@ describe('Colorization', function () {
return undefined;
}

function punctuation(text: string) { return { value: text, class: ts.TokenClass.Punctuation }; }
function keyword(text: string) { return { value: text, class: ts.TokenClass.Keyword }; }
function operator(text: string) { return { value: text, class: ts.TokenClass.Operator }; }
function comment(text: string) { return { value: text, class: ts.TokenClass.Comment }; }
function whitespace(text: string) { return { value: text, class: ts.TokenClass.Whitespace }; }
function identifier(text: string) { return { value: text, class: ts.TokenClass.Identifier }; }
function numberLiteral(text: string) { return { value: text, class: ts.TokenClass.NumberLiteral }; }
function stringLiteral(text: string) { return { value: text, class: ts.TokenClass.StringLiteral }; }
function regExpLiteral(text: string) { return { value: text, class: ts.TokenClass.RegExpLiteral }; }
function finalEndOfLineState(value: number) { return { value: value, class: <ts.TokenClass>undefined }; }
function punctuation(text: string, position?: number) { return createClassification(text, ts.TokenClass.Punctuation, position); }
function keyword(text: string, position?: number) { return createClassification(text, ts.TokenClass.Keyword, position); }
function operator(text: string, position?: number) { return createClassification(text, ts.TokenClass.Operator, position); }
function comment(text: string, position?: number) { return createClassification(text, ts.TokenClass.Comment, position); }
function whitespace(text: string, position?: number) { return createClassification(text, ts.TokenClass.Whitespace, position); }
function identifier(text: string, position?: number) { return createClassification(text, ts.TokenClass.Identifier, position); }
function numberLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.NumberLiteral, position); }
function stringLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.StringLiteral, position); }
function regExpLiteral(text: string, position?: number) { return createClassification(text, ts.TokenClass.RegExpLiteral, position); }
function finalEndOfLineState(value: number): ClassificationEntry { return { value: value, class: undefined, position: 0 }; }
function createClassification(text: string, tokenClass: ts.TokenClass, position?: number): ClassificationEntry {
return {
value: text,
class: tokenClass,
position: position,
};
}

function test(text: string, initialEndOfLineState: ts.EndOfLineState, ...expectedEntries: ClassificationEntry[]): void {
var result = getClassifications(text, initialEndOfLineState);
Expand All @@ -81,18 +89,23 @@ describe('Colorization', function () {
assert.equal(result.finalEndOfLineState, expectedEntry.value, "final endOfLineState does not match expected.");
}
else {
var actualEntryPosition = text.indexOf(expectedEntry.value);
var actualEntryPosition = expectedEntry.position !== undefined ? expectedEntry.position : text.indexOf(expectedEntry.value);
assert(actualEntryPosition >= 0, "token: '" + expectedEntry.value + "' does not exit in text: '" + text + "'.");

var actualEntry = getEntryAtPosistion(result, actualEntryPosition);

assert(actualEntry, "Could not find classification entry for '" + expectedEntry.value + "' at position: " + actualEntryPosition);
assert.equal(actualEntry.class, expectedEntry.class, "Classification class does not match expected. Expected: " + ts.TokenClass[expectedEntry.class] + ", Actual: " + ts.TokenClass[actualEntry.class]);
assert.equal(actualEntry.length, expectedEntry.value.length, "Classification length does not match expected. Expected: " + ts.TokenClass[expectedEntry.value.length] + ", Actual: " + ts.TokenClass[actualEntry.length]);
assert(actualEntry, "Could not find classification entry for '" + expectedEntry.value + "' at position: " + actualEntryPosition + "\n\n" + JSON.stringify(result));
assert.equal(actualEntry.class, expectedEntry.class,
"Classification class does not match expected. Expected: " + ts.TokenClass[expectedEntry.class] + " - '" + expectedEntry.value + "', Actual: " + ts.TokenClass[actualEntry.class] + " - '" + getActualText(text, actualEntry) + "\n\n" + JSON.stringify(result));
assert.equal(actualEntry.length, expectedEntry.value.length, "Classification length does not match expected. Expected: " + expectedEntry.value.length + " - '" + expectedEntry.value + "', Actual: " + actualEntry.length + " - '" + getActualText(text, actualEntry) + "'\n\n" + JSON.stringify(result));
}
}
}

function getActualText(sourceText: string, classification: Classification): string {
return sourceText.substr(classification.position, classification.length);
}

describe("test getClassifications", function () {
it("Returns correct token classes", function () {
test("var x: string = \"foo\"; //Hello",
Expand Down Expand Up @@ -291,6 +304,106 @@ describe('Colorization', function () {
finalEndOfLineState(ts.EndOfLineState.Start));
});

it("classifies a single line no substitution template string correctly", () => {
test("`number number public string`",
ts.EndOfLineState.Start,
stringLiteral("`number number public string`"),
finalEndOfLineState(ts.EndOfLineState.Start));
});
it("classifies substitution parts of a template string correctly", () => {
test("`number '${ 1 + 1 }' string '${ 'hello' }'`",
ts.EndOfLineState.Start,
stringLiteral("`number '${"),
numberLiteral("1"),
operator("+"),
numberLiteral("1"),
stringLiteral("}' string '${"),
stringLiteral("'hello'"),
stringLiteral("}'`"),
finalEndOfLineState(ts.EndOfLineState.Start));
});
it("classifies an unterminated no substitution template string correctly", () => {
test("`hello world",
ts.EndOfLineState.Start,
stringLiteral("`hello world"),
finalEndOfLineState(ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate));
});
it("classifies the entire line of an unterminated multiline no-substitution/head template", () => {
test("...",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral("..."),
finalEndOfLineState(ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate));
});
it("classifies the entire line of an unterminated multiline template middle/end",() => {
test("...",
ts.EndOfLineState.InTemplateMiddleOrTail,
stringLiteral("..."),
finalEndOfLineState(ts.EndOfLineState.InTemplateMiddleOrTail));
});
it("classifies a termination of a multiline template head", () => {
test("...${",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral("...${"),
finalEndOfLineState(ts.EndOfLineState.InTemplateSubstitutionPosition));
});
it("classifies the termination of a multiline no substitution template", () => {
test("...`",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral("...`"),
finalEndOfLineState(ts.EndOfLineState.Start));
});
it("classifies the substitution parts and middle/tail of a multiline template string", () => {
test("${ 1 + 1 }...`",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral("${"),
numberLiteral("1"),
operator("+"),
numberLiteral("1"),
stringLiteral("}...`"),
finalEndOfLineState(ts.EndOfLineState.Start));
});
it("classifies a template middle and propagates the end of line state",() => {
test("${ 1 + 1 }...`",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral("${"),
numberLiteral("1"),
operator("+"),
numberLiteral("1"),
stringLiteral("}...`"),
finalEndOfLineState(ts.EndOfLineState.Start));
});
it("classifies substitution expressions with curly braces appropriately", () => {
var pos = 0;
var lastLength = 0;

test("...${ () => { } } ${ { x: `1` } }...`",
ts.EndOfLineState.InTemplateHeadOrNoSubstitutionTemplate,
stringLiteral(track("...${"), pos),
punctuation(track(" ", "("), pos),
punctuation(track(")"), pos),
punctuation(track(" ", "=>"), pos),
punctuation(track(" ", "{"), pos),
punctuation(track(" ", "}"), pos),
stringLiteral(track(" ", "} ${"), pos),
punctuation(track(" ", "{"), pos),
identifier(track(" ", "x"), pos),
punctuation(track(":"), pos),
stringLiteral(track(" ", "`1`"), pos),
punctuation(track(" ", "}"), pos),
stringLiteral(track(" ", "}...`"), pos),
finalEndOfLineState(ts.EndOfLineState.Start));

// Adjusts 'pos' by accounting for the length of each portion of the string,
// but only return the last given string
function track(...vals: string[]): string {
for (var i = 0, n = vals.length; i < n; i++) {
pos += lastLength;
lastLength = vals[i].length;
}
return ts.lastOrUndefined(vals);
}
});

it("classifies partially written generics correctly.", function () {
test("Foo<number",
ts.EndOfLineState.Start,
Expand Down