Skip to content

Commit 7899cd1

Browse files
securityMB authored and copybara-github committed
No public description
PiperOrigin-RevId: 658781590
1 parent 24a2851 commit 7899cd1

File tree

6 files changed

+226
-79
lines changed

6 files changed

+226
-79
lines changed

src/builders/html_sanitizer/css/sanitizer.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
* that bug and possibly other ones.
1717
*/
1818

19-
import {safeStyleEl} from '../../../dom/index.js';
19+
import {setTextContent} from '../../../dom/elements/style.js';
2020
import {createStyleSheetInternal} from '../../../internals/style_sheet_impl.js';
2121
import {
2222
ResourceUrlPolicy,
@@ -51,7 +51,7 @@ class CssSanitizer {
5151
private getStyleSheet(cssText: string): CSSStyleSheet {
5252
const style = this.inertDocument.createElement('style');
5353
const safeStyle = createStyleSheetInternal(cssText);
54-
safeStyleEl.setTextContent(style, safeStyle);
54+
setTextContent(style, safeStyle);
5555
this.inertDocument.head.appendChild(style);
5656
const sheet = style.sheet!; // guaranteed to be non-null
5757
style.remove();

src/builders/html_sanitizer/css/tokenizer.ts

+42-39
Original file line number | Diff line number | Diff line change
@@ -76,23 +76,23 @@ class Tokenizer {
7676
*
7777
* https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#next-input-code-point
7878
*/
79-
private get nextInputCodePoint(): string | EOF {
79+
private nextInputCodePoint(): string | EOF {
8080
return this.css[this.pos];
8181
}
8282

83-
private get nextTwoInputCodePoints(): [string | EOF, string | EOF] {
83+
private nextTwoInputCodePoints(): [string | EOF, string | EOF] {
8484
return [this.css[this.pos], this.css[this.pos + 1]];
8585
}
8686

87-
private get nextThreeInputCodePoints(): [
87+
private nextThreeInputCodePoints(): [
8888
string | EOF,
8989
string | EOF,
9090
string | EOF,
9191
] {
9292
return [this.css[this.pos], this.css[this.pos + 1], this.css[this.pos + 2]];
9393
}
9494

95-
private get currentInputCodePoint(): string | EOF {
95+
private currentInputCodePoint(): string | EOF {
9696
return this.css[this.pos - 1];
9797
}
9898

@@ -136,7 +136,7 @@ class Tokenizer {
136136
// ":ho st", which is safe.
137137
return {tokenKind: CssTokenKind.WHITESPACE};
138138
}
139-
const codePoint = this.nextInputCodePoint;
139+
const codePoint = this.nextInputCodePoint();
140140
this.consumeTheNextInputCodePoint();
141141
if (codePoint === EOF) {
142142
return {tokenKind: CssTokenKind.EOF};
@@ -147,8 +147,8 @@ class Tokenizer {
147147
return this.consumeString(codePoint);
148148
} else if (codePoint === '#') {
149149
if (
150-
this.isIdentCodePoint(this.nextInputCodePoint) ||
151-
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints)
150+
this.isIdentCodePoint(this.nextInputCodePoint()) ||
151+
this.twoCodePointsAreValidEscape(...this.nextTwoInputCodePoints())
152152
) {
153153
// In spec there's also a step to check if the next three code points
154154
// would start an ident sequence. However, the only reason to do so
@@ -208,7 +208,7 @@ class Tokenizer {
208208
} else if (codePoint === '@') {
209209
if (
210210
this.threeCodePointsWouldStartAnIdentSequence(
211-
...this.nextThreeInputCodePoints,
211+
...this.nextThreeInputCodePoints(),
212212
)
213213
) {
214214
const ident = this.consumeIdentSequence();
@@ -271,7 +271,7 @@ class Tokenizer {
271271
value: '',
272272
};
273273
while (true) {
274-
const codePoint = this.nextInputCodePoint;
274+
const codePoint = this.nextInputCodePoint();
275275
this.consumeTheNextInputCodePoint();
276276
if (codePoint === EOF || codePoint === quote) {
277277
return stringToken;
@@ -283,10 +283,10 @@ class Tokenizer {
283283
stringToken.value = '';
284284
return stringToken;
285285
} else if (codePoint === '\\') {
286-
if (this.nextInputCodePoint === EOF) {
286+
if (this.nextInputCodePoint() === EOF) {
287287
// > If the next input code point is EOF, do nothing.
288288
continue;
289-
} else if (this.isNewline(this.nextInputCodePoint)) {
289+
} else if (this.isNewline(this.nextInputCodePoint())) {
290290
this.consumeTheNextInputCodePoint();
291291
} else {
292292
const escapedCodePoint = this.consumeEscapedCodePoint();
@@ -300,7 +300,7 @@ class Tokenizer {
300300

301301
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-escaped-code-point */
302302
private consumeEscapedCodePoint(): string {
303-
const codePoint = this.nextInputCodePoint;
303+
const codePoint = this.nextInputCodePoint();
304304
this.consumeTheNextInputCodePoint();
305305
if (codePoint === EOF) {
306306
return '\ufffd';
@@ -311,12 +311,15 @@ class Tokenizer {
311311
// The spec assumes here that the first hex digit has already been
312312
// consumed. So in fact, the maximum number of hex digits that can be
313313
// consumed is 6.
314-
while (this.isHexDigit(this.nextInputCodePoint) && hexDigits.length < 6) {
315-
hexDigits += this.nextInputCodePoint;
314+
while (
315+
this.isHexDigit(this.nextInputCodePoint()) &&
316+
hexDigits.length < 6
317+
) {
318+
hexDigits += this.nextInputCodePoint();
316319
this.consumeTheNextInputCodePoint();
317320
}
318321
// Whitespace directly following an escape sequence is ignored.
319-
if (this.isWhitespace(this.nextInputCodePoint)) {
322+
if (this.isWhitespace(this.nextInputCodePoint())) {
320323
this.consumeTheNextInputCodePoint();
321324
}
322325
// Needed to parse hexadecimal.
@@ -329,7 +332,7 @@ class Tokenizer {
329332
}
330333

331334
private consumeAsMuchWhitespaceAsPossible() {
332-
while (this.isWhitespace(this.nextInputCodePoint)) {
335+
while (this.isWhitespace(this.nextInputCodePoint())) {
333336
this.consumeTheNextInputCodePoint();
334337
}
335338
}
@@ -338,9 +341,9 @@ class Tokenizer {
338341
private consumeIdentSequence(): string {
339342
let result = '';
340343
while (true) {
341-
const codePoint = this.nextInputCodePoint;
344+
const codePoint = this.nextInputCodePoint();
342345
this.consumeTheNextInputCodePoint();
343-
const codePoint2 = this.nextInputCodePoint;
346+
const codePoint2 = this.nextInputCodePoint();
344347
if (this.isIdentCodePoint(codePoint)) {
345348
result += codePoint;
346349
} else if (this.twoCodePointsAreValidEscape(codePoint, codePoint2)) {
@@ -355,15 +358,15 @@ class Tokenizer {
355358
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-an-ident-like-token */
356359
private consumeIdentLikeToken(): CssToken | CssToken[] {
357360
const ident = this.consumeIdentSequence();
358-
if (/^url$/i.test(ident) && this.nextInputCodePoint === '(') {
361+
if (/^url$/i.test(ident) && this.nextInputCodePoint() === '(') {
359362
// TODO(securitymb): This algorithm may look a little weird but we're
360363
// following the spec here exactly. We will see later on if this can be
361364
// optimized.
362365
this.consumeTheNextInputCodePoint();
363366
while (this.nextTwoInputsPointsAreWhitespace()) {
364367
this.consumeTheNextInputCodePoint();
365368
}
366-
const nextTwo = this.nextTwoInputCodePoints;
369+
const nextTwo = this.nextTwoInputCodePoints();
367370
if (
368371
(this.isWhitespace(nextTwo[0]) &&
369372
(nextTwo[1] === '"' || nextTwo[1] === "'")) ||
@@ -376,7 +379,7 @@ class Tokenizer {
376379
} else {
377380
return this.consumeUrlToken();
378381
}
379-
} else if (this.nextInputCodePoint === '(') {
382+
} else if (this.nextInputCodePoint() === '(') {
380383
this.consumeTheNextInputCodePoint();
381384
// We lowercase the function name because function names are
382385
// case-insensitive in CSS.
@@ -413,15 +416,15 @@ class Tokenizer {
413416
let url = '';
414417
this.consumeAsMuchWhitespaceAsPossible();
415418
while (true) {
416-
const codePoint = this.nextInputCodePoint;
419+
const codePoint = this.nextInputCodePoint();
417420
this.consumeTheNextInputCodePoint();
418421
if (codePoint === ')' || codePoint === EOF) {
419422
return this.createFunctionUrlToken(url);
420423
} else if (this.isWhitespace(codePoint)) {
421424
this.consumeAsMuchWhitespaceAsPossible();
422425
if (
423-
this.nextInputCodePoint === ')' ||
424-
this.nextInputCodePoint === EOF
426+
this.nextInputCodePoint() === ')' ||
427+
this.nextInputCodePoint() === EOF
425428
) {
426429
this.consumeTheNextInputCodePoint();
427430
return this.createFunctionUrlToken(url);
@@ -462,7 +465,7 @@ class Tokenizer {
462465
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#consume-the-remnants-of-a-bad-url */
463466
private consumeRemnantsOfBadUrl() {
464467
while (true) {
465-
const codePoint = this.nextInputCodePoint;
468+
const codePoint = this.nextInputCodePoint();
466469
this.consumeTheNextInputCodePoint();
467470
if (codePoint === EOF || codePoint === ')') {
468471
return;
@@ -484,23 +487,23 @@ class Tokenizer {
484487
private consumeNumber(): string {
485488
let repr = '';
486489
{
487-
const next = this.nextInputCodePoint;
490+
const next = this.nextInputCodePoint();
488491
if (next === '+' || next === '-') {
489492
this.consumeTheNextInputCodePoint();
490493
repr += next;
491494
}
492495
}
493496
repr += this.consumeDigits();
494497
{
495-
const next = this.nextInputCodePoint;
498+
const next = this.nextInputCodePoint();
496499
const next2 = this.css[this.pos + 1];
497500
if (next === '.' && this.isDigit(next2)) {
498501
this.consumeTheNextInputCodePoint();
499502
repr += '.' + this.consumeDigits();
500503
}
501504
}
502505
{
503-
const next = this.nextInputCodePoint;
506+
const next = this.nextInputCodePoint();
504507
const next2 = this.css[this.pos + 1];
505508
const next3 = this.css[this.pos + 2];
506509
if (next === 'e' || next === 'E') {
@@ -518,8 +521,8 @@ class Tokenizer {
518521

519522
private consumeDigits(): string {
520523
let repr = '';
521-
while (this.isDigit(this.nextInputCodePoint)) {
522-
repr += this.nextInputCodePoint;
524+
while (this.isDigit(this.nextInputCodePoint())) {
525+
repr += this.nextInputCodePoint();
523526
this.consumeTheNextInputCodePoint();
524527
}
525528
return repr;
@@ -533,7 +536,7 @@ class Tokenizer {
533536
const repr = this.consumeNumber();
534537
if (
535538
this.threeCodePointsWouldStartAnIdentSequence(
536-
...this.nextThreeInputCodePoints,
539+
...this.nextThreeInputCodePoints(),
537540
)
538541
) {
539542
return {
@@ -542,15 +545,15 @@ class Tokenizer {
542545
dimension: this.consumeIdentSequence(),
543546
};
544547
}
545-
if (this.nextInputCodePoint === '%') {
548+
if (this.nextInputCodePoint() === '%') {
546549
this.consumeTheNextInputCodePoint();
547550
return {tokenKind: CssTokenKind.PERCENTAGE, repr};
548551
}
549552
return {tokenKind: CssTokenKind.NUMBER, repr};
550553
}
551554

552555
private nextTwoInputsPointsAreWhitespace() {
553-
return this.nextTwoInputCodePoints.every((c) => this.isWhitespace(c));
556+
return this.nextTwoInputCodePoints().every((c) => this.isWhitespace(c));
554557
}
555558

556559
/** https://www.w3.org/TR/2021/CRD-css-syntax-3-20211224/#check-if-two-code-points-are-a-valid-escape */
@@ -563,8 +566,8 @@ class Tokenizer {
563566

564567
private streamStartsWithValidEscape() {
565568
return this.twoCodePointsAreValidEscape(
566-
this.currentInputCodePoint,
567-
this.nextInputCodePoint,
569+
this.currentInputCodePoint(),
570+
this.nextInputCodePoint(),
568571
);
569572
}
570573

@@ -588,8 +591,8 @@ class Tokenizer {
588591

589592
private streamStartsWithANumber() {
590593
return this.threeCodePointsWouldStartANumber(
591-
this.currentInputCodePoint,
592-
...this.nextTwoInputCodePoints,
594+
this.currentInputCodePoint(),
595+
...this.nextTwoInputCodePoints(),
593596
);
594597
}
595598

@@ -618,8 +621,8 @@ class Tokenizer {
618621

619622
private streamStartsWithAnIdentSequence() {
620623
return this.threeCodePointsWouldStartAnIdentSequence(
621-
this.currentInputCodePoint,
622-
...this.nextTwoInputCodePoints,
624+
this.currentInputCodePoint(),
625+
...this.nextTwoInputCodePoints(),
623626
);
624627
}
625628

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
/**
2+
* @license
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
/**
6+
* @fileoverview This file exports a default instance of the CSS sanitizer,
7+
* similarly to how the default instance of the HTML sanitizer is exported.
8+
*
9+
* The reason why it's in a separate file is to ensure that html_sanitizer.ts
10+
* doesn't depend on html_sanitizer_builder.ts, which would cause
11+
* a circular dependency.
12+
*/
13+
14+
import {pure} from '../../internals/pure.js';
15+
import {CssSanitizerBuilder} from './html_sanitizer_builder.js';
16+
const defaultCssSanitizer = /* #__PURE__ */ pure(() =>
17+
new CssSanitizerBuilder().build(),
18+
);
19+
/** Sanitizes untrusted HTML (which may contain CSS) into a DocumentFragment using the default CSS-aware sanitizer configuration. */
20+
export function sanitizeHtmlWithCss(css: string): DocumentFragment {
21+
return defaultCssSanitizer.sanitizeToFragment(css);
22+
}

0 commit comments

Comments
 (0)