diff --git a/src/js/background.js b/src/js/background.js index 7b8c31769eb9e..706e55c5c0ba5 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -184,8 +184,8 @@ const µBlock = { // jshint ignore:line // Read-only systemSettings: { - compiledMagic: 56, // Increase when compiled format changes - selfieMagic: 56, // Increase when selfie format changes + compiledMagic: 57, // Increase when compiled format changes + selfieMagic: 57, // Increase when selfie format changes }, // https://github.com/uBlockOrigin/uBlock-issues/issues/759#issuecomment-546654501 diff --git a/src/js/benchmarks.js b/src/js/benchmarks.js index 717d94c31ce50..7d80f7c69d46b 100644 --- a/src/js/benchmarks.js +++ b/src/js/benchmarks.js @@ -174,6 +174,7 @@ const loadBenchmarkDataset = (( ) => { let removeparamCount = 0; let cspCount = 0; let permissionsCount = 0; + let replaceCount = 0; for ( let i = 0; i < requests.length; i++ ) { const request = requests[i]; fctxt.setURL(request.url); @@ -202,6 +203,9 @@ const loadBenchmarkDataset = (( ) => { } } staticNetFilteringEngine.matchHeaders(fctxt, []); + if ( staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace') ) { + replaceCount += 1; + } } else if ( redirectEngine !== undefined ) { if ( staticNetFilteringEngine.redirectRequest(redirectEngine, fctxt) ) { redirectCount += 1; @@ -222,6 +226,7 @@ const loadBenchmarkDataset = (( ) => { `\tremoveparam=: ${removeparamCount}`, `\tcsp=: ${cspCount}`, `\tpermissions=: ${permissionsCount}`, + `\treplace=: ${replaceCount}`, ]; const s = output.join('\n'); console.info(s); diff --git a/src/js/filtering-context.js b/src/js/filtering-context.js index 4169ffd237d55..3d67f996d535e 100644 --- a/src/js/filtering-context.js +++ b/src/js/filtering-context.js @@ -175,6 +175,7 @@ const FilteringContext = class { fromFilteringContext(other) { this.realm = other.realm; + this.id = other.id; this.type = other.type; this.method = other.method; this.url = other.url; diff --git a/src/js/html-filtering.js b/src/js/html-filtering.js index ce824b0155aef..b167497989c70 100644 --- a/src/js/html-filtering.js +++ b/src/js/html-filtering.js @@ -429,15 +429,15 @@ htmlFilteringEngine.retrieve = function(details) { } }; -htmlFilteringEngine.apply = function(doc, details) { +htmlFilteringEngine.apply = function(doc, details, selectors) { docRegister = doc; let modified = false; - for ( const selector of details.selectors.plains ) { + for ( const selector of selectors.plains ) { if ( applyCSSSelector(details, selector) ) { modified = true; } } - for ( const selector of details.selectors.procedurals ) { + for ( const selector of selectors.procedurals ) { if ( applyProceduralSelector(details, selector) ) { modified = true; } diff --git a/src/js/static-filtering-parser.js b/src/js/static-filtering-parser.js index 0e15e9e53dc48..49b66d36280f4 100644 --- a/src/js/static-filtering-parser.js +++ b/src/js/static-filtering-parser.js @@ -187,6 +187,7 @@ export const NODE_TYPE_NET_OPTION_NAME_POPUP = iota++; export const NODE_TYPE_NET_OPTION_NAME_REDIRECT = iota++; export const NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE = iota++; export const NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM = iota++; +export const NODE_TYPE_NET_OPTION_NAME_REPLACE = iota++; export const NODE_TYPE_NET_OPTION_NAME_SCRIPT = iota++; export const NODE_TYPE_NET_OPTION_NAME_SHIDE = iota++; export const NODE_TYPE_NET_OPTION_NAME_TO = iota++; @@ -265,6 +266,7 @@ export const nodeTypeFromOptionName = new Map([ /* synonym */ [ 'rewrite', NODE_TYPE_NET_OPTION_NAME_REDIRECT ], [ 'redirect-rule', NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE ], [ 'removeparam', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ], + [ 'replace', NODE_TYPE_NET_OPTION_NAME_REPLACE ], /* synonym */ [ 'queryprune', NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM ], [ 'script', NODE_TYPE_NET_OPTION_NAME_SCRIPT ], [ 'shide', NODE_TYPE_NET_OPTION_NAME_SHIDE ], @@ -597,9 +599,14 @@ const exCharCodeAt = (s, i) => { return pos >= 0 ? s.charCodeAt(pos) : -1; }; +const toEscapedCharRegex = c => { + const safe = c.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return new RegExp(`((?:^|[^\\\\])(?:\\\\\\\\)*)\\\\${safe}`, 'g'); +}; + /******************************************************************************/ -class argListParser { +class ArgListParser { constructor(separatorChar = ',', mustQuote = false) { this.separatorChar = this.actualSeparatorChar = separatorChar; this.separatorCode = this.actualSeparatorCode = separatorChar.charCodeAt(0); @@ -612,10 +619,10 @@ class argListParser { this.reWhitespaceStart = /^\s+/; this.reWhitespaceEnd = /\s+$/; this.reOddTrailingEscape = /(?:^|[^\\])(?:\\\\)*\\$/; - this.reEscapedDoubleQuote = /((?:^|[^\\])(?:\\\\)*)\\"/g; - this.reEscapedSingleQuote = /((?:^|[^\\])(?:\\\\)*)\\'/g; - this.reEscapedBacktick = /((?:^|[^\\])(?:\\\\)*)\\`/g; - this.reEscapedSeparator = new RegExp(`((?:^|[^\\\\])(?:\\\\\\\\)*)\\\\${this.separatorChar}`, 'g'); + this.reEscapedDoubleQuote = toEscapedCharRegex('"'); + this.reEscapedSingleQuote = toEscapedCharRegex("'"); + this.reEscapedBacktick = toEscapedCharRegex('`'); + this.reEscapedSeparator = toEscapedCharRegex(this.separatorChar); this.unescapedSeparator = `$1${this.separatorChar}`; } nextArg(pattern, beg = 0) { @@ -871,7 +878,7 @@ export class AstFilterParser { this.rePlainEntity = /^(?:[\da-z][\da-z_-]*\.)+\*$/; this.reHostsSink = /^[\w%.:\[\]-]+\s+/; this.reHostsRedirect = /(?:0\.0\.0\.0|broadcasthost|local|localhost(?:\.localdomain)?|ip6-\w+)(?:[^\w.-]|$)/; - this.reNetOptionComma = /,(?!\d*\})/g; + this.reNetOptionComma = /,(?:~?[13a-z-]+(?:=.*?)?|_+)(?:,|$)/; this.rePointlessLeftAnchor = /^\|\|?\*+/; this.reIsTokenChar = /^[%0-9A-Za-z]/; this.rePointlessLeadingWildcards = /^(\*+)[^%0-9A-Za-z\u{a0}-\u{10FFFF}]/u; @@ -898,7 +905,7 @@ export class AstFilterParser { this.reGoodRegexToken = /[^\x01%0-9A-Za-z][%0-9A-Za-z]{7,}|[^\x01%0-9A-Za-z][%0-9A-Za-z]{1,6}[^\x01%0-9A-Za-z]/; this.reBadCSP = /(?:=|;)\s*report-(?:to|uri)\b/; this.reNoopOption = /^_+$/; - this.scriptletArgListParser = new argListParser(','); + this.scriptletArgListParser = new ArgListParser(','); } parse(raw) { @@ -1414,6 +1421,7 @@ export class AstFilterParser { break; case NODE_TYPE_NET_OPTION_NAME_REDIRECT: case NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: + case NODE_TYPE_NET_OPTION_NAME_REPLACE: case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: realBad = isNegated || (isException || hasValue) === false || modifierType !== 0; @@ -1474,6 +1482,20 @@ export class AstFilterParser { realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount; break; } + case NODE_TYPE_NET_OPTION_NAME_REPLACE: { + realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount; + if ( realBad ) { break; } + if ( this.options.trustedSource !== true ) { + this.astError = AST_ERROR_UNTRUSTED_SOURCE; + realBad = true; + break; + } + if ( this.interactive ) { + const value = this.getNetOptionValue(NODE_TYPE_NET_OPTION_NAME_REPLACE); + realBad = parseReplaceValue(value) === undefined; + } + break; + } case NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: realBad = abstractTypeCount || behaviorTypeCount || unredirectableTypeCount; if ( realBad ) { break; } @@ -1959,9 +1981,8 @@ export class AstFilterParser { } endOfNetOption(s, beg) { - this.reNetOptionComma.lastIndex = beg; - const match = this.reNetOptionComma.exec(s); - return match !== null ? match.index : s.length; + const match = this.reNetOptionComma.exec(s.slice(beg)); + return match !== null ? beg + match.index : s.length; } parseNetOption(parent) { @@ -2975,6 +2996,39 @@ export function parseHeaderValue(arg) { return out; } + +// https://adguard.com/kb/general/ad-filtering/create-own-filters/#replace-modifier + +export function parseReplaceValue(s) { + if ( s.charCodeAt(0) !== 0x2F /* / */ ) { return; } + const { reEscapedComma, reEscapedDollarSign } = parseReplaceValue; + const parser = new ArgListParser('/'); + parser.nextArg(s, 1); + let pattern = s.slice(parser.argBeg, parser.argEnd); + if ( parser.transform ) { + pattern = parser.normalizeArg(pattern); + } + pattern = pattern + .replace(reEscapedDollarSign, '$1$$$') + .replace(reEscapedComma, '$1,'); + parser.nextArg(s, parser.separatorEnd); + let replacement = s.slice(parser.argBeg, parser.argEnd); + if ( parser.separatorEnd === parser.separatorBeg ) { return; } + if ( parser.transform ) { + replacement = parser.normalizeArg(replacement); + } + replacement = replacement + .replace(reEscapedDollarSign, '$1$$') + .replace(reEscapedComma, '$1,'); + const flags = s.slice(parser.separatorEnd); + try { + return { re: new RegExp(pattern, flags), replacement }; + } catch(_) { + } +} +parseReplaceValue.reEscapedDollarSign = toEscapedCharRegex('$'); +parseReplaceValue.reEscapedComma = toEscapedCharRegex(','); + /******************************************************************************/ export const netOptionTokenDescriptors = new Map([ @@ -3025,6 +3079,7 @@ export const netOptionTokenDescriptors = new Map([ /* synonym */ [ 'rewrite', { mustAssign: true } ], [ 'redirect-rule', { mustAssign: true } ], [ 'removeparam', { } ], + [ 'replace', { mustAssign: true } ], /* synonym */ [ 'queryprune', { } ], [ 'script', { canNegate: true } ], [ 'shide', { } ], diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 42ec0d3836abf..317cd6caf7877 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -69,23 +69,29 @@ const keyvalStore = typeof vAPI !== 'undefined' // |+-------------- bit 10: headers-based filters // +--------------- bit 11-15: unused -const CategoryCount = 1 << 0xb; // shift left to first unused bit - const RealmBitsMask = 0b00000000111; const ActionBitsMask = 0b00000000011; const TypeBitsMask = 0b01111100000; const TypeBitsOffset = 5; -const BlockAction = 0b00000000000; -const AllowAction = 0b00000000001; -const Important = 0b00000000010; -const BlockImportant = BlockAction | Important; -const ModifyAction = 0b00000000100; -const AnyParty = 0b00000000000; -const FirstParty = 0b00000001000; -const ThirdParty = 0b00000010000; -const AllParties = 0b00000011000; -const HEADERS = 0b10000000000; +const BLOCK_REALM = 0b00000000000000000; +const ALLOW_REALM = 0b00000000000000001; +const IMPORTANT_REALM = 0b00000000000000010; +const BLOCKIMPORTANT_REALM = BLOCK_REALM | IMPORTANT_REALM; +const ANYPARTY_REALM = 0b00000000000000000; +const FIRSTPARTY_REALM = 0b00000000000001000; +const THIRDPARTY_REALM = 0b00000000000010000; +const ALLPARTIES_REALM = FIRSTPARTY_REALM | THIRDPARTY_REALM; +const HEADERS_REALM = 0b00000010000000000; +const REDIRECT_REALM = 0b00000100000000000; +const REMOVEPARAM_REALM = 0b00001000000000000; +const CSP_REALM = 0b00010000000000000; +const PERMISSIONS_REALM = 0b00100000000000000; +const URLTRANSFORM_REALM = 0b01000000000000000; +const REPLACE_REALM = 0b10000000000000000; +const MODIFY_REALMS = REDIRECT_REALM | CSP_REALM | + REMOVEPARAM_REALM | PERMISSIONS_REALM | + URLTRANSFORM_REALM | REPLACE_REALM; const typeNameToTypeValue = { 'no_type': 0 << TypeBitsOffset, @@ -186,6 +192,17 @@ const MODIFIER_TYPE_REMOVEPARAM = 3; const MODIFIER_TYPE_CSP = 4; const MODIFIER_TYPE_PERMISSIONS = 5; const MODIFIER_TYPE_URLTRANSFORM = 6; +const MODIFIER_TYPE_REPLACE = 7; + +const modifierBitsFromType = new Map([ + [ MODIFIER_TYPE_REDIRECT, REDIRECT_REALM ], + [ MODIFIER_TYPE_REDIRECTRULE, REDIRECT_REALM ], + [ MODIFIER_TYPE_REMOVEPARAM, REMOVEPARAM_REALM ], + [ MODIFIER_TYPE_CSP, CSP_REALM ], + [ MODIFIER_TYPE_PERMISSIONS, PERMISSIONS_REALM ], + [ MODIFIER_TYPE_URLTRANSFORM, URLTRANSFORM_REALM ], + [ MODIFIER_TYPE_REPLACE, REPLACE_REALM ], +]); const modifierTypeFromName = new Map([ [ 'redirect', MODIFIER_TYPE_REDIRECT ], @@ -194,6 +211,7 @@ const modifierTypeFromName = new Map([ [ 'csp', MODIFIER_TYPE_CSP ], [ 'permissions', MODIFIER_TYPE_PERMISSIONS ], [ 'urltransform', MODIFIER_TYPE_URLTRANSFORM ], + [ 'replace', MODIFIER_TYPE_REPLACE ], ]); const modifierNameFromType = new Map([ @@ -203,6 +221,7 @@ const modifierNameFromType = new Map([ [ MODIFIER_TYPE_CSP, 'csp' ], [ MODIFIER_TYPE_PERMISSIONS, 'permissions' ], [ MODIFIER_TYPE_URLTRANSFORM, 'urltransform' ], + [ MODIFIER_TYPE_REPLACE, 'replace' ], ]); //const typeValueFromCatBits = catBits => (catBits >>> TypeBitsOffset) & 0b11111; @@ -339,7 +358,7 @@ class LogData { this.raw = this.regex = ''; return; } - this.result = (categoryBits & AllowAction) === 0 ? 1 : 2; + this.result = (categoryBits & ALLOW_REALM) === 0 ? 1 : 2; const pattern = []; const regex = []; const options = []; @@ -356,9 +375,9 @@ class LogData { isRegex: false, }; filterLogData(iunit, logData); - if ( (categoryBits & ThirdParty) !== 0 ) { + if ( (categoryBits & THIRDPARTY_REALM) !== 0 ) { logData.options.unshift('3p'); - } else if ( (categoryBits & FirstParty) !== 0 ) { + } else if ( (categoryBits & FIRSTPARTY_REALM) !== 0 ) { logData.options.unshift('1p'); } const type = categoryBits & TypeBitsMask; @@ -373,7 +392,7 @@ class LogData { ) { raw += '*'; } - if ( (categoryBits & AllowAction) !== 0 ) { + if ( (categoryBits & ALLOW_REALM) !== 0 ) { raw = '@@' + raw; } if ( denyallow.length !== 0 ) { @@ -2171,7 +2190,7 @@ class FilterModifierResult { } get result() { - return (this.bits & AllowAction) === 0 ? 1 : 2; + return (this.bits & ALLOW_REALM) === 0 ? 1 : 2; } get value() { @@ -3188,6 +3207,7 @@ class FilterCompiler { [ sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE, MODIFIER_TYPE_REDIRECTRULE ], [ sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM, MODIFIER_TYPE_REMOVEPARAM ], [ sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM, MODIFIER_TYPE_URLTRANSFORM ], + [ sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE, MODIFIER_TYPE_REPLACE ], ]); // These top 100 "bad tokens" are collated using the "miss" histogram // from tokenHistograms(). The "score" is their occurrence among the @@ -3299,7 +3319,7 @@ class FilterCompiler { } reset() { - this.action = BlockAction; + this.action = BLOCK_REALM; // anchor: bit vector // 0000 (0x0): no anchoring // 0001 (0x1): anchored to the end of the URL. @@ -3314,7 +3334,7 @@ class FilterCompiler { this.modifyValue = undefined; this.pattern = ''; this.patternMatchCase = false; - this.party = AnyParty; + this.party = ANYPARTY_REALM; this.optionUnitBits = 0; this.fromDomainOpt = ''; this.toDomainOpt = ''; @@ -3395,7 +3415,7 @@ class FilterCompiler { if ( not ) { firstParty = !firstParty; } - this.party |= firstParty ? FirstParty : ThirdParty; + this.party |= firstParty ? FIRSTPARTY_REALM : THIRDPARTY_REALM; } processHostnameList(iter, out = []) { @@ -3421,7 +3441,7 @@ class FilterCompiler { processCspOption(value) { this.modifyType = MODIFIER_TYPE_CSP; this.modifyValue = value || ''; - this.optionUnitBits |= this.CSP_BIT; + this.optionUnitBits |= MODIFY_BIT; return true; } @@ -3435,7 +3455,7 @@ class FilterCompiler { parser.getNetFilterDenyallowOptionIterator(), ); if ( this.denyallowOpt === '' ) { return false; } - this.optionUnitBits |= this.DENYALLOW_BIT; + this.optionUnitBits |= DENYALLOW_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_FROM: this.fromDomainOpt = this.processHostnameList( @@ -3443,58 +3463,44 @@ class FilterCompiler { this.fromDomainOptList ); if ( this.fromDomainOpt === '' ) { return false; } - this.optionUnitBits |= this.FROM_BIT; + this.optionUnitBits |= FROM_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_HEADER: { this.headerOpt = parser.getNetOptionValue(id) || ''; - this.optionUnitBits |= this.HEADER_BIT; + this.optionUnitBits |= HEADER_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_METHOD: this.processMethodOption(parser.getNetOptionValue(id)); - this.optionUnitBits |= this.METHOD_BIT; + this.optionUnitBits |= METHOD_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_PERMISSIONS: + case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: + case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: + case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE: + case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { return false; } - this.optionUnitBits |= this.PERMISSIONS_BIT; + this.optionUnitBits |= MODIFY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: { - const actualId = this.action === AllowAction + const actualId = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : id; if ( this.processModifierOption(actualId, parser.getNetOptionValue(id)) === false ) { return false; } - this.optionUnitBits |= this.REDIRECT_BIT; + this.optionUnitBits |= MODIFY_BIT; break; } - case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: - if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { - return false; - } - this.optionUnitBits |= this.REDIRECT_BIT; - break; - case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: - if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { - return false; - } - this.optionUnitBits |= this.REMOVEPARAM_BIT; - break; case sfp.NODE_TYPE_NET_OPTION_NAME_TO: this.toDomainOpt = this.processHostnameList( parser.getNetFilterToOptionIterator(), this.toDomainOptList ); if ( this.toDomainOpt === '' ) { return false; } - this.optionUnitBits |= this.TO_BIT; - break; - case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: - if ( this.processModifierOption(id, parser.getNetOptionValue(id)) === false ) { - return false; - } - this.optionUnitBits |= this.REDIRECT_BIT; + this.optionUnitBits |= TO_BIT; break; default: break; @@ -3511,7 +3517,7 @@ class FilterCompiler { } if ( parser.isException() ) { - this.action = AllowAction; + this.action = ALLOW_REALM; } if ( parser.isLeftHnAnchored() ) { @@ -3539,14 +3545,14 @@ class FilterCompiler { break; case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT1P: this.strictParty = this.strictParty === -1 ? 0 : 1; - this.optionUnitBits |= this.STRICT_PARTY_BIT; + this.optionUnitBits |= STRICT_PARTY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_3P: this.processPartyOption(false, parser.isNegatedOption(type)); break; case sfp.NODE_TYPE_NET_OPTION_NAME_STRICT3P: this.strictParty = this.strictParty === 1 ? 0 : -1; - this.optionUnitBits |= this.STRICT_PARTY_BIT; + this.optionUnitBits |= STRICT_PARTY_BIT; break; case sfp.NODE_TYPE_NET_OPTION_NAME_ALL: this.processTypeOption(-1); @@ -3586,6 +3592,7 @@ class FilterCompiler { case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT: case sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE: case sfp.NODE_TYPE_NET_OPTION_NAME_REMOVEPARAM: + case sfp.NODE_TYPE_NET_OPTION_NAME_REPLACE: case sfp.NODE_TYPE_NET_OPTION_NAME_TO: case sfp.NODE_TYPE_NET_OPTION_NAME_URLTRANSFORM: if ( this.processOptionWithValue(parser, type) === false ) { @@ -3599,31 +3606,31 @@ class FilterCompiler { break; } case sfp.NODE_TYPE_NET_OPTION_NAME_EMPTY: { - const id = this.action === AllowAction + const id = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; if ( this.processModifierOption(id, 'empty') === false ) { return this.FILTER_INVALID; } - this.optionUnitBits |= this.REDIRECT_BIT; + this.optionUnitBits |= MODIFY_BIT; break; } case sfp.NODE_TYPE_NET_OPTION_NAME_IMPORTANT: - this.optionUnitBits |= this.IMPORTANT_BIT; - this.action = BlockImportant; + this.optionUnitBits |= IMPORTANT_BIT; + this.action = BLOCKIMPORTANT_REALM; break; case sfp.NODE_TYPE_NET_OPTION_NAME_MATCHCASE: this.patternMatchCase = true; break; case sfp.NODE_TYPE_NET_OPTION_NAME_MP4: { - const id = this.action === AllowAction + const id = this.action === ALLOW_REALM ? sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECTRULE : sfp.NODE_TYPE_NET_OPTION_NAME_REDIRECT; if ( this.processModifierOption(id, 'noopmp4-1s') === false ) { return this.FILTER_INVALID; } this.processTypeOption(sfp.NODE_TYPE_NET_OPTION_NAME_MEDIA, false); - this.optionUnitBits |= this.REDIRECT_BIT; + this.optionUnitBits |= MODIFY_BIT; break; } default: @@ -3631,8 +3638,8 @@ class FilterCompiler { } } - if ( this.party === AllParties ) { - this.party = AnyParty; + if ( this.party === ALLPARTIES_REALM ) { + this.party = ANYPARTY_REALM; } // Negated network types? Toggle on all network type bits. @@ -3649,7 +3656,7 @@ class FilterCompiler { } else { this.typeBits &= ~this.notTypeBits; } - this.optionUnitBits |= this.NOT_TYPE_BIT; + this.optionUnitBits |= NOT_TYPE_BIT; } // CSP/permissions options implicitly apply only to @@ -3813,7 +3820,7 @@ class FilterCompiler { } isJustOrigin() { - if ( this.optionUnitBits !== this.FROM_BIT ) { return false; } + if ( this.optionUnitBits !== FROM_BIT ) { return false; } if ( this.isRegex ) { return false; } if ( /[\/~]/.test(this.fromDomainOpt) ) { return false; } if ( this.pattern === '*' ) { return true; } @@ -3857,7 +3864,7 @@ class FilterCompiler { if ( parser.options.toDNR !== true ) { const parsedBlock = this.clone(); parsedBlock.modifyType = undefined; - parsedBlock.optionUnitBits &= ~this.REDIRECT_BIT; + parsedBlock.optionUnitBits &= ~MODIFY_BIT; parsedBlock.compileToFilter(writer); } } @@ -3958,14 +3965,14 @@ class FilterCompiler { // Header if ( this.headerOpt !== undefined ) { units.push(FilterOnHeaders.compile(this)); - this.action |= HEADERS; + this.action |= HEADERS_REALM; } // Important // // IMPORTANT: must always appear at the end of the sequence, so as to // ensure $isBlockImportant is set only for matching filters. - if ( (this.optionUnitBits & this.IMPORTANT_BIT) !== 0 ) { + if ( (this.optionUnitBits & IMPORTANT_BIT) !== 0 ) { units.push(FilterImportant.compile()); } @@ -3974,7 +3981,8 @@ class FilterCompiler { // IMPORTANT: the modifier unit MUST always appear first in a sequence if ( this.modifyType !== undefined ) { units.unshift(FilterModifier.compile(this)); - this.action = (this.action & ~ActionBitsMask) | ModifyAction; + this.action = (this.action & ~ActionBitsMask) | + modifierBitsFromType.get(this.modifyType); } this.compileToAtomicFilter( @@ -4047,18 +4055,16 @@ class FilterCompiler { } } -FilterCompiler.prototype.FROM_BIT = 0b000000000001; -FilterCompiler.prototype.TO_BIT = 0b000000000010; -FilterCompiler.prototype.DENYALLOW_BIT = 0b000000000100; -FilterCompiler.prototype.HEADER_BIT = 0b000000001000; -FilterCompiler.prototype.STRICT_PARTY_BIT = 0b000000010000; -FilterCompiler.prototype.CSP_BIT = 0b000000100000; -FilterCompiler.prototype.REMOVEPARAM_BIT = 0b000001000000; -FilterCompiler.prototype.REDIRECT_BIT = 0b000010000000; -FilterCompiler.prototype.NOT_TYPE_BIT = 0b000100000000; -FilterCompiler.prototype.IMPORTANT_BIT = 0b001000000000; -FilterCompiler.prototype.METHOD_BIT = 0b010000000000; -FilterCompiler.prototype.PERMISSIONS_BIT = 0b100000000000; +// These are to quickly test whether a filter is composite +const FROM_BIT = 0b000000001; +const TO_BIT = 0b000000010; +const DENYALLOW_BIT = 0b000000100; +const HEADER_BIT = 0b000001000; +const STRICT_PARTY_BIT = 0b000010000; +const MODIFY_BIT = 0b000100000; +const NOT_TYPE_BIT = 0b001000000; +const IMPORTANT_BIT = 0b010000000; +const METHOD_BIT = 0b100000000; FilterCompiler.prototype.FILTER_OK = 0; FilterCompiler.prototype.FILTER_INVALID = 1; @@ -4068,16 +4074,15 @@ FilterCompiler.prototype.FILTER_UNSUPPORTED = 2; /******************************************************************************/ const FilterContainer = function() { - this.compilerVersion = '8'; - this.selfieVersion = '9'; + this.compilerVersion = '10'; + this.selfieVersion = '10'; this.MAX_TOKEN_LENGTH = MAX_TOKEN_LENGTH; this.optimizeTaskId = undefined; // As long as CategoryCount is reasonably low, we will use an array to // store buckets using category bits as index. If ever CategoryCount // becomes too large, we can just go back to using a Map. - this.bitsToBucketIndices = JSON.parse(`[${'0,'.repeat(CategoryCount-1)}0]`); - this.buckets = [ new Map() ]; + this.bitsToBucket = new Map(); this.goodFilters = new Set(); this.badFilters = new Set(); this.unitsToOptimize = []; @@ -4105,8 +4110,7 @@ FilterContainer.prototype.reset = function() { this.goodFilters.clear(); this.badFilters.clear(); this.unitsToOptimize.length = 0; - this.bitsToBucketIndices.fill(0); - this.buckets.length = 1; + this.bitsToBucket.clear(); urlTokenizer.resetKnownTokens(); @@ -4145,16 +4149,14 @@ FilterContainer.prototype.freeze = function() { const args = unserialize(line); const bits = args[0]; - let ibucket = this.bitsToBucketIndices[bits]; - if ( ibucket === 0 ) { - ibucket = this.bitsToBucketIndices[bits] = this.buckets.length; - this.buckets.push(new Map()); + const bucket = this.bitsToBucket.get(bits) || (new Map()); + if ( bucket.size === 0 ) { + this.bitsToBucket.set(bits, bucket); } const tokenHash = args[1]; const fdata = args[2]; - const bucket = this.buckets[ibucket]; let iunit = bucket.get(tokenHash) || 0; if ( tokenHash === DOT_TOKEN_HASH ) { @@ -4204,9 +4206,9 @@ FilterContainer.prototype.freeze = function() { // the block-important realm should be checked when and only when // there is a matched exception filter, which important filters are // meant to override. - if ( (bits & ActionBitsMask) === BlockImportant ) { + if ( (bits & ActionBitsMask) === BLOCKIMPORTANT_REALM ) { this.addFilterUnit( - bits & ~Important, + bits & ~IMPORTANT_REALM, tokenHash, filterFromCompiled(fdata) ); @@ -4348,14 +4350,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { } const realms = new Map([ - [ BlockAction, 'block' ], - [ AllowAction, 'allow' ], - [ ModifyAction, 'modify' ], + [ BLOCK_REALM, 'block' ], + [ ALLOW_REALM, 'allow' ], + [ MODIFY_REALMS, 'modify' ], ]); const partyness = new Map([ - [ AnyParty, '' ], - [ FirstParty, 'firstParty' ], - [ ThirdParty, 'thirdParty' ], + [ ANYPARTY_REALM, '' ], + [ FIRSTPARTY_REALM, 'firstParty' ], + [ THIRDPARTY_REALM, 'thirdParty' ], ]); const types = new Set([ 'no_type', @@ -4403,7 +4405,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { // Collect generichide filters const generichideExclusions = []; { - const bucket = buckets.get(AllowAction | typeNameToTypeValue['generichide']); + const bucket = buckets.get(ALLOW_REALM | typeNameToTypeValue['generichide']); if ( bucket ) { for ( const rules of bucket.values() ) { for ( const rule of rules ) { @@ -4460,7 +4462,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { operation: 'append', value: rule.__modifierValue, }]; - if ( rule.__modifierAction === AllowAction ) { + if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, 'Unsupported modifier exception'); } break; @@ -4471,7 +4473,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { operation: 'append', value: rule.__modifierValue.split('|').join(', '), }]; - if ( rule.__modifierAction === AllowAction ) { + if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, 'Unsupported modifier exception'); } break; @@ -4489,7 +4491,7 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { if ( rule.__modifierValue !== '' && resource === undefined ) { dnrAddRuleError(rule, `Unpatchable redirect filter: ${rule.__modifierValue}`); } - if ( rule.__modifierAction !== AllowAction ) { + if ( rule.__modifierAction !== ALLOW_REALM ) { const extensionPath = resource || token; rule.action.type = 'redirect'; rule.action.redirect = { extensionPath }; @@ -4534,14 +4536,14 @@ FilterContainer.prototype.dnrFromCompiled = function(op, context, ...args) { 'xmlhttprequest', ]; } - if ( rule.__modifierAction === AllowAction ) { + if ( rule.__modifierAction === ALLOW_REALM ) { dnrAddRuleError(rule, 'Unsupported modifier exception'); } break; case 'urltransform': { const path = rule.__modifierValue; let priority = rule.priority || 1; - if ( rule.__modifierAction !== AllowAction ) { + if ( rule.__modifierAction !== ALLOW_REALM ) { const transform = { path }; rule.action.type = 'redirect'; rule.action.redirect = { transform }; @@ -4574,12 +4576,10 @@ FilterContainer.prototype.addFilterUnit = function( tokenHash, inewunit ) { - let ibucket = this.bitsToBucketIndices[bits]; - if ( ibucket === 0 ) { - ibucket = this.bitsToBucketIndices[bits] = this.buckets.length; - this.buckets.push(new Map()); + const bucket = this.bitsToBucket.get(bits) || (new Map()); + if ( bucket.size === 0 ) { + this.bitsToBucket.set(bits, bucket); } - const bucket = this.buckets[ibucket]; const istoredunit = bucket.get(tokenHash) || 0; if ( istoredunit === 0 ) { bucket.set(tokenHash, inewunit); @@ -4614,7 +4614,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) { const t0 = Date.now(); while ( this.unitsToOptimize.length !== 0 ) { const { bits, tokenHash } = this.unitsToOptimize.pop(); - const bucket = this.buckets[this.bitsToBucketIndices[bits]]; + const bucket = this.bitsToBucket.get(bits); const iunit = bucket.get(tokenHash); const fc = filterGetClass(iunit); switch ( fc ) { @@ -4623,7 +4623,7 @@ FilterContainer.prototype.optimize = function(throttle = 0) { break; case FilterBucket: { const optimizeBits = - (tokenHash === NO_TOKEN_HASH) || (bits & ModifyAction) !== 0 + (tokenHash === NO_TOKEN_HASH) || (bits & MODIFY_REALMS) !== 0 ? 0b10 : 0b01; const inewunit = FilterBucket.optimize(iunit, optimizeBits); @@ -4658,14 +4658,6 @@ FilterContainer.prototype.toSelfie = async function(storage, path) { if ( typeof storage !== 'object' || storage === null ) { return; } if ( typeof storage.put !== 'function' ) { return; } - const bucketsToSelfie = ( ) => { - const selfie = []; - for ( const bucket of this.buckets ) { - selfie.push(Array.from(bucket)); - } - return selfie; - }; - bidiTrieOptimize(true); keyvalStore.setItem( 'SNFE.origHNTrieContainer.trieDetails', @@ -4700,8 +4692,10 @@ FilterContainer.prototype.toSelfie = async function(storage, path) { processedFilterCount: this.processedFilterCount, acceptedCount: this.acceptedCount, discardedCount: this.discardedCount, - bitsToBucketIndices: this.bitsToBucketIndices, - buckets: bucketsToSelfie(), + bitsToBucket: Array.from(this.bitsToBucket).map(kv => { + kv[1] = Array.from(kv[1]); + return kv; + }), urlTokenizer: urlTokenizer.toSelfie(), }) ) @@ -4750,12 +4744,6 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) { if ( results.slice(1).every(v => v === true) === false ) { return false; } - const bucketsFromSelfie = selfie => { - for ( let i = 0; i < selfie.length; i++ ) { - this.buckets[i] = new Map(selfie[i]); - } - }; - const details = results[0]; if ( typeof details !== 'object' || details === null ) { return false; } if ( typeof details.content !== 'string' ) { return false; } @@ -4770,8 +4758,10 @@ FilterContainer.prototype.fromSelfie = async function(storage, path) { this.processedFilterCount = selfie.processedFilterCount; this.acceptedCount = selfie.acceptedCount; this.discardedCount = selfie.discardedCount; - this.bitsToBucketIndices = selfie.bitsToBucketIndices; - bucketsFromSelfie(selfie.buckets); + this.bitsToBucket = new Map(selfie.bitsToBucket.map(kv => { + kv[1] = new Map(kv[1]); + return kv; + })); urlTokenizer.fromSelfie(selfie.urlTokenizer); // If this point is never reached, it means the internal state is @@ -4837,38 +4827,37 @@ FilterContainer.prototype.matchAndFetchModifiers = function( $requestMethodBit = fctxt.method || 0; $requestTypeValue = (typeBits & TypeBitsMask) >>> TypeBitsOffset; - const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; + const modifierType = modifierTypeFromName.get(modifierName); + const modifierBits = modifierBitsFromType.get(modifierType); + + const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; - const catBits00 = ModifyAction; - const catBits01 = ModifyAction | typeBits; - const catBits10 = ModifyAction | partyBits; - const catBits11 = ModifyAction | typeBits | partyBits; + const catBits00 = modifierBits; + const catBits01 = modifierBits | typeBits; + const catBits10 = modifierBits | partyBits; + const catBits11 = modifierBits | typeBits | partyBits; - const ibucket00 = this.bitsToBucketIndices[catBits00]; - const ibucket01 = typeBits !== 0 ? this.bitsToBucketIndices[catBits01] - : 0; - const ibucket10 = partyBits !== 0 - ? this.bitsToBucketIndices[catBits10] - : 0; - const ibucket11 = typeBits !== 0 && partyBits !== 0 - ? this.bitsToBucketIndices[catBits11] - : 0; + const bucket00 = this.bitsToBucket.get(catBits00); + const bucket01 = typeBits !== 0 + ? this.bitsToBucket.get(catBits01) + : undefined; + const bucket10 = partyBits !== 0 + ? this.bitsToBucket.get(catBits10) + : undefined; + const bucket11 = typeBits !== 0 && partyBits !== 0 + ? this.bitsToBucket.get(catBits11) + : undefined; if ( - ibucket00 === 0 && ibucket01 === 0 && - ibucket10 === 0 && ibucket11 === 0 + bucket00 === undefined && bucket01 === undefined && + bucket10 === undefined && bucket11 === undefined ) { return; } - const bucket00 = this.buckets[ibucket00]; - const bucket01 = this.buckets[ibucket01]; - const bucket10 = this.buckets[ibucket10]; - const bucket11 = this.buckets[ibucket11]; - const results = []; const env = { - type: modifierTypeFromName.get(modifierName) || 0, + type: modifierType || 0, bits: 0, th: 0, iunit: 0, @@ -4884,28 +4873,28 @@ FilterContainer.prototype.matchAndFetchModifiers = function( env.th = th; $tokenBeg = tokenHashes[i+1]; if ( - (ibucket00 !== 0) && + (bucket00 !== undefined) && (iunit = bucket00.get(th) || 0) !== 0 ) { env.bits = catBits00; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( - (ibucket01 !== 0) && + (bucket01 !== undefined) && (iunit = bucket01.get(th) || 0) !== 0 ) { env.bits = catBits01; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( - (ibucket10 !== 0) && + (bucket10 !== undefined) && (iunit = bucket10.get(th) || 0) !== 0 ) { env.bits = catBits10; env.iunit = iunit; filterMatchAndFetchModifiers(iunit, env); } if ( - (ibucket11 !== 0) && + (bucket11 !== undefined) && (iunit = bucket11.get(th) || 0) !== 0 ) { env.bits = catBits11; env.iunit = iunit; @@ -4921,7 +4910,7 @@ FilterContainer.prototype.matchAndFetchModifiers = function( // occurrences. if ( results.length === 1 ) { const result = results[0]; - if ( (result.bits & AllowAction) !== 0 ) { return; } + if ( (result.bits & ALLOW_REALM) !== 0 ) { return; } return [ result ]; } @@ -4932,9 +4921,9 @@ FilterContainer.prototype.matchAndFetchModifiers = function( for ( const result of results ) { const actionBits = result.bits & ActionBitsMask; const modifyValue = result.value; - if ( actionBits === BlockImportant ) { + if ( actionBits === BLOCKIMPORTANT_REALM ) { toAddImportant.set(modifyValue, result); - } else if ( actionBits === BlockAction ) { + } else if ( actionBits === BLOCK_REALM ) { toAdd.set(modifyValue, result); } else { toRemove.set(modifyValue, result); @@ -5014,55 +5003,50 @@ FilterContainer.prototype.realmMatchString = function( const catBits10 = realmBits | partyBits; const catBits11 = realmBits | typeBits | partyBits; - const ibucket00 = exactType === 0 - ? this.bitsToBucketIndices[catBits00] - : 0; - const ibucket01 = exactType !== 0 || typeBits !== 0 - ? this.bitsToBucketIndices[catBits01] - : 0; - const ibucket10 = exactType === 0 && partyBits !== 0 - ? this.bitsToBucketIndices[catBits10] - : 0; - const ibucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0 - ? this.bitsToBucketIndices[catBits11] - : 0; + const bucket00 = exactType === 0 + ? this.bitsToBucket.get(catBits00) + : undefined; + const bucket01 = exactType !== 0 || typeBits !== 0 + ? this.bitsToBucket.get(catBits01) + : undefined; + const bucket10 = exactType === 0 && partyBits !== 0 + ? this.bitsToBucket.get(catBits10) + : undefined; + const bucket11 = (exactType !== 0 || typeBits !== 0) && partyBits !== 0 + ? this.bitsToBucket.get(catBits11) + : undefined; if ( - ibucket00 === 0 && ibucket01 === 0 && - ibucket10 === 0 && ibucket11 === 0 + bucket00 === undefined && bucket01 === undefined && + bucket10 === undefined && bucket11 === undefined ) { return false; } - const bucket00 = this.buckets[ibucket00]; - const bucket01 = this.buckets[ibucket01]; - const bucket10 = this.buckets[ibucket10]; - const bucket11 = this.buckets[ibucket11]; - let catBits = 0, iunit = 0; // Pure hostname-based filters let tokenHash = DOT_TOKEN_HASH; if ( - (ibucket00 !== 0) && + (bucket00 !== undefined) && (iunit = bucket00.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits00; } else if ( - (ibucket01 !== 0) && + (bucket01 !== undefined) && (iunit = bucket01.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits01; } else if ( - (ibucket10 !== 0) && + (bucket10 !== undefined) && (iunit = bucket10.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { catBits = catBits10; } else if ( - (ibucket11 !== 0) && + (bucket11 !== undefined) && (iunit = bucket11.get(DOT_TOKEN_HASH) || 0) !== 0 && (filterMatch(iunit) === true) ) { @@ -5077,7 +5061,7 @@ FilterContainer.prototype.realmMatchString = function( if ( tokenHash === INVALID_TOKEN_HASH ) { return false; } $tokenBeg = tokenHashes[i+1]; if ( - (ibucket00 !== 0) && + (bucket00 !== undefined) && (iunit = bucket00.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { @@ -5085,7 +5069,7 @@ FilterContainer.prototype.realmMatchString = function( break; } if ( - (ibucket01 !== 0) && + (bucket01 !== undefined) && (iunit = bucket01.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { @@ -5093,7 +5077,7 @@ FilterContainer.prototype.realmMatchString = function( break; } if ( - (ibucket10 !== 0) && + (bucket10 !== undefined) && (iunit = bucket10.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { @@ -5101,7 +5085,7 @@ FilterContainer.prototype.realmMatchString = function( break; } if ( - (ibucket11 !== 0) && + (bucket11 !== undefined) && (iunit = bucket11.get(tokenHash) || 0) !== 0 && (filterMatch(iunit) === true) ) { @@ -5147,9 +5131,9 @@ FilterContainer.prototype.matchRequestReverse = function(type, url) { $docDomain = domainFromHostname($docHostname); // Exception filters - if ( this.realmMatchString(AllowAction, typeBits, FirstParty) ) { + if ( this.realmMatchString(ALLOW_REALM, typeBits, FIRSTPARTY_REALM) ) { // Important block filters. - if ( this.realmMatchString(BlockImportant, typeBits, FirstParty) ) { + if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, FIRSTPARTY_REALM) ) { return 1; } return 2; @@ -5194,7 +5178,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { typeBits |= 0x80000000; } - const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; + const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(fctxt.url); @@ -5211,11 +5195,11 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { // Evaluate block realm before allow realm, and allow realm before // block-important realm, i.e. by order of likelihood of a match. - const r = this.realmMatchString(BlockAction, typeBits, partyBits); + const r = this.realmMatchString(BLOCK_REALM, typeBits, partyBits); if ( r || (modifiers & 0b0010) !== 0 ) { if ( $isBlockImportant ) { return 1; } - if ( this.realmMatchString(AllowAction, typeBits, partyBits) ) { - if ( this.realmMatchString(BlockImportant, typeBits, partyBits) ) { + if ( this.realmMatchString(ALLOW_REALM, typeBits, partyBits) ) { + if ( this.realmMatchString(BLOCKIMPORTANT_REALM, typeBits, partyBits) ) { return 1; } return 2; @@ -5229,7 +5213,7 @@ FilterContainer.prototype.matchRequest = function(fctxt, modifiers = 0) { FilterContainer.prototype.matchHeaders = function(fctxt, headers) { const typeBits = typeNameToTypeValue[fctxt.type] || otherTypeBitValue; - const partyBits = fctxt.is3rdPartyToDoc() ? ThirdParty : FirstParty; + const partyBits = fctxt.is3rdPartyToDoc() ? THIRDPARTY_REALM : FIRSTPARTY_REALM; // Prime tokenizer: we get a normalized URL in return. $requestURL = urlTokenizer.setURL(fctxt.url); @@ -5245,13 +5229,13 @@ FilterContainer.prototype.matchHeaders = function(fctxt, headers) { $httpHeaders.init(headers); let r = 0; - if ( this.realmMatchString(HEADERS | BlockAction, typeBits, partyBits) ) { + if ( this.realmMatchString(HEADERS_REALM | BLOCK_REALM, typeBits, partyBits) ) { r = 1; } if ( r !== 0 && $isBlockImportant !== true ) { - if ( this.realmMatchString(HEADERS | AllowAction, typeBits, partyBits) ) { + if ( this.realmMatchString(HEADERS_REALM | ALLOW_REALM, typeBits, partyBits) ) { r = 2; - if ( this.realmMatchString(HEADERS | BlockImportant, typeBits, partyBits) ) { + if ( this.realmMatchString(HEADERS_REALM | BLOCKIMPORTANT_REALM, typeBits, partyBits) ) { r = 1; } } @@ -5275,7 +5259,7 @@ FilterContainer.prototype.redirectRequest = function(redirectEngine, fctxt) { } // Redirect to highest-ranked directive const directive = directives[highest]; - if ( (directive.bits & AllowAction) !== 0 ) { return directives; } + if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; } const { token } = parseRedirectRequestValue(directive); fctxt.redirectURL = redirectEngine.tokenToURL(fctxt, token); if ( fctxt.redirectURL === undefined ) { return; } @@ -5286,7 +5270,7 @@ FilterContainer.prototype.transformRequest = function(fctxt) { const directives = this.matchAndFetchModifiers(fctxt, 'urltransform'); if ( directives === undefined ) { return; } const directive = directives[directives.length-1]; - if ( (directive.bits & AllowAction) !== 0 ) { return directives; } + if ( (directive.bits & ALLOW_REALM) !== 0 ) { return directives; } const redirectURL = new URL(fctxt.url); if ( directive.value === redirectURL.pathname ) { return; } redirectURL.pathname = directive.value; @@ -5309,10 +5293,10 @@ function compareRedirectRequests(redirectEngine, a, b) { parseRedirectRequestValue(b); if ( redirectEngine.hasToken(btok) === false ) { return 1; } if ( abits !== bbits ) { - if ( (abits & Important) !== 0 ) { return 1; } - if ( (bbits & Important) !== 0 ) { return -1; } - if ( (abits & AllowAction) !== 0 ) { return -1; } - if ( (bbits & AllowAction) !== 0 ) { return 1; } + if ( (abits & IMPORTANT_REALM) !== 0 ) { return 1; } + if ( (bbits & IMPORTANT_REALM) !== 0 ) { return -1; } + if ( (abits & ALLOW_REALM) !== 0 ) { return -1; } + if ( (bbits & ALLOW_REALM) !== 0 ) { return 1; } } return aint - bint; } @@ -5348,7 +5332,7 @@ FilterContainer.prototype.filterQuery = function(fctxt) { const out = []; for ( const directive of directives ) { if ( params.size === 0 ) { break; } - const isException = (directive.bits & AllowAction) !== 0; + const isException = (directive.bits & ALLOW_REALM) !== 0; if ( isException && directive.value === '' ) { out.push(directive); break; @@ -5471,10 +5455,8 @@ FilterContainer.prototype.test = async function(docURL, type, url) { FilterContainer.prototype.bucketHistogram = function() { const results = []; - for ( let bits = 0; bits < this.bitsToBucketIndices.length; bits++ ) { - const ibucket = this.bitsToBucketIndices[bits]; - if ( ibucket === 0 ) { continue; } - for ( const [ th, iunit ] of this.buckets[ibucket] ) { + for ( const [ bits, bucket ] of this.bitsToBucket ) { + for ( const [ th, iunit ] of bucket ) { const token = urlTokenizer.stringFromTokenHash(th); const fc = filterGetClass(iunit); const count = fc.getCount !== undefined ? fc.getCount(iunit) : 1; @@ -5553,15 +5535,20 @@ FilterContainer.prototype.dump = function() { const thCounts = new Set(); const realms = new Map([ - [ BlockAction, 'block' ], - [ BlockImportant, 'block-important' ], - [ AllowAction, 'unblock' ], - [ ModifyAction, 'modify' ], + [ BLOCK_REALM, 'block' ], + [ BLOCKIMPORTANT_REALM, 'block-important' ], + [ ALLOW_REALM, 'unblock' ], + [ REDIRECT_REALM, 'redirect' ], + [ REMOVEPARAM_REALM, 'removeparam' ], + [ CSP_REALM, 'csp' ], + [ PERMISSIONS_REALM, 'permissions' ], + [ URLTRANSFORM_REALM, 'urltransform' ], + [ REPLACE_REALM, 'replace' ], ]); const partyness = new Map([ - [ AnyParty, 'any-party' ], - [ FirstParty, '1st-party' ], - [ ThirdParty, '3rd-party' ], + [ ANYPARTY_REALM, 'any-party' ], + [ FIRSTPARTY_REALM, '1st-party' ], + [ THIRDPARTY_REALM, '3rd-party' ], ]); for ( const [ realmBits, realmName ] of realms ) { toOutput(1, `+ realm: ${realmName}`); @@ -5573,11 +5560,11 @@ FilterContainer.prototype.dump = function() { if ( processedTypeBits.has(typeBits) ) { continue; } processedTypeBits.add(typeBits); const bits = realmBits | partyBits | typeBits; - const ibucket = this.bitsToBucketIndices[bits]; - if ( ibucket === 0 ) { continue; } - const thCount = this.buckets[ibucket].size; + const bucket = this.bitsToBucket.get(bits); + if ( bucket === undefined ) { continue; } + const thCount = bucket.size; toOutput(3, `+ type: ${typeName} (${thCount})`); - for ( const [ th, iunit ] of this.buckets[ibucket] ) { + for ( const [ th, iunit ] of bucket) { thCounts.add(th); const ths = thConstants.has(th) ? thConstants.get(th) diff --git a/src/js/traffic.js b/src/js/traffic.js index 9fbe936f05a2b..d98b443552b3f 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -32,6 +32,7 @@ import scriptletFilteringEngine from './scriptlet-filtering.js'; import staticNetFilteringEngine from './static-net-filtering.js'; import textEncode from './text-encode.js'; import µb from './background.js'; +import * as sfp from './static-filtering-parser.js'; import { sessionFirewall, @@ -483,11 +484,10 @@ const onBeforeBehindTheSceneRequest = function(fctxt) { const onHeadersReceived = function(details) { // https://github.com/uBlockOrigin/uBlock-issues/issues/610 // Process behind-the-scene requests in a special way. - if ( - details.tabId < 0 && - normalizeBehindTheSceneResponseHeaders(details) === false - ) { - return; + if ( details.tabId < 0 ) { + if ( normalizeBehindTheSceneResponseHeaders(details) === false ) { + return; + } } const fctxt = µb.filteringContext.fromWebrequestDetails(details); @@ -524,8 +524,6 @@ const onHeadersReceived = function(details) { } } - if ( isRootDoc === false && fctxt.itype !== fctxt.SUB_FRAME ) { return; } - // https://github.com/gorhill/uBlock/issues/2813 // Disable the blocking of large media elements if the document is itself // a media element: the resource was not prevented from loading so no @@ -539,10 +537,29 @@ const onHeadersReceived = function(details) { } } - // At this point we have a HTML document. - - const filteredHTML = - µb.canFilterResponseData && filterDocument(fctxt, details) === true; + const bodyFilterSession = bodyFilterer.canFilter(fctxt, details); + if ( bodyFilterSession !== undefined ) { + // `replace=` filter option + const replaceDirectives = + staticNetFilteringEngine.matchAndFetchModifiers(fctxt, 'replace'); + if ( replaceDirectives ) { + bodyFilterSession.addJob({ + fn: textResponseFilterer, + args: [ replaceDirectives ], + }); + } + // html filtering + if ( isRootDoc || fctxt.itype === fctxt.SUB_FRAME ) { + const selectors = htmlFilteringEngine.retrieve(bodyFilterSession); + if ( selectors ) { + bodyFilterSession.addJob({ + fn: htmlResponseFilterer, + args: [ selectors ], + }); + } + } + bodyFilterSession.launch(); + } let modifiedHeaders = false; if ( httpheaderFilteringEngine.apply(fctxt, responseHeaders) === true ) { @@ -551,7 +568,6 @@ const onHeadersReceived = function(details) { if ( injectCSP(fctxt, pageStore, responseHeaders) === true ) { modifiedHeaders = true; } - if ( injectPP(fctxt, pageStore, responseHeaders) === true ) { modifiedHeaders = true; } @@ -562,7 +578,7 @@ const onHeadersReceived = function(details) { // https://github.com/uBlockOrigin/uBlock-issues/issues/229 // Use `no-cache` instead of `no-cache, no-store, must-revalidate`, this // allows Firefox's offline mode to work as expected. - if ( (filteredHTML || modifiedHeaders) && dontCacheResponseHeaders ) { + if ( modifiedHeaders && dontCacheResponseHeaders ) { const cacheControl = µb.hiddenSettings.cacheControlForFirefox1376932; if ( cacheControl !== 'unset' ) { let i = headerIndexFromName('cache-control', responseHeaders); @@ -601,272 +617,301 @@ const normalizeBehindTheSceneResponseHeaders = function(details) { return true; }; -/******************************************************************************* - - The response body filterer is responsible for: +/******************************************************************************/ - - HTML filtering +function textResponseFilterer(session, directives) { + const applied = []; + for ( const directive of directives ) { + if ( directive.refs instanceof Object === false ) { continue; } + const { refs } = directive; + if ( refs.$cache === null ) { + refs.$cache = sfp.parseReplaceValue(refs.value); + } + const cache = refs.$cache; + if ( cache === undefined ) { continue; } + if ( cache.re.test(session.getString()) !== true ) { continue; } + session.setString(session.getString().replace( + cache.re, + cache.replacement + )); + applied.push(directive); + } - In the spirit of efficiency, the response body filterer works this way: + if ( applied && logger.enabled ) { + session.setRealm('network') + .pushFilters(applied.map(a => a.logData())) + .toLogger(); + } - If: - - HTML filtering: no. - Then: - No response body filtering is initiated. + return applied.length !== 0; +} - If: - - HTML filtering: yes. - Then: - Assemble all response body data into a single buffer. Once all the - response data has been received, create a document from it. Then: - - Remove all DOM elements matching HTML filters. - Then serialize the resulting modified document as the new response - body. +/******************************************************************************/ -**/ +function htmlResponseFilterer(session, selectors) { + if ( htmlResponseFilterer.domParser === null ) { + htmlResponseFilterer.domParser = new DOMParser(); + htmlResponseFilterer.xmlSerializer = new XMLSerializer(); + } -const filterDocument = (( ) => { - const filterers = new Map(); - let domParser, xmlSerializer, - utf8TextDecoder, textDecoder, textEncoder; + const doc = htmlResponseFilterer.domParser.parseFromString( + session.getString(), + session.mime + ); - const textDecode = function(encoding, buffer) { - if ( - textDecoder !== undefined && - textDecoder.encoding !== encoding - ) { - textDecoder = undefined; + if ( selectors !== undefined ) { + if ( htmlFilteringEngine.apply(doc, session, selectors) !== true ) { + return false; } - if ( textDecoder === undefined ) { - textDecoder = new TextDecoder(encoding); - } - return textDecoder.decode(buffer); - }; + } + + // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353 + const doctypeStr = [ + doc.doctype instanceof Object ? + htmlResponseFilterer.xmlSerializer.serializeToString(doc.doctype) + '\n' : + '', + doc.documentElement.outerHTML, + ].join('\n'); + session.setString(doctypeStr); + + return true; +} +htmlResponseFilterer.domParser = null; +htmlResponseFilterer.xmlSerializer = null; + + +/******************************************************************************* + + The response body filterer is responsible for: + + - Realize static network filter option `replace=` + - HTML filtering + +**/ +const bodyFilterer = (( ) => { + const sessions = new Map(); const reContentTypeDocument = /^(?:text\/html|application\/xhtml\+xml)/i; const reContentTypeCharset = /charset=['"]?([^'" ]+)/i; - - const mimeFromContentType = function(contentType) { + const otherValidMimes = new Set([ + 'application/javascript', + 'application/json', + 'application/xml', + 'application/xhtml+xml', + ]); + let textDecoder, textEncoder; + + const mimeFromContentType = contentType => { const match = reContentTypeDocument.exec(contentType); - if ( match !== null ) { - return match[0].toLowerCase(); - } + if ( match === null ) { return; } + return match[0].toLowerCase(); }; - const charsetFromContentType = function(contentType) { + const charsetFromContentType = contentType => { const match = reContentTypeCharset.exec(contentType); - if ( match !== null ) { - return match[1].toLowerCase(); - } + if ( match === null ) { return; } + return match[1].toLowerCase(); }; - const charsetFromDoc = function(doc) { - let meta = doc.querySelector('meta[charset]'); - if ( meta !== null ) { - return meta.getAttribute('charset').toLowerCase(); + const charsetFromStream = bytes => { + if ( bytes.length < 3 ) { return; } + if ( bytes[0] === 0xEF && bytes[1] === 0xBB && bytes[2] === 0xBF ) { + return 'utf-8'; } - meta = doc.querySelector( - 'meta[http-equiv="content-type" i][content]' - ); - if ( meta !== null ) { - return charsetFromContentType(meta.getAttribute('content')); + let i = -1; + while ( i < 65536 ) { + i += 1; + /* c */ if ( bytes[i+0] !== 0x63 ) { continue; } + /* h */ if ( bytes[i+1] !== 0x68 ) { continue; } + /* a */ if ( bytes[i+2] !== 0x61 ) { continue; } + /* r */ if ( bytes[i+3] !== 0x72 ) { continue; } + /* s */ if ( bytes[i+4] !== 0x73 ) { continue; } + /* e */ if ( bytes[i+5] !== 0x65 ) { continue; } + /* t */ if ( bytes[i+6] !== 0x74 ) { continue; } + break; + } + if ( (i - 40) >= 65536 ) { return; } + i += 8; + // find first alpha character + let j = 0; + while ( j < 8 ) { + j += 1; + const c = bytes[i+j]; + if ( c >= 0x41 && c <= 0x5A ) { break; } + if ( c >= 0x61 && c <= 0x7A ) { break; } + j += 1; + } + if ( j === 8 ) { return; } + i += j; + // Collect characters until first non charset-name-character + const chars = []; + j = 0; + while ( j < 24 ) { + const c = bytes[i+j]; + if ( c < 0x2D ) { break; } + if ( c > 0x2D && c < 0x30 ) { break; } + if ( c > 0x39 && c < 0x41 ) { break; } + if ( c > 0x5A && c < 0x61 ) { break; } + if ( c > 0x7A ) { break; } + chars.push(c); + j += 1; } + if ( j === 20 ) { return; } + return String.fromCharCode(...chars).toLowerCase(); }; - const streamClose = function(filterer, buffer) { + const streamClose = (session, buffer) => { if ( buffer !== undefined ) { - filterer.stream.write(buffer); - } else if ( filterer.buffer !== undefined ) { - filterer.stream.write(filterer.buffer); + session.stream.write(buffer); + } else if ( session.buffer !== undefined ) { + session.stream.write(session.buffer); } - filterer.stream.close(); + session.stream.close(); }; const onStreamData = function(ev) { - const filterer = filterers.get(this); - if ( filterer === undefined ) { + const session = sessions.get(this); + if ( session === undefined ) { this.write(ev.data); this.disconnect(); return; } - if ( - this.status !== 'transferringdata' && - this.status !== 'finishedtransferringdata' - ) { - filterers.delete(this); - this.disconnect(); - return; + if ( this.status !== 'transferringdata' ) { + if ( this.status !== 'finishedtransferringdata' ) { + sessions.delete(this); + this.disconnect(); + return; + } } - // TODO: - // - Possibly improve buffer growth, if benchmarking shows it's worth - // it. - // - Also evaluate whether keeping a list of buffers and then decoding - // them in sequence using TextDecoder's "stream" option is more - // efficient. Can the data buffers be safely kept around for later - // use? - // - Informal, quick benchmarks seem to show most of the overhead is - // from calling TextDecoder.decode() and TextEncoder.encode(), and if - // confirmed, there is nothing which can be done uBO-side to reduce - // overhead. - if ( filterer.buffer === null ) { - filterer.buffer = new Uint8Array(ev.data); + if ( session.buffer === null ) { + session.buffer = new Uint8Array(ev.data); return; } const buffer = new Uint8Array( - filterer.buffer.byteLength + - ev.data.byteLength + session.buffer.byteLength + ev.data.byteLength ); - buffer.set(filterer.buffer); - buffer.set(new Uint8Array(ev.data), filterer.buffer.byteLength); - filterer.buffer = buffer; + buffer.set(session.buffer); + buffer.set(new Uint8Array(ev.data), session.buffer.byteLength); + session.buffer = buffer; }; const onStreamStop = function() { - const filterer = filterers.get(this); - filterers.delete(this); - if ( filterer === undefined || filterer.buffer === null ) { + const session = sessions.get(this); + sessions.delete(this); + if ( session === undefined || session.buffer === null ) { this.close(); return; } if ( this.status !== 'finishedtransferringdata' ) { return; } - if ( domParser === undefined ) { - domParser = new DOMParser(); - xmlSerializer = new XMLSerializer(); + // If encoding is still unknown, try to extract from stream data + if ( session.charset === undefined ) { + const charsetFound = charsetFromStream(session.buffer); + if ( charsetFound === undefined ) { return streamClose(session); } + const charsetUsed = textEncode.normalizeCharset(charsetFound); + if ( charsetUsed === undefined ) { return streamClose(session); } + session.charset = charsetUsed; } + + let modified = false; + while ( session.jobs.length !== 0 ) { + const job = session.jobs.shift(); + modified = job.fn(session, ...job.args) || modified; + } + if ( modified !== true ) { return streamClose(session); } + if ( textEncoder === undefined ) { textEncoder = new TextEncoder(); } + let encodedStream = textEncoder.encode(session.str); - let doc; - - // If stream encoding is still unknnown, try to extract from document. - let charsetFound = filterer.charset, - charsetUsed = charsetFound; - if ( charsetFound === undefined ) { - if ( utf8TextDecoder === undefined ) { - utf8TextDecoder = new TextDecoder(); - } - doc = domParser.parseFromString( - utf8TextDecoder.decode(filterer.buffer.slice(0, 1024)), - filterer.mime - ); - charsetFound = charsetFromDoc(doc); - charsetUsed = textEncode.normalizeCharset(charsetFound); - if ( charsetUsed === undefined ) { - return streamClose(filterer); - } + if ( session.charset !== 'utf-8' ) { + encodedStream = textEncode.encode(session.charset, encodedStream); } - doc = domParser.parseFromString( - textDecode(charsetUsed, filterer.buffer), - filterer.mime - ); + streamClose(session, encodedStream); + }; - // https://github.com/gorhill/uBlock/issues/3507 - // In case of no explicit charset found, try to find one again, but - // this time with the whole document parsed. - if ( charsetFound === undefined ) { - charsetFound = textEncode.normalizeCharset(charsetFromDoc(doc)); - if ( charsetFound !== charsetUsed ) { - if ( charsetFound === undefined ) { - return streamClose(filterer); + const onStreamError = function() { + sessions.delete(this); + }; + + return class Session extends µb.FilteringContext { + constructor(fctxt, details, mime, charset) { + super(fctxt); + this.entity = entityFromDomain(this.getDomain()); + this.stream = null; + this.buffer = null; + this.mime = mime; + this.charset = charset; + this.str = null; + this.jobs = []; + } + getString() { + if ( this.str !== null ) { return this.str; } + if ( textDecoder !== undefined ) { + if ( textDecoder.encoding !== this.charset ) { + textDecoder = undefined; } - charsetUsed = charsetFound; - doc = domParser.parseFromString( - textDecode(charsetFound, filterer.buffer), - filterer.mime - ); } - } - - let modified = false; - if ( filterer.selectors !== undefined ) { - if ( htmlFilteringEngine.apply(doc, filterer) ) { - modified = true; + if ( textDecoder === undefined ) { + textDecoder = new TextDecoder(this.charset); } + this.str = textDecoder.decode(this.buffer); + return this.str; } - - if ( modified === false ) { - return streamClose(filterer); + setString(s) { + this.str = s; } - - // https://stackoverflow.com/questions/6088972/get-doctype-of-an-html-as-string-with-javascript/10162353#10162353 - const doctypeStr = doc.doctype instanceof Object ? - xmlSerializer.serializeToString(doc.doctype) + '\n' : - ''; - - // https://github.com/gorhill/uBlock/issues/3391 - let encodedStream = textEncoder.encode( - doctypeStr + - doc.documentElement.outerHTML - ); - if ( charsetUsed !== 'utf-8' ) { - encodedStream = textEncode.encode( - charsetUsed, - encodedStream - ); + addJob(job) { + this.jobs.push(job); + } + launch() { + if ( this.jobs.length === 0 ) { return; } + this.stream = browser.webRequest.filterResponseData(this.id); + this.stream.ondata = onStreamData; + this.stream.onstop = onStreamStop; + this.stream.onerror = onStreamError; + sessions.set(this.stream, this); + return true; } + static canFilter(fctxt, details) { + if ( µb.canFilterResponseData !== true ) { return; } - streamClose(filterer, encodedStream); - }; + // https://github.com/gorhill/uBlock/issues/3478 + const statusCode = details.statusCode || 0; + if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) { + return; + } - const onStreamError = function() { - filterers.delete(this); - }; + const hostname = fctxt.getHostname(); + if ( hostname === '' ) { return; } - return function(fctxt, extras) { - // https://github.com/gorhill/uBlock/issues/3478 - const statusCode = extras.statusCode || 0; - if ( statusCode !== 0 && (statusCode < 200 || statusCode >= 300) ) { - return; - } + // https://bugzilla.mozilla.org/show_bug.cgi?id=1426789 + const headers = details.responseHeaders; + const disposition = headerValueFromName('content-disposition', headers); + if ( disposition !== '' && disposition.startsWith('inline') === false ) { + return; + } - const hostname = fctxt.getHostname(); - if ( hostname === '' ) { return; } - - const domain = fctxt.getDomain(); - - const request = { - stream: undefined, - tabId: fctxt.tabId, - url: fctxt.url, - hostname: hostname, - domain: domain, - entity: entityFromDomain(domain), - selectors: undefined, - buffer: null, - mime: 'text/html', - charset: undefined - }; + const contentType = headerValueFromName('content-type', headers); + let mime, charset; + if ( contentType !== '' ) { + mime = mimeFromContentType(contentType); + if ( mime === undefined ) { return; } + charset = charsetFromContentType(contentType); + if ( charset !== undefined ) { + charset = textEncode.normalizeCharset(charset); + if ( charset === undefined ) { return; } + } + } - request.selectors = htmlFilteringEngine.retrieve(request); - if ( request.selectors === undefined ) { return; } - - const headers = extras.responseHeaders; - const contentType = headerValueFromName('content-type', headers); - if ( contentType !== '' ) { - request.mime = mimeFromContentType(contentType); - if ( request.mime === undefined ) { return; } - let charset = charsetFromContentType(contentType); - if ( charset !== undefined ) { - charset = textEncode.normalizeCharset(charset); - if ( charset === undefined ) { return; } - request.charset = charset; + if ( mime.startsWith('text/') === false ) { + if ( otherValidMimes.has(mime) === false ) { return; } } + + return new Session(fctxt, details, mime, charset); } - // https://bugzilla.mozilla.org/show_bug.cgi?id=1426789 - const disposition = headerValueFromName('content-disposition', headers); - if ( disposition !== '' && disposition.startsWith('inline') === false ) { return; } - - const stream = request.stream = - browser.webRequest.filterResponseData(extras.requestId); - stream.ondata = onStreamData; - stream.onstop = onStreamStop; - stream.onerror = onStreamError; - filterers.set(stream, request); - - return true; }; })();