Skip to content

Commit

Permalink
separately track unsafe getters
Browse files Browse the repository at this point in the history
  • Loading branch information
guybedford committed Nov 2, 2020
1 parent f50b9ef commit f435e0c
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 38 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c
@mkdir -p lib
../wasi-sdk-11.0/bin/clang src/lexer.c -I include-wasm --sysroot=../wasi-sdk-11.0/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \
-Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree \
--export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ru,--export=us,--export=ue \
-Wno-logical-op-parentheses -Wno-parentheses \
-Oz

Expand Down
68 changes: 46 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE)
EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN
EXPORTS_DEFINE: `Object` `.` `defineProperty `(` IDENTIFIER_STRING `, {`
EXPORTS_DEFINE: `Object` `.` `defineProperty `(` EXPORTS_IDENFITIER `,` IDENTIFIER_STRING
EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {`
(`enumerable: true,`)?
(
`value:` |
Expand Down Expand Up @@ -121,7 +123,9 @@ EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2

Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment.

* The returned export names are taken to be the combination of the `IDENTIFIER` and `IDENTIFIER_STRING` slots for all `EXPORTS_MEMBER`, `EXPORTS_LITERAL` and `EXPORTS_DEFINE` matches.
* The returned export names are taken to be the combination of:
1. All `IDENTIFIER` and `IDENTIFIER_STRING` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches.
2. The first `IDENTIFIER_STRING` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match.
* The reexport specifiers are taken to be the the combination of:
1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`.
2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`.
Expand Down Expand Up @@ -162,6 +166,8 @@ It will in turn underclassify in cases where the identifiers are renamed:
})(exports);
```

#### Getter Exports Parsing

`Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression:

```js
Expand All @@ -188,6 +194,24 @@ Object.defineProperty(exports, 'd', { value: 'd' });
Object.defineProperty(exports, '__esModule', { value: true });
```

To avoid matching getters that have side effects, any getter for an export name that does not support the forms above will
opt-out of the getter matching:

```js
// DETECTS: NO EXPORTS
Object.defineProperty(exports, 'a', {
value: 'no problem'
});

if (false) {
Object.defineProperty(module.exports, 'a', {
get () {
return dynamic();
}
})
}
```

Alternative object definition structures or getter function bodies are not detected:

```js
Expand Down Expand Up @@ -337,63 +361,63 @@ JS Build:

```
Module load time
> 5ms
> 4ms
Cold Run, All Samples
test/samples/*.js (3635 KiB)
> 323ms
> 299ms
Warm Runs (average of 25 runs)
test/samples/angular.js (1410 KiB)
> 14.84ms
> 13.96ms
test/samples/angular.min.js (303 KiB)
> 4.8ms
> 4.72ms
test/samples/d3.js (553 KiB)
> 7.84ms
> 6.76ms
test/samples/d3.min.js (250 KiB)
> 4ms
test/samples/magic-string.js (34 KiB)
> 0.72ms
> 0.64ms
test/samples/magic-string.min.js (20 KiB)
> 0.4ms
> 0ms
test/samples/rollup.js (698 KiB)
> 9.32ms
> 8.48ms
test/samples/rollup.min.js (367 KiB)
> 6.52ms
> 5.36ms
Warm Runs, All Samples (average of 25 runs)
test/samples/*.js (3635 KiB)
> 44ms
> 40.28ms
```

Wasm Build:
```
Module load time
> 11ms
> 10ms
Cold Run, All Samples
test/samples/*.js (3635 KiB)
> 42ms
> 43ms
Warm Runs (average of 25 runs)
test/samples/angular.js (1410 KiB)
> 9.92ms
> 9.32ms
test/samples/angular.min.js (303 KiB)
> 3.2ms
> 3.16ms
test/samples/d3.js (553 KiB)
> 5.2ms
> 5ms
test/samples/d3.min.js (250 KiB)
> 2.52ms
> 2.32ms
test/samples/magic-string.js (34 KiB)
> 0.16ms
test/samples/magic-string.min.js (20 KiB)
> 0.04ms
> 0ms
test/samples/rollup.js (698 KiB)
> 6.44ms
> 6.28ms
test/samples/rollup.min.js (367 KiB)
> 3.96ms
> 3.6ms
Warm Runs, All Samples (average of 25 runs)
test/samples/*.js (3635 KiB)
> 30.48ms
> 27.76ms
```

### Wasm Build Steps
Expand Down
2 changes: 1 addition & 1 deletion bench/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Promise.resolve().then(async () => {
console.log('Module load time');
{
const start = process.hrtime.bigint();
var { default: parse } = await import('../lexer.js');
var { parse } = await import('../lexer.js');
console.log(`> ${c.bold.green(Math.round(Number(process.hrtime.bigint() - start) / 1e6) + 'ms')}`);
}

Expand Down
39 changes: 38 additions & 1 deletion include-wasm/cjs-module-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Slice* export_write_head = NULL;
Slice* first_reexport = NULL;
Slice* reexport_read_head = NULL;
Slice* reexport_write_head = NULL;
Slice* first_unsafe_getter = NULL;
Slice* unsafe_getter_read_head = NULL;
Slice* unsafe_getter_write_head = NULL;
void* analysis_base;
void* analysis_head;

Expand All @@ -48,6 +51,9 @@ const uint16_t* sa (uint32_t utf16Len) {
first_reexport = NULL;
reexport_write_head = NULL;
reexport_read_head = NULL;
first_unsafe_getter = NULL;
unsafe_getter_write_head = NULL;
unsafe_getter_read_head = NULL;
return source;
}

Expand All @@ -72,6 +78,14 @@ uint32_t res () {
uint32_t ree () {
return reexport_read_head->end - source;
}
// getUnsafeGetterStart
uint32_t us () {
return unsafe_getter_read_head->start - source;
}
// getUnsafeGetterEnd
uint32_t ue () {
return unsafe_getter_read_head->end - source;
}
// readExport
bool re () {
if (export_read_head == NULL)
Expand All @@ -92,6 +106,16 @@ bool rre () {
return false;
return true;
}
// readUnsafeGetter
bool ru () {
if (unsafe_getter_read_head == NULL)
unsafe_getter_read_head = first_unsafe_getter;
else
unsafe_getter_read_head = unsafe_getter_read_head->next;
if (unsafe_getter_read_head == NULL)
return false;
return true;
}

bool parse (uint32_t point);

Expand Down Expand Up @@ -119,14 +143,27 @@ void _addReexport (const uint16_t* start, const uint16_t* end) {
reexport->end = end;
reexport->next = NULL;
}
void _addUnsafeGetter (const uint16_t* start, const uint16_t* end) {
Slice* unsafe_getter = (Slice*)(analysis_head);
analysis_head = analysis_head + sizeof(Slice);
if (unsafe_getter_write_head == NULL)
first_unsafe_getter = unsafe_getter;
else
unsafe_getter_write_head->next = unsafe_getter;
unsafe_getter_write_head = unsafe_getter;
unsafe_getter->start = start;
unsafe_getter->end = end;
unsafe_getter->next = NULL;
}
void _clearReexports () {
reexport_write_head = NULL;
first_reexport = NULL;
}
void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport;
void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport;
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*) = &_addUnsafeGetter;
void (*clearReexports)() = &_clearReexports;
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*clearReexports)());
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());

enum RequireType {
Import,
Expand Down
2 changes: 1 addition & 1 deletion include/cjs-module-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ typedef struct StarExportBinding StarExportBinding;

void bail (uint32_t err);

bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*clearReexports)());
bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)());

enum RequireType {
Import,
Expand Down
14 changes: 10 additions & 4 deletions lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ let openTokenDepth,
starExportMap,
lastStarExportSpecifier,
_exports,
unsafeGetters,
reexports;

function resetState () {
Expand All @@ -27,6 +28,7 @@ function resetState () {
lastStarExportSpecifier = null;

_exports = new Set();
unsafeGetters = new Set();
reexports = new Set();
}

Expand All @@ -47,7 +49,7 @@ function parseCJS (source, name = '@') {
e.loc = pos;
throw e;
}
const result = { exports: [..._exports], reexports: [...reexports] };
const result = { exports: [..._exports].filter(expt => !unsafeGetters.has(expt)), reexports: [...reexports] };
resetState();
return result;
}
Expand Down Expand Up @@ -260,6 +262,7 @@ function tryParseObjectDefineOrKeys (keys) {
pos++;
ch = commentWhitespace();
if (ch === 100/*d*/ && source.startsWith('efineProperty', pos + 1)) {
let expt;
while (true) {
pos += 14;
revertPos = pos - 1;
Expand All @@ -276,7 +279,7 @@ function tryParseObjectDefineOrKeys (keys) {
let quot = ch;
const exportPos = ++pos;
if (!identifier() || source.charCodeAt(pos) !== quot) break;
const expt = source.slice(exportPos, pos);
expt = source.slice(exportPos, pos);
pos++;
ch = commentWhitespace();
if (ch !== 44/*,*/) break;
Expand Down Expand Up @@ -304,9 +307,9 @@ function tryParseObjectDefineOrKeys (keys) {
pos += 5;
ch = commentWhitespace();
if (ch !== 58/*:*/) break;
pos++;
addExport(expt);
break;
pos = revertPos;
return;
}
else if (ch === 103/*g*/) {
if (!source.startsWith('et', pos + 1)) break;
Expand Down Expand Up @@ -372,6 +375,9 @@ function tryParseObjectDefineOrKeys (keys) {
}
break;
}
if (expt) {
unsafeGetters.add(expt);
}
}
else if (keys && ch === 107/*k*/ && source.startsWith('eys', pos + 1)) {
while (true) {
Expand Down
18 changes: 13 additions & 5 deletions src/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ const StarExportBinding* STAR_EXPORT_STACK_END = &starExportStack_[MAX_STAR_EXPO

void (*addExport)(const uint16_t*, const uint16_t*);
void (*addReexport)(const uint16_t*, const uint16_t*);
void (*addUnsafeGetter)(const uint16_t*, const uint16_t*);
void (*clearReexports)();

// Note: parsing is based on the _assumption_ that the source is already valid
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) {
source = _source;
sourceLen = _sourceLen;
if (_addExport)
addExport = _addExport;
if (_addReexport)
addReexport = _addReexport;
if (_addUnsafeGetter)
addUnsafeGetter = _addUnsafeGetter;

templateStackDepth = 0;
openTokenDepth = 0;
Expand Down Expand Up @@ -272,6 +275,8 @@ void tryParseObjectDefineOrKeys (bool keys) {
pos++;
ch = commentWhitespace();
if (ch == 'd' && str_eq13(pos + 1, 'e', 'f', 'i', 'n', 'e', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'y')) {
uint16_t* exportStart = 0;
uint16_t* exportEnd = 0;
while (true) {
pos += 14;
revertPos = pos - 1;
Expand All @@ -286,9 +291,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
ch = commentWhitespace();
if (ch != '\'' && ch != '"') break;
uint16_t quot = ch;
uint16_t* exportStart = ++pos;
exportStart = ++pos;
if (!identifier(*pos) || *pos != quot) break;
uint16_t* exportEnd = pos;
exportEnd = pos;
pos++;
ch = commentWhitespace();
if (ch != ',') break;
Expand Down Expand Up @@ -316,9 +321,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
pos += 5;
ch = commentWhitespace();
if (ch != ':') break;
pos++;
addExport(exportStart, exportEnd);
break;
pos = revertPos;
return;
}
else if (ch == 'g') {
if (!str_eq2(pos + 1, 'e', 't')) break;
Expand Down Expand Up @@ -388,6 +393,9 @@ void tryParseObjectDefineOrKeys (bool keys) {
}
break;
}
if (exportEnds > 0) {
addUnsafeGetter(exportStart, exportEnd);
}
}
else if (keys && ch == 'k' && str_eq3(pos + 1, 'e', 'y', 's')) {
while (true) {
Expand Down
9 changes: 6 additions & 3 deletions src/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,18 @@ export function parse (source, name = '@') {
const addr = wasm.sa(len);
(isLE ? copyLE : copyBE)(source, new Uint16Array(wasm.memory.buffer, addr, len));

if (!wasm.parseCJS(addr, source.length, 0, 0))
if (!wasm.parseCJS(addr, source.length, 0, 0, 0))
throw Object.assign(new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`), { idx: wasm.e() });

let exports = new Set(), reexports = new Set();
let exports = new Set(), reexports = new Set(), unsafeGetters = new Set();

while (wasm.rre())
reexports.add(source.slice(wasm.res(), wasm.ree()));
while (wasm.ru())
unsafeGetters.add(source.slice(wasm.us(), wasm.ue()));
while (wasm.re()) {
let exptStr = source.slice(wasm.es(), wasm.ee());
if (!strictReserved.has(exptStr))
if (!strictReserved.has(exptStr) && !unsafeGetters.has(exptStr))
exports.add(exptStr);
}

Expand Down
Loading

0 comments on commit f435e0c

Please sign in to comment.