From 6867a455526eeff810490771482b24dfdb31e535 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 25 Sep 2017 23:25:11 +0200 Subject: [PATCH] feat(javascript): implement function name mapping This commit adds support for libsourcemap 0.8.2 which adds function name mappings for javascript. There was an earlier verison of the library that supported this but failed to handle unicode ranges in some cases and could cause exceptions. The newer version instead fails silently if the sourcemap file is incorrect. --- CHANGES | 2 + requirements-base.txt | 2 +- setup.py | 3 +- src/sentry/lang/javascript/cache.py | 10 ++-- src/sentry/lang/javascript/processor.py | 48 ++++++++++++-------- tests/sentry/lang/javascript/test_example.py | 11 ++--- tests/sentry/lang/javascript/test_plugin.py | 2 +- 7 files changed, 47 insertions(+), 31 deletions(-) diff --git a/CHANGES b/CHANGES index 20c44e54e012e7..15c08e57930ca8 100644 --- a/CHANGES +++ b/CHANGES @@ -16,6 +16,8 @@ Version 8.20 ------------ - Make BitBucket repositories enabled by default - Add raw data toggle for Additional Data +- Improved function name resolving for JavaScript sourcemaps + - Add initial support for Redis Cluster. - Support a list of hosts in the ``redis.clusters`` configuration along side the traditional dictionary style configuration. diff --git a/requirements-base.txt b/requirements-base.txt index 398fe0de8ac3c6..e3ad326b65ad81 100644 --- a/requirements-base.txt +++ b/requirements-base.txt @@ -19,7 +19,7 @@ hiredis>=0.1.0,<0.2.0 honcho>=0.7.0,<0.8.0 kombu==3.0.35 ipaddress>=1.0.16,<1.1.0 -libsourcemap>=0.7.2,<0.8.0 +libsourcemap>=0.8.2,<0.9.0 loremipsum>=1.0.5,<1.1.0 lxml>=3.4.1 mock>=0.8.0,<1.1 diff --git a/setup.py b/setup.py index 20d7c143645edc..bc1ef506b4f7da 100755 --- a/setup.py +++ b/setup.py @@ -62,8 +62,9 @@ IS_LIGHT_BUILD = os.environ.get('SENTRY_LIGHT_BUILD') == '1' - # we use pip requirements files to improve Docker layer caching + + def get_requirements(env): with open('requirements-{}.txt'.format(env)) as fp: return [x.strip() for x in fp.read().split('\n') if not x.startswith('#')] diff --git a/src/sentry/lang/javascript/cache.py b/src/sentry/lang/javascript/cache.py index 689d2d4434af0b..9edbeec5e47baa 100644 --- a/src/sentry/lang/javascript/cache.py +++ b/src/sentry/lang/javascript/cache.py @@ -20,7 +20,7 @@ def _get_canonical_url(self, url): url = self._aliases[url] return url - def get(self, url): + def get(self, url, raw=False): url = self._get_canonical_url(url) try: parsed, rv = self._cache[url] @@ -30,7 +30,10 @@ def get(self, url): # We have already gotten this file and we've # decoded the response, so just return if parsed: - return rv + parsed, raw_body = rv + if raw: + return raw_body + return parsed # Otherwise, we have a 2-tuple that needs to be applied body, encoding = rv @@ -40,10 +43,11 @@ def get(self, url): if callable(body): body = body() + raw_body = body body = body.decode(codec_lookup(encoding, 'utf-8').name, 'replace').split(u'\n') # Set back a marker to indicate we've parsed this url - self._cache[url] = (True, body) + self._cache[url] = (True, (body, raw_body)) return body def get_errors(self, url): diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py index 46a6d549879090..39583ba38e2458 100644 --- a/src/sentry/lang/javascript/processor.py +++ b/src/sentry/lang/javascript/processor.py @@ -595,24 +595,34 @@ def process_frame(self, processable_frame, processing_task): ) if token is not None: - # Token's return zero-indexed lineno's + # the tokens are zero indexed, so offset correctly new_frame['lineno'] = token.src_line + 1 - new_frame['colno'] = token.src_col - last_token = None - - # we might go back to a frame that is unhandled. In that - # case we might not find the token in the data. - if processable_frame.previous_frame: - last_token = processable_frame.previous_frame.data.get('token') - - # The offending function is always the previous function in the stack - # Honestly, no idea what the bottom most frame is, so - # we're ignoring that atm. - # - # XXX: we should actually be parsing the source code here - # and not use the last token - if last_token: - new_frame['function'] = last_token.name or frame.get('function') + new_frame['colno'] = token.src_col + 1 + + # Find the original function name with a bit of guessing + original_function_name = None + + # In the ideal case we can use the function name from the + # frame and the location to resolve the original name + # through the heuristics in our sourcemap library. + if frame.get('function'): + minified_source = self.get_source(frame['abs_path'], raw=True) + original_function_name = sourcemap_view.get_original_function_name( + token.dst_line, token.dst_col, frame['function'], + minified_source) + if original_function_name is None: + last_token = None + + # Find the previous token for function name handling as a + # fallback. + if processable_frame.previous_frame and \ + processable_frame.previous_frame.processor is self: + last_token = processable_frame.previous_frame.data.get('token') + if last_token: + original_function_name = last_token.name + + if original_function_name is not None: + new_frame['function'] = original_function_name filename = token.src # special case webpack support @@ -697,10 +707,10 @@ def expand_frame(self, frame, source=None): return True return False - def get_source(self, filename): + def get_source(self, filename, raw=False): if filename not in self.cache: self.cache_source(filename) - return self.cache.get(filename) + return self.cache.get(filename, raw=raw) def cache_source(self, filename): sourcemaps = self.sourcemaps diff --git a/tests/sentry/lang/javascript/test_example.py b/tests/sentry/lang/javascript/test_example.py index a53444499df8ab..3aaa4d35f7637b 100644 --- a/tests/sentry/lang/javascript/test_example.py +++ b/tests/sentry/lang/javascript/test_example.py @@ -67,13 +67,14 @@ def test_sourcemap_expansion(self): assert len(frame_list) == 4 + import pprint + pprint.pprint(frame_list) + assert frame_list[0].function == 'produceStack' assert frame_list[0].lineno == 6 assert frame_list[0].filename == 'index.html' - # This function name is obviously wrong but the current logic we - # have does not permit better data here - assert frame_list[1].function == 'i' + assert frame_list[1].function == 'test' assert frame_list[1].lineno == 20 assert frame_list[1].filename == 'test.js' @@ -81,8 +82,6 @@ def test_sourcemap_expansion(self): assert frame_list[2].lineno == 15 assert frame_list[2].filename == 'test.js' - # This function name is obviously wrong but the current logic we - # have does not permit better data here - assert frame_list[3].function == 'cb' + assert frame_list[3].function == 'onFailure' assert frame_list[3].lineno == 5 assert frame_list[3].filename == 'test.js' diff --git a/tests/sentry/lang/javascript/test_plugin.py b/tests/sentry/lang/javascript/test_plugin.py index b62335f8e894dd..238a3db68ce54e 100644 --- a/tests/sentry/lang/javascript/test_plugin.py +++ b/tests/sentry/lang/javascript/test_plugin.py @@ -883,7 +883,7 @@ def test_sourcemap_expansion_with_missing_source(self): # ... but line, column numbers are still correctly mapped assert frame.lineno == 3 - assert frame.colno == 8 + assert frame.colno == 9 @responses.activate def test_failed_sourcemap_expansion(self):