feat(javascript): implement function name mapping

This commit adds support for libsourcemap 0.8.2 which adds function name mappings for javascript. There was an earlier verison of the library that supported this but failed to handle unicode ranges in some cases and could cause exceptions. The newer version instead fails silently if the sourcemap file is incorrect.
getsentry · Sep 25, 2017 · 6867a45 · 6867a45
1 parent 2f3b54e
commit 6867a45
Show file tree

Hide file tree

Showing 7 changed files with 47 additions and 31 deletions.
diff --git a/CHANGES b/CHANGES
@@ -16,6 +16,8 @@ Version 8.20
 ------------
 - Make BitBucket repositories enabled by default
 - Add raw data toggle for Additional Data
+- Improved function name resolving for JavaScript sourcemaps
+
 - Add initial support for Redis Cluster.
 - Support a list of hosts in the ``redis.clusters`` configuration along side
   the traditional dictionary style configuration.

diff --git a/requirements-base.txt b/requirements-base.txt
@@ -19,7 +19,7 @@ hiredis>=0.1.0,<0.2.0
 honcho>=0.7.0,<0.8.0
 kombu==3.0.35
 ipaddress>=1.0.16,<1.1.0
-libsourcemap>=0.7.2,<0.8.0
+libsourcemap>=0.8.2,<0.9.0
 loremipsum>=1.0.5,<1.1.0
 lxml>=3.4.1
 mock>=0.8.0,<1.1

diff --git a/setup.py b/setup.py
@@ -62,8 +62,9 @@
 
 IS_LIGHT_BUILD = os.environ.get('SENTRY_LIGHT_BUILD') == '1'
 
-
 # we use pip requirements files to improve Docker layer caching
+
+
 def get_requirements(env):
     with open('requirements-{}.txt'.format(env)) as fp:
         return [x.strip() for x in fp.read().split('\n') if not x.startswith('#')]

diff --git a/src/sentry/lang/javascript/cache.py b/src/sentry/lang/javascript/cache.py
@@ -20,7 +20,7 @@ def _get_canonical_url(self, url):
             url = self._aliases[url]
         return url
 
-    def get(self, url):
+    def get(self, url, raw=False):
         url = self._get_canonical_url(url)
         try:
             parsed, rv = self._cache[url]
@@ -30,7 +30,10 @@ def get(self, url):
         # We have already gotten this file and we've
         # decoded the response, so just return
         if parsed:
-            return rv
+            parsed, raw_body = rv
+            if raw:
+                return raw_body
+            return parsed
 
         # Otherwise, we have a 2-tuple that needs to be applied
         body, encoding = rv
@@ -40,10 +43,11 @@ def get(self, url):
         if callable(body):
             body = body()
 
+        raw_body = body
         body = body.decode(codec_lookup(encoding, 'utf-8').name, 'replace').split(u'\n')
 
         # Set back a marker to indicate we've parsed this url
-        self._cache[url] = (True, body)
+        self._cache[url] = (True, (body, raw_body))
         return body
 
     def get_errors(self, url):

diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py
@@ -595,24 +595,34 @@ def process_frame(self, processable_frame, processing_task):
                     )
 
             if token is not None:
-                # Token's return zero-indexed lineno's
+                # the tokens are zero indexed, so offset correctly
                 new_frame['lineno'] = token.src_line + 1
-                new_frame['colno'] = token.src_col
-                last_token = None
-
-                # we might go back to a frame that is unhandled.  In that
-                # case we might not find the token in the data.
-                if processable_frame.previous_frame:
-                    last_token = processable_frame.previous_frame.data.get('token')
-
-                # The offending function is always the previous function in the stack
-                # Honestly, no idea what the bottom most frame is, so
-                # we're ignoring that atm.
-                #
-                # XXX: we should actually be parsing the source code here
-                # and not use the last token
-                if last_token:
-                    new_frame['function'] = last_token.name or frame.get('function')
+                new_frame['colno'] = token.src_col + 1
+
+                # Find the original function name with a bit of guessing
+                original_function_name = None
+
+                # In the ideal case we can use the function name from the
+                # frame and the location to resolve the original name
+                # through the heuristics in our sourcemap library.
+                if frame.get('function'):
+                    minified_source = self.get_source(frame['abs_path'], raw=True)
+                    original_function_name = sourcemap_view.get_original_function_name(
+                        token.dst_line, token.dst_col, frame['function'],
+                        minified_source)
+                if original_function_name is None:
+                    last_token = None
+
+                    # Find the previous token for function name handling as a
+                    # fallback.
+                    if processable_frame.previous_frame and \
+                       processable_frame.previous_frame.processor is self:
+                        last_token = processable_frame.previous_frame.data.get('token')
+                        if last_token:
+                            original_function_name = last_token.name
+
+                if original_function_name is not None:
+                    new_frame['function'] = original_function_name
 
                 filename = token.src
                 # special case webpack support
@@ -697,10 +707,10 @@ def expand_frame(self, frame, source=None):
             return True
         return False
 
-    def get_source(self, filename):
+    def get_source(self, filename, raw=False):
         if filename not in self.cache:
             self.cache_source(filename)
-        return self.cache.get(filename)
+        return self.cache.get(filename, raw=raw)
 
     def cache_source(self, filename):
         sourcemaps = self.sourcemaps

diff --git a/tests/sentry/lang/javascript/test_example.py b/tests/sentry/lang/javascript/test_example.py
@@ -67,22 +67,21 @@ def test_sourcemap_expansion(self):
 
         assert len(frame_list) == 4
 
+        import pprint
+        pprint.pprint(frame_list)
+
         assert frame_list[0].function == 'produceStack'
         assert frame_list[0].lineno == 6
         assert frame_list[0].filename == 'index.html'
 
-        # This function name is obviously wrong but the current logic we
-        # have does not permit better data here
-        assert frame_list[1].function == 'i'
+        assert frame_list[1].function == 'test'
         assert frame_list[1].lineno == 20
         assert frame_list[1].filename == 'test.js'
 
         assert frame_list[2].function == 'invoke'
         assert frame_list[2].lineno == 15
         assert frame_list[2].filename == 'test.js'
 
-        # This function name is obviously wrong but the current logic we
-        # have does not permit better data here
-        assert frame_list[3].function == 'cb'
+        assert frame_list[3].function == 'onFailure'
         assert frame_list[3].lineno == 5
         assert frame_list[3].filename == 'test.js'
diff --git a/tests/sentry/lang/javascript/test_plugin.py b/tests/sentry/lang/javascript/test_plugin.py
@@ -883,7 +883,7 @@ def test_sourcemap_expansion_with_missing_source(self):
 
         # ... but line, column numbers are still correctly mapped
         assert frame.lineno == 3
-        assert frame.colno == 8
+        assert frame.colno == 9
 
     @responses.activate
     def test_failed_sourcemap_expansion(self):