From 6867a455526eeff810490771482b24dfdb31e535 Mon Sep 17 00:00:00 2001
From: Armin Ronacher <armin.ronacher@active-4.com>
Date: Mon, 25 Sep 2017 23:25:11 +0200
Subject: [PATCH] feat(javascript): implement function name mapping

This commit adds support for libsourcemap 0.8.2 which adds function
name mappings for javascript.  There was an earlier verison of the
library that supported this but failed to handle unicode ranges in
some cases and could cause exceptions.

The newer version instead fails silently if the sourcemap file is
incorrect.
---
 CHANGES                                      |  2 +
 requirements-base.txt                        |  2 +-
 setup.py                                     |  3 +-
 src/sentry/lang/javascript/cache.py          | 10 ++--
 src/sentry/lang/javascript/processor.py      | 48 ++++++++++++--------
 tests/sentry/lang/javascript/test_example.py | 11 ++---
 tests/sentry/lang/javascript/test_plugin.py  |  2 +-
 7 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/CHANGES b/CHANGES
index 20c44e54e012e7..15c08e57930ca8 100644
--- a/CHANGES
+++ b/CHANGES
@@ -16,6 +16,8 @@ Version 8.20
 ------------
 - Make BitBucket repositories enabled by default
 - Add raw data toggle for Additional Data
+- Improved function name resolving for JavaScript sourcemaps
+
 - Add initial support for Redis Cluster.
 - Support a list of hosts in the ``redis.clusters`` configuration along side
   the traditional dictionary style configuration.
diff --git a/requirements-base.txt b/requirements-base.txt
index 398fe0de8ac3c6..e3ad326b65ad81 100644
--- a/requirements-base.txt
+++ b/requirements-base.txt
@@ -19,7 +19,7 @@ hiredis>=0.1.0,<0.2.0
 honcho>=0.7.0,<0.8.0
 kombu==3.0.35
 ipaddress>=1.0.16,<1.1.0
-libsourcemap>=0.7.2,<0.8.0
+libsourcemap>=0.8.2,<0.9.0
 loremipsum>=1.0.5,<1.1.0
 lxml>=3.4.1
 mock>=0.8.0,<1.1
diff --git a/setup.py b/setup.py
index 20d7c143645edc..bc1ef506b4f7da 100755
--- a/setup.py
+++ b/setup.py
@@ -62,8 +62,9 @@
 
 IS_LIGHT_BUILD = os.environ.get('SENTRY_LIGHT_BUILD') == '1'
 
-
 # we use pip requirements files to improve Docker layer caching
+
+
 def get_requirements(env):
     with open('requirements-{}.txt'.format(env)) as fp:
         return [x.strip() for x in fp.read().split('\n') if not x.startswith('#')]
diff --git a/src/sentry/lang/javascript/cache.py b/src/sentry/lang/javascript/cache.py
index 689d2d4434af0b..9edbeec5e47baa 100644
--- a/src/sentry/lang/javascript/cache.py
+++ b/src/sentry/lang/javascript/cache.py
@@ -20,7 +20,7 @@ def _get_canonical_url(self, url):
             url = self._aliases[url]
         return url
 
-    def get(self, url):
+    def get(self, url, raw=False):
         url = self._get_canonical_url(url)
         try:
             parsed, rv = self._cache[url]
@@ -30,7 +30,10 @@ def get(self, url):
         # We have already gotten this file and we've
         # decoded the response, so just return
         if parsed:
-            return rv
+            parsed, raw_body = rv
+            if raw:
+                return raw_body
+            return parsed
 
         # Otherwise, we have a 2-tuple that needs to be applied
         body, encoding = rv
@@ -40,10 +43,11 @@ def get(self, url):
         if callable(body):
             body = body()
 
+        raw_body = body
         body = body.decode(codec_lookup(encoding, 'utf-8').name, 'replace').split(u'\n')
 
         # Set back a marker to indicate we've parsed this url
-        self._cache[url] = (True, body)
+        self._cache[url] = (True, (body, raw_body))
         return body
 
     def get_errors(self, url):
diff --git a/src/sentry/lang/javascript/processor.py b/src/sentry/lang/javascript/processor.py
index 46a6d549879090..39583ba38e2458 100644
--- a/src/sentry/lang/javascript/processor.py
+++ b/src/sentry/lang/javascript/processor.py
@@ -595,24 +595,34 @@ def process_frame(self, processable_frame, processing_task):
                     )
 
             if token is not None:
-                # Token's return zero-indexed lineno's
+                # the tokens are zero indexed, so offset correctly
                 new_frame['lineno'] = token.src_line + 1
-                new_frame['colno'] = token.src_col
-                last_token = None
-
-                # we might go back to a frame that is unhandled.  In that
-                # case we might not find the token in the data.
-                if processable_frame.previous_frame:
-                    last_token = processable_frame.previous_frame.data.get('token')
-
-                # The offending function is always the previous function in the stack
-                # Honestly, no idea what the bottom most frame is, so
-                # we're ignoring that atm.
-                #
-                # XXX: we should actually be parsing the source code here
-                # and not use the last token
-                if last_token:
-                    new_frame['function'] = last_token.name or frame.get('function')
+                new_frame['colno'] = token.src_col + 1
+
+                # Find the original function name with a bit of guessing
+                original_function_name = None
+
+                # In the ideal case we can use the function name from the
+                # frame and the location to resolve the original name
+                # through the heuristics in our sourcemap library.
+                if frame.get('function'):
+                    minified_source = self.get_source(frame['abs_path'], raw=True)
+                    original_function_name = sourcemap_view.get_original_function_name(
+                        token.dst_line, token.dst_col, frame['function'],
+                        minified_source)
+                if original_function_name is None:
+                    last_token = None
+
+                    # Find the previous token for function name handling as a
+                    # fallback.
+                    if processable_frame.previous_frame and \
+                       processable_frame.previous_frame.processor is self:
+                        last_token = processable_frame.previous_frame.data.get('token')
+                        if last_token:
+                            original_function_name = last_token.name
+
+                if original_function_name is not None:
+                    new_frame['function'] = original_function_name
 
                 filename = token.src
                 # special case webpack support
@@ -697,10 +707,10 @@ def expand_frame(self, frame, source=None):
             return True
         return False
 
-    def get_source(self, filename):
+    def get_source(self, filename, raw=False):
         if filename not in self.cache:
             self.cache_source(filename)
-        return self.cache.get(filename)
+        return self.cache.get(filename, raw=raw)
 
     def cache_source(self, filename):
         sourcemaps = self.sourcemaps
diff --git a/tests/sentry/lang/javascript/test_example.py b/tests/sentry/lang/javascript/test_example.py
index a53444499df8ab..3aaa4d35f7637b 100644
--- a/tests/sentry/lang/javascript/test_example.py
+++ b/tests/sentry/lang/javascript/test_example.py
@@ -67,13 +67,14 @@ def test_sourcemap_expansion(self):
 
         assert len(frame_list) == 4
 
+        import pprint
+        pprint.pprint(frame_list)
+
         assert frame_list[0].function == 'produceStack'
         assert frame_list[0].lineno == 6
         assert frame_list[0].filename == 'index.html'
 
-        # This function name is obviously wrong but the current logic we
-        # have does not permit better data here
-        assert frame_list[1].function == 'i'
+        assert frame_list[1].function == 'test'
         assert frame_list[1].lineno == 20
         assert frame_list[1].filename == 'test.js'
 
@@ -81,8 +82,6 @@ def test_sourcemap_expansion(self):
         assert frame_list[2].lineno == 15
         assert frame_list[2].filename == 'test.js'
 
-        # This function name is obviously wrong but the current logic we
-        # have does not permit better data here
-        assert frame_list[3].function == 'cb'
+        assert frame_list[3].function == 'onFailure'
         assert frame_list[3].lineno == 5
         assert frame_list[3].filename == 'test.js'
diff --git a/tests/sentry/lang/javascript/test_plugin.py b/tests/sentry/lang/javascript/test_plugin.py
index b62335f8e894dd..238a3db68ce54e 100644
--- a/tests/sentry/lang/javascript/test_plugin.py
+++ b/tests/sentry/lang/javascript/test_plugin.py
@@ -883,7 +883,7 @@ def test_sourcemap_expansion_with_missing_source(self):
 
         # ... but line, column numbers are still correctly mapped
         assert frame.lineno == 3
-        assert frame.colno == 8
+        assert frame.colno == 9
 
     @responses.activate
     def test_failed_sourcemap_expansion(self):