Use pygments in introspection

Fix failing tests

Fix location of syntaxhighlighters

Allow word based completions and fix comparison error

Fix text completions

Look for function calls in text lexer too

Address lexers with no keywords

Improve keyword handling

Fix keywords handler and add bash test

Fix pygments handling

Remove unused file

Still look for function call if python_like

Force a text lexer for .txt files
blink1073 committed Mar 6, 2016
1 parent f74701a commit e444db7
Showing 6 changed files with 181 additions and 64 deletions.
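
This commit moves Spyder's fallback introspection from purely regex-based scanning to Pygments token scanning, so completions can work for any file type Pygments can lex. The following is a minimal, hedged sketch of that idea only (not the committed implementation; the sample filename and source are made up), assuming Pygments is installed:

from pygments.lexers import TextLexer, get_lexer_for_filename
from pygments.token import Token
from pygments.util import ClassNotFound


def sketch_completions(filename, source, prefix):
    """Collect completion candidates for `prefix` by lexing `source`."""
    try:
        lexer = get_lexer_for_filename(filename)
    except ClassNotFound:
        lexer = TextLexer()  # unknown extension: fall back to plain text
    candidates = set()
    # get_tokens() yields (token_type, value) pairs for the whole source
    for token_type, value in lexer.get_tokens(source):
        value = value.strip()
        if token_type in Token.Literal.String:
            continue  # skip string literals
        if value.startswith(prefix) and value != prefix:
            candidates.add(value)
    return sorted(candidates)


# Example: Bash tokens that extend the prefix 'fu' (here, the keyword 'function')
print(sketch_completions('script.sh', 'function backup { cp "$1" "$1.bak"; }\n', 'fu'))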
3 changes: 2 additions & 1 deletion profiling.txt
@@ -27,4 +27,5 @@ When Explorer widget is disabled (.spyder.ini): 30ms
-> when Workspace is hidden: the most time consuming task is filtering

=> Workspace lack of performance solved too! -> was due to a CONF.get within a
loop... duh...
loop... duh...

123 changes: 86 additions & 37 deletions spyderlib/utils/introspection/fallback_plugin.py
@@ -15,12 +15,14 @@
import re
import time

from pygments.token import Token

from spyderlib.utils.debug import log_dt
from spyderlib.utils import sourcecode, encoding
from spyderlib.utils.introspection.manager import (
DEBUG_EDITOR, LOG_FILENAME, IntrospectionPlugin)
from spyderlib.utils.introspection.utils import (
get_parent_until, memoize)
get_parent_until, memoize, find_lexer_for_filename, get_keywords)


class FallbackPlugin(IntrospectionPlugin):
@@ -37,23 +39,51 @@ def get_completions(self, info):
if not info['obj']:
return
items = []
base = info['obj']
tokens = set(re.findall(info['id_regex'], info['source_code']))
items = [item for item in tokens if
item.startswith(base) and len(item) > len(base)]
if '.' in base:
start = base.rfind('.') + 1
else:
start = 0

items = [i[start:len(base)] + i[len(base):].split('.')[0]
for i in items]
# get path completions
# get last word back to a space or a quote character
match = re.search('''[ "\']([\w\.\\\\/]+)\Z''', info['line'])
if match:
items += _complete_path(match.groups()[0])
return [(i, '') for i in sorted(items)]
line = info['line'].strip()
is_from = line.startswith('from')
if ((line.startswith('import') or is_from and ' import' not in line)
and info['is_python_like']):
items += module_completion(info['line'], [info['filename']])
return [(i, 'module') for i in sorted(items)]
elif is_from and info['is_python_like']:
items += module_completion(info['line'], [info['filename']])
return [(i, '') for i in sorted(items)]
elif info['obj']:
obj = info['obj']
if info['context']:
lexer = find_lexer_for_filename(info['filename'])
# get a list of token matches for the current object
tokens = lexer.get_tokens(info['source_code'])
for (context, token) in tokens:
token = token.strip()
if (context in info['context'] and
token.startswith(obj) and
obj != token):
items.append(token)
# add in keywords if not in a string
if context not in Token.Literal.String:
try:
keywords = get_keywords(lexer)
items.extend(k for k in keywords if k.startswith(obj))
except Exception:
pass
else:
tokens = set(re.findall(info['id_regex'], info['source_code']))
items = [item for item in tokens if
item.startswith(obj) and len(item) > len(obj)]
if '.' in obj:
start = obj.rfind('.') + 1
else:
start = 0

items = [i[start:len(obj)] + i[len(obj):].split('.')[0]
for i in items]
# get path completions
# get last word back to a space or a quote character
match = re.search('''[ "\']([\w\.\\\\/]+)\Z''', info['line'])
if match:
items += _complete_path(match.groups()[0])
return [(i, '') for i in sorted(items)]

def get_definition(self, info):
"""
@@ -62,6 +92,8 @@ def get_definition(self, info):
This is used to find the path of python-like modules
(e.g. cython and enaml) for a goto definition
"""
if not info['is_python_like']:
return
token = info['obj']
lines = info['lines']
source_code = info['source_code']
@@ -88,7 +120,7 @@ def get_definition(self, info):
if (not source_file or
not osp.splitext(source_file)[-1] in exts):
line_nr = get_definition_with_regex(source_code, token,
line_nr)
line_nr)
return filename, line_nr
mod_name = osp.basename(source_file).split('.')[0]
if mod_name == token or mod_name == '__init__':
@@ -200,7 +232,6 @@ def get_definition_with_regex(source, token, start_line=-1):
'self.{0}{1}[^=!<>]*=[^=]',
'{0}{1}[^=!<>]*=[^=]']
matches = get_matches(patterns, source, token, start_line)

# find the one closest to the start line (prefer before the start line)
if matches:
min_dist = len(source.splitlines())
@@ -296,25 +327,25 @@ def _complete_path(path=None):
code += '\nlog_dt'

path, line = p.get_definition(CodeInfo('definition', code, len(code),
__file__))
__file__, is_python_like=True))
assert path.endswith('fallback_plugin.py')

code += '\np.get_completions'
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
assert path == 'dummy.txt'
'dummy.py', is_python_like=True))
assert path == 'dummy.py'
assert 'def get_completions(' in code.splitlines()[line - 1]

code += '\npython_like_mod_finder'
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
assert path == 'dummy.txt'
'dummy.py', is_python_like=True))
assert path == 'dummy.py'
# FIXME: we need to prioritize def over =
assert 'def python_like_mod_finder' in code.splitlines()[line - 1]

code += 'python_like_mod_finder'
resp = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
'dummy.py'))
assert resp is None

code = """
@@ -325,7 +356,7 @@ def __init__(self):
t = Test()
t.foo"""
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
'dummy.py', is_python_like=True))
assert line == 4

ext = python_like_exts()
@@ -350,25 +381,43 @@ def __init__(self):

code = 'import re\n\nre'
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
assert path == 'dummy.txt' and line == 1
'dummy.py', is_python_like=True))
assert path == 'dummy.py' and line == 1

code = 'self.proxy.widget; self.p'
comp = p.get_completions(CodeInfo('completions', code, len(code)))
assert comp[0] == ('proxy', '')
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.py'))
assert ('proxy', '') in comp, comp

code = 'self.sigMessageReady.emit; self.s'
comp = p.get_completions(CodeInfo('completions', code, len(code)))
assert comp == [('sigMessageReady', '')]
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.py'))
assert ('sigMessageReady', '') in comp

code = 'from numpy import one'
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.py', is_python_like=True))
assert ('ones', '') in comp

comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.txt'))
assert not comp, comp

code = 'from numpy.testing import (asse'
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.py', is_python_like=True))
assert ('assert_equal', '') in comp

code = 'bob = 1; bo'
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.m'))
assert ('bob', '') in comp

code = encoding.to_unicode('álfa;á')
comp = p.get_completions(CodeInfo('completions', code, len(code)))
assert comp == [(encoding.to_unicode('álfa'), '')]
code = 'functi'
comp = p.get_completions(CodeInfo('completions', code, len(code), 'dummy.sh'))
assert ('function', '') in comp, comp

code = '''
def test(a, b):
pass
test(1,'''
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
'dummy.py', is_python_like=True))
assert line == 2
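
With the lexer-based path in place, the fallback plugin can now offer token and keyword completions for non-Python files, as the new tests above exercise. A hedged usage sketch (import paths taken from this diff; 'dummy.sh' is only a label used to pick the Bash lexer):

from spyderlib.utils.introspection.fallback_plugin import FallbackPlugin
from spyderlib.utils.introspection.utils import CodeInfo

plugin = FallbackPlugin()

# Completing 'functi' in a shell-script context should offer the Bash keyword 'function'
code = 'functi'
info = CodeInfo('completions', code, len(code), 'dummy.sh')
completions = plugin.get_completions(info)
assert ('function', '') in completions, completions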
2 changes: 1 addition & 1 deletion spyderlib/utils/introspection/jedi_plugin.py
@@ -266,7 +266,7 @@ def test(a, b):
pass
test(1,'''
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
'dummy.txt', is_python_like=True))
assert line == 2

docs = p.get_info(CodeInfo('info', code, len(code), __file__))
5 changes: 3 additions & 2 deletions spyderlib/utils/introspection/rope_plugin.py
@@ -311,8 +311,9 @@ def test(a, b):
pass
test(1,'''
path, line = p.get_definition(CodeInfo('definition', code, len(code),
'dummy.txt'))
'dummy.txt', is_python_like=True))
assert line == 2

docs = p.get_info(CodeInfo('info', code, len(code), __file__))
docs = p.get_info(CodeInfo('info', code, len(code), __file__,
is_python_like=True))
assert 'Test docstring' in docs['docstring']
101 changes: 83 additions & 18 deletions spyderlib/utils/introspection/utils.py
@@ -17,8 +17,16 @@
import errno

from spyderlib.utils.misc import memoize
from spyderlib.config.base import debug_print
from spyderlib.utils.misc import select_port

from spyderlib.utils.syntaxhighlighters import (
custom_extension_lexer_mapping
)

from pygments.lexers import (
get_lexer_for_filename, get_lexer_by_name, TextLexer
)
from pygments.util import ClassNotFound
from pygments.token import Token


class CodeInfo(object):
@@ -28,13 +36,14 @@ class CodeInfo(object):
re.UNICODE)

def __init__(self, name, source_code, position, filename=None,
is_python_like=True, in_comment_or_string=False, **kwargs):
is_python_like=False, in_comment_or_string=False, **kwargs):
self.__dict__.update(kwargs)
self.name = name
self.filename = filename
self.source_code = source_code
self.is_python_like = is_python_like
self.in_comment_or_string = in_comment_or_string

self.position = position

# if in a comment, look for the previous definition
@@ -72,27 +81,48 @@ def _get_info(self):
self.line = self.lines[-1]
self.column = len(self.lines[-1])

tokens = re.findall(self.id_regex, self.line)
if tokens and self.line.endswith(tokens[-1]):
self.obj = tokens[-1]
else:
self.obj = None
full_line = self.source_code.splitlines()[self.line_num - 1]

lexer = find_lexer_for_filename(self.filename)

self.full_obj = self.obj
# check for a text-based lexer that doesn't split tokens
if len(list(lexer.get_tokens('a b'))) == 1:
# Use regex to get the information
tokens = re.findall(self.id_regex, self.line)
if tokens and self.line.endswith(tokens[-1]):
self.obj = tokens[-1]
else:
self.obj = None

self.full_obj = self.obj

if self.obj:
full_line = self.source_code.splitlines()[self.line_num - 1]
rest = full_line[self.column:]
match = re.match(self.id_regex, rest)
if match:
self.full_obj = self.obj + match.group()
if self.obj:
full_line = self.source_code.splitlines()[self.line_num - 1]
rest = full_line[self.column:]
match = re.match(self.id_regex, rest)
if match:
self.full_obj = self.obj + match.group()

if (self.name in ['info', 'definition'] and (not self.obj)
self.context = None
else:
# Use lexer to get the information
pos = 0
line_tokens = lexer.get_tokens(full_line)
for (context, token) in line_tokens:
pos += len(token)
if pos >= self.column:
self.obj = token[:len(token) - (pos - self.column)]
self.full_obj = token
if context in Token.Literal.String:
context = Token.Literal.String
self.context = context
break

if (self.name in ['info', 'definition'] and (not self.context in Token.Name)
and self.is_python_like):
func_call = re.findall(self.func_call_regex, self.line)
if func_call:
self.obj = func_call[-1]
debug_print('new obj %s' % repr(self.obj))
self.column = self.line.index(self.obj) + len(self.obj)
self.position = self.position - len(self.line) + self.column

@@ -116,7 +146,7 @@ def _get_docstring(self):

def __eq__(self, other):
try:
return self.__dict__ == other.__dict__
return self.serialize() == other.serialize()
except Exception:
return False

@@ -137,6 +167,41 @@ def serialize(self):
return state


def find_lexer_for_filename(filename):
"""Get a Pygments Lexer given a filename.
"""
filename = filename or ''
root, ext = os.path.splitext(filename)
if ext in custom_extension_lexer_mapping:
lexer = get_lexer_by_name(custom_extension_lexer_mapping[ext])
else:
try:
lexer = get_lexer_for_filename(filename)
except ClassNotFound:
return TextLexer()
return lexer


def get_keywords(lexer):
"""Get the keywords for a given lexer.
"""
if not hasattr(lexer, 'tokens'):
return []
if 'keywords' in lexer.tokens:
return lexer.tokens['keywords'][0][0].words
keywords = []
for vals in lexer.tokens.values():
for val in vals:
try:
if '|' in val[0] and ')\\b' in val[0]:
val = re.sub(r'\\.', '', val[0])
val = re.sub('[^0-9a-zA-Z|]+', '', val)
keywords.extend(val.split('|'))
except Exception:
continue
return keywords


@memoize
def get_parent_until(path):
"""
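
The new helpers added to utils.py do the lexer plumbing: find_lexer_for_filename() maps a filename to a Pygments lexer (honoring Spyder's custom extension mapping and falling back to TextLexer), and get_keywords() scrapes keyword lists out of a lexer's regex token rules. A brief hedged sketch of how they are meant to behave (module path from this diff; the exact keyword list depends on the installed Pygments version):

from spyderlib.utils.introspection.utils import find_lexer_for_filename, get_keywords

# A known extension resolves to the matching lexer
lexer = find_lexer_for_filename('deploy.sh')
print(type(lexer).__name__)  # e.g. 'BashLexer'

# Keywords are recovered from the lexer's token rules
keywords = get_keywords(lexer)
print('function' in keywords)  # expected True for the Bash lexer, per the new test above

# Unrecognized extensions fall back to a plain-text lexer
print(type(find_lexer_for_filename('notes.unknown')).__name__)  # 'TextLexer'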
