Skip to content

Commit

Permalink
Use same data structure for all options
Browse files Browse the repository at this point in the history
- We now use source_symbols, source_strings and source_comments
across all the various options.

- Each option is now excluding the other options

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Apr 24, 2024
1 parent 0db9b00 commit 7227113
Show file tree
Hide file tree
Showing 8 changed files with 15,783 additions and 5,486 deletions.
1 change: 1 addition & 0 deletions src/source_inspector/strings_xgettext.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class XgettextStringScannerPlugin(ScanPlugin):
help="Collect source strings using xgettext.",
help_group=SCAN_GROUP,
sort_order=100,
conflicting_options=["treesitter_symbol_and_string", "pygments_symbol_and_string"],
),
]

Expand Down
1 change: 1 addition & 0 deletions src/source_inspector/symbols_ctags.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class CtagsSymbolScannerPlugin(ScanPlugin):
help="Collect source symbols using Universal ctags.",
help_group=SCAN_GROUP,
sort_order=100,
conflicting_options=["treesitter_symbol_and_string", "pygments_symbol_and_string"],
),
]

Expand Down
51 changes: 38 additions & 13 deletions src/source_inspector/symbols_pygments.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@
from pygments.token import Literal
from pygments.token import Name
from pygments.token import Punctuation
from pygments.token import Whitespace
from pygments.util import ClassNotFound
from textcode import analysis
from typecode.contenttype import Type

from source_inspector.pygments_lexing import get_tokens

"""
Extract strings and symbols from source code files with pygments.
"""
Expand All @@ -35,36 +34,58 @@
@scan_impl
class PygmentsSymbolsAndStringScannerPlugin(ScanPlugin):
"""
Scan a source file for symbols and strings using Pygments.
Scan a source file for symbols, strings and comments using Pygments.
"""

resource_attributes = dict(
pygments_symbols=attr.ib(default=attr.Factory(list), repr=False),
source_symbols=attr.ib(default=attr.Factory(list), repr=False),
source_strings=attr.ib(default=attr.Factory(list), repr=False),
source_comments=attr.ib(default=attr.Factory(list), repr=False),
)

options = [
PluggableCommandLineOption(
("--pygments-symbol",),
("--pygments-symbol-and-string",),
is_flag=True,
default=False,
help="Collect source symbols and strings using pygments.",
help="Collect source symbols, strings and comments using pygments.",
help_group=SCAN_GROUP,
sort_order=100,
conflicting_options=["source_symbol", "source_string", "treesitter_symbol_and_string"],
),
]

def is_enabled(self, pygments_symbol, **kwargs):
return pygments_symbol
def is_enabled(self, pygments_symbol_and_string, **kwargs):
return pygments_symbol_and_string

def get_scanner(self, **kwargs):
return get_pygments_symbols


def get_pygments_symbols(location, **kwargs):
"""
Return a mapping of symbols and strings for a source file at ``location``.
Return a mapping of symbol, string and comment lists for a source file at ``location``.
"""
return dict(pygments_symbols=list(get_tokens(location=location)))
source_strings = []
source_comments = []
source_symbols = []

for token in get_tokens(location=location):
token_type = token["token_type"]
token_value = token["token_value"]

if token_type == "string":
source_strings.append(token_value)
elif token_type == "comment":
source_comments.append(token_value)
elif token_type == "symbol":
source_symbols.append(token_value)

return dict(
source_symbols=source_symbols,
source_strings=source_strings,
source_comments=source_comments,
)


def get_tokens(location, with_literals=True, with_comments=False):
Expand Down Expand Up @@ -95,13 +116,17 @@ def get_tokens(location, with_literals=True, with_comments=False):
tvalue = tvalue.strip()
if not tvalue:
continue
if ttype in Punctuation:

if ttype in (
Punctuation,
Whitespace,
):
continue

if with_literals and ttype in (Literal,) and ttype not in (Punctuation):
if with_literals and ttype in Literal:
yield dict(position=pos, token_type="string", token_value=tvalue)

elif with_comments and ttype in Comment: # and ttype != Token.Comment.Preproc:
elif with_comments and ttype in Comment:
yield dict(position=pos, token_type="comment", token_value=tvalue)

elif ttype in symbols:
Expand Down
Loading

0 comments on commit 7227113

Please sign in to comment.