diff --git a/build.py b/build.py index a72f96f..9ccd4c3 100644 --- a/build.py +++ b/build.py @@ -1,4 +1,8 @@ +import json import os +import glob +import pprint +import re import subprocess import sys from tree_sitter import Language @@ -32,13 +36,12 @@ subprocess.check_call(["git", "fetch", "--depth=1", "origin", commit], cwd=clone_directory) subprocess.check_call(["git", "checkout", commit], cwd=clone_directory) -print() - if sys.platform == "win32": languages_filename = "tree_sitter_languages\\languages.dll" else: languages_filename = "tree_sitter_languages/languages.so" +index=dict() print(f"{sys.argv[0]}: Building", languages_filename) Language.build_library( languages_filename, @@ -90,5 +93,11 @@ 'vendor/tree-sitter-typescript/tsx', 'vendor/tree-sitter-typescript/typescript', 'vendor/tree-sitter-yaml', - ] + ], + index, ) + +print(f"{sys.argv[0]}: Writing index entries for {len(index)} languages") +with open('tree_sitter_languages/generated.pyx', 'w') as file: + file.write('index = ') + pprint.pprint(index, stream=file) diff --git a/setup.py b/setup.py index f5b2ae0..4ace7ff 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ author_email='contact@grantjenks.com', url='https://github.com/grantjenks/py-tree-sitter-languages', license='Apache 2.0', - ext_modules=cythonize('tree_sitter_languages/core.pyx', language_level='3'), + ext_modules=cythonize('tree_sitter_languages/*.pyx', language_level='3'), packages=['tree_sitter_languages'], package_data={'tree_sitter_languages': ['languages.so', 'languages.dll']}, install_requires=['tree-sitter'], diff --git a/tests/test_tree_sitter_languages.py b/tests/test_tree_sitter_languages.py index b4ebc31..511a475 100644 --- a/tests/test_tree_sitter_languages.py +++ b/tests/test_tree_sitter_languages.py @@ -1,4 +1,5 @@ -from tree_sitter_languages import get_language, get_parser +from tree_sitter_languages import get_language, get_parser, get_language_for_file +from tree_sitter_languages.generated import index LANGUAGES = [ 'bash', @@ -45,6 +46,7 @@ 'sqlite', 'toml', 'tsq', + 'tsx', 'typescript', 'yaml', ] @@ -87,3 +89,14 @@ def test_get_language(): for language in LANGUAGES: language = get_language(language) assert language + +def test_generated(): + for language in LANGUAGES: + assert index[language] is not None + +def test_get_language_for_file(): + for filename, lang in { + 'file.sh': 'bash', + 'test.go': 'go', + }.items(): + assert get_language_for_file(filename).name == get_language(lang).name diff --git a/tree_sitter_languages/__init__.py b/tree_sitter_languages/__init__.py index 1ebad68..d55e411 100644 --- a/tree_sitter_languages/__init__.py +++ b/tree_sitter_languages/__init__.py @@ -1,7 +1,7 @@ """Tree Sitter with Languages """ -from .core import get_language, get_parser +from .core import get_language, get_parser, get_language_for_file __version__ = '1.7.0' __title__ = 'tree_sitter_languages' diff --git a/tree_sitter_languages/core.pyx b/tree_sitter_languages/core.pyx index a27377c..efe1e34 100644 --- a/tree_sitter_languages/core.pyx +++ b/tree_sitter_languages/core.pyx @@ -1,6 +1,8 @@ import pathlib +import re import sys +from .generated import index from tree_sitter import Language, Parser @@ -14,6 +16,9 @@ def get_language(language): language = Language(binary_path, language) return language +def get_language_for_file(file_name, file_contents=None): + name = Language.lookup_language_name_for_file(index, file_name, file_contents) + return get_language(name) if name is not None else None def get_parser(language): language = get_language(language) diff --git a/tree_sitter_languages/generated.pyx b/tree_sitter_languages/generated.pyx new file mode 100644 index 0000000..7c17292 --- /dev/null +++ b/tree_sitter_languages/generated.pyx @@ -0,0 +1,128 @@ +index = {'bash': [{'file-types': ['sh', 'bash', 'zsh'], 'scope': 'source.bash'}], + 'c': [{'file-types': ['c', 'h'], 'scope': 'source.c'}], + 'c_sharp': [{'file-types': ['cs'], 'scope': 'source.cs'}], + 'commonlisp': [{'file-types': ['lisp'], 'scope': 'source.lisp'}], + 'cpp': [{'file-types': ['cc', 'cpp', 'hpp', 'h'], + 'highlights': ['queries/highlights.scm', + 'node_modules/tree-sitter-c/queries/highlights.scm'], + 'scope': 'source.cpp'}], + 'css': [{'file-types': ['css'], + 'injection-regex': '^css$', + 'scope': 'source.css'}], + 'dockerfile': [{'file-types': ['Dockerfile', + 'dockerfile', + 'docker', + 'Containerfile', + 'container'], + 'highlights': ['queries/highlights.scm']}], + 'dot': [{'file-types': ['dot', 'gv'], 'scope': 'source.dot'}], + 'elisp': [{'file-types': ['el'], 'scope': 'source.emacs.lisp'}], + 'elixir': [{'file-types': ['ex', 'exs'], + 'injection-regex': '^(ex|elixir)$', + 'scope': 'source.elixir'}], + 'elm': [{'file-types': ['elm'], 'scope': 'source.elm'}], + 'embedded_template': [{'file-types': ['ejs'], + 'injection-regex': 'ejs', + 'injections': 'queries/injections-ejs.scm', + 'scope': 'text.html.ejs'}, + {'file-types': ['erb'], + 'injection-regex': 'erb', + 'injections': 'queries/injections-erb.scm', + 'scope': 'text.html.erb'}], + 'erlang': {}, + 'go': [{'file-types': ['go'], 'scope': 'source.go'}], + 'gomod': {}, + 'hack': [{'file-types': ['hack'], + 'first-line-regex': '^((<\\?hh.*)|(#!.+ hhvm))', + 'scope': 'source.hack'}], + 'haskell': [{'file-types': ['hs'], + 'highlights': ['queries/highlights.scm'], + 'injection-regex': '^(hs|haskell)$', + 'scope': 'source.haskell'}], + 'hcl': [{'file-types': ['hcl'], 'scope': 'source.hcl'}], + 'html': [{'file-types': ['html'], + 'injection-regex': 'html', + 'scope': 'text.html.basic'}], + 'java': [{'file-types': ['java'], 'scope': 'source.java'}], + 'javascript': [{'file-types': ['js'], + 'highlights': ['queries/highlights-jsx.scm', + 'queries/highlights-params.scm', + 'queries/highlights.scm'], + 'injection-regex': '^(js|javascript)$', + 'scope': 'source.js'}], + 'jsdoc': [{'injection-regex': 'jsdoc', 'scope': 'text.jsdoc'}], + 'json': [{'file-types': ['json'], 'scope': 'source.json'}], + 'julia': [{'file-types': ['jl'], 'scope': 'source.julia'}], + 'kotlin': [{'file-types': ['kt', 'kts'], 'scope': 'source.kotlin'}], + 'lua': [{'file-types': ['lua'], 'scope': 'source.lua'}], + 'make': [{'file-types': ['makefile', + 'Makefile', + 'MAKEFILE', + 'GNUmakefile', + 'mk', + 'mak', + 'dsp'], + 'scope': 'source.mk'}], + 'markdown': {}, + 'objc': [{'file-types': ['h', 'm'], + 'highlights': ['queries/highlights.scm', + 'node_modules/tree-sitter-c/queries/highlights.scm'], + 'scope': 'source.objc'}], + 'ocaml': [{'file-types': ['ml'], + 'first-line-regex': '', + 'injection-regex': '^(ocaml|ml)$', + 'path': 'ocaml', + 'scope': 'source.ocaml'}], + 'perl': [{'file-types': ['pl'], 'scope': 'source.perl'}], + 'php': [{'file-types': ['php'], + 'highlights': 'queries/highlights.scm', + 'scope': 'source.php'}], + 'python': [{'file-types': ['py'], 'scope': 'source.python'}], + 'ql': [{'file-types': ['ql', 'qll'], 'scope': 'source.ql'}], + 'r': [{'file-types': ['R', 'r'], + 'first-line-regex': '#!.*\\bRscript$', + 'scope': 'source.R'}], + 'regex': [{'injection-regex': '^regex$', 'scope': 'source.regex'}], + 'rst': [{'file-types': ['rst'], + 'injection-regex': 'rst', + 'scope': 'text.rst'}], + 'ruby': [{'file-types': ['rb'], + 'injection-regex': 'ruby', + 'scope': 'source.ruby'}], + 'rust': [{'file-types': ['rs'], + 'injection-regex': 'rust', + 'scope': 'source.rust'}], + 'scala': [{'file-types': ['scala'], 'scope': 'source.scala'}], + 'sql': [{'file-types': ['sql'], 'scope': 'source.sql'}], + 'sqlite': [{'file-types': ['sql'], + 'highlights': 'queries/highlights.scm', + 'injection-regex': '^(sql)$', + 'scope': 'source.sql'}], + 'toml': [{'file-types': ['toml'], + 'highlights': ['queries/highlights.scm'], + 'injection-regex': '^toml$', + 'scope': 'source.toml'}], + 'tsq': [{'file-types': ['tsq', 'scm'], 'scope': 'scope.tsq'}], + 'tsx': [{'file-types': ['tsx'], + 'highlights': ['queries/highlights.scm', + 'node_modules/tree-sitter-javascript/queries/highlights-jsx.scm', + 'node_modules/tree-sitter-javascript/queries/highlights.scm'], + 'injection-regex': '^(ts|typescript)$', + 'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm', + 'locals': 'node_modules/tree-sitter-javascript/queries/locals.scm', + 'path': 'tsx', + 'scope': 'source.tsx', + 'tags': ['queries/tags.scm', + 'node_modules/tree-sitter-javascript/queries/tags.scm']}], + 'typescript': [{'file-types': ['ts'], + 'highlights': ['queries/highlights.scm', + 'node_modules/tree-sitter-javascript/queries/highlights.scm'], + 'injection-regex': '^(ts|typescript)$', + 'injections': 'node_modules/tree-sitter-javascript/queries/injections.scm', + 'locals': ['queries/locals.scm', + 'node_modules/tree-sitter-javascript/queries/locals.scm'], + 'path': 'typescript', + 'scope': 'source.ts', + 'tags': ['queries/tags.scm', + 'node_modules/tree-sitter-javascript/queries/tags.scm']}], + 'yaml': {}}