diff --git a/doc/packages/index.md b/doc/packages/index.md
index 35bb6c1fe4ff2..5484ac4400dfd 100644
--- a/doc/packages/index.md
+++ b/doc/packages/index.md
@@ -24,6 +24,7 @@ etc-files.section.md
 nginx.section.md
 opengl.section.md
 shell-helpers.section.md
+python-tree-sitter.section.md
 steam.section.md
 cataclysm-dda.section.md
 urxvt.section.md
diff --git a/doc/packages/python-tree-sitter.section.md b/doc/packages/python-tree-sitter.section.md
new file mode 100644
index 0000000000000..e314e4f18f156
--- /dev/null
+++ b/doc/packages/python-tree-sitter.section.md
@@ -0,0 +1,52 @@
+# Python Tree Sitter {#python-tree-sitter}
+
+[Tree Sitter](https://tree-sitter.github.io/tree-sitter/) is a framework for building grammars for programming languages. It generates and uses syntax trees from source files, which are useful for code analysis, tooling, and syntax highlighting.
+
+Python bindings for Tree Sitter grammars are provided through the [py-tree-sitter](https://github.com/tree-sitter/py-tree-sitter) module. The Nix package `python3Packages.tree-sitter-grammars` provides pre-built grammars for various languages.
+
+For example, to experiment with the Rust grammar, you can create a shell environment with the following configuration:
+
+```nix
+{ pkgs ? import <nixpkgs> {} }:
+
+pkgs.mkShell {
+  name = "py-tree-sitter-dev-shell";
+
+  buildInputs = with pkgs; [
+    (python3.withPackages (ps: with ps; [
+      tree-sitter
+      tree-sitter-grammars.tree-sitter-rust
+    ]))
+  ];
+}
+```
+
+Once inside the shell, the following Python code demonstrates how to parse a Rust code snippet:
+
+```python
+# Import the Tree Sitter library and Rust grammar
+import tree_sitter
+import tree_sitter_rust
+
+# Load the Rust grammar and initialize the parser
+rust = tree_sitter.Language(tree_sitter_rust.language())
+parser = tree_sitter.Parser(rust)
+
+# Parse a Rust snippet
+tree = parser.parse(
+    bytes(
+        """
+        fn main() {
+            println!("Hello, world!");
+        }
+        """,
+        "utf8"
+    )
+)
+
+# Display the resulting syntax tree
+print(tree.root_node)
+```
+
+The `tree_sitter_rust.language()` function references the Rust grammar loaded in the Nix shell. The resulting tree allows you to inspect the structure of the code programmatically.
+
diff --git a/doc/redirects.json b/doc/redirects.json
index 2c44eb3ed4a98..733302cc36e08 100644
--- a/doc/redirects.json
+++ b/doc/redirects.json
@@ -50,6 +50,9 @@
   "chap-packageconfig": [
     "index.html#chap-packageconfig"
   ],
+  "python-tree-sitter": [
+    "index.html#python-tree-sitter"
+  ],
   "sec-allow-broken": [
     "index.html#sec-allow-broken"
   ],
diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix
index 560f524355cea..d021bbb6a142b 100644
--- a/maintainers/maintainer-list.nix
+++ b/maintainers/maintainer-list.nix
@@ -622,6 +622,13 @@
     githubId = 1773511;
     name = "Adrien Devresse";
   };
+  adfaure = {
+    email = "adfaure@pm.me";
+    matrix = "@adfaure:matrix.org";
+    github = "adfaure";
+    githubId = 8026586;
+    name = "Adrien Faure";
+  };
   adisbladis = {
     email = "adisbladis@gmail.com";
     matrix = "@adis:blad.is";
@@ -856,6 +863,12 @@
     githubId = 45179933;
     name = "Alex Jackson";
   };
+  a-jay98 = {
+    email = "ali@jamadi.me";
+    github = "A-jay98";
+    githubId = 23138252;
+    name = "Ali Jamadi";
+  };
   ajgon = {
     email = "igor@rzegocki.pl";
     github = "ajgon";
diff --git a/pkgs/development/python-modules/tree-sitter-grammars/default.nix b/pkgs/development/python-modules/tree-sitter-grammars/default.nix
new file mode 100644
index 0000000000000..a8ea305dc2aee
--- /dev/null
+++ b/pkgs/development/python-modules/tree-sitter-grammars/default.nix
@@ -0,0 +1,164 @@
+# Python binding for a single pre-built tree-sitter grammar.
+# The whole source tree is generated here: a small C extension is linked against
+# the grammar's `parser` object and exposes it to Python as `language()`.
+{
+  lib,
+  buildPythonPackage,
+  pytestCheckHook,
+  tree-sitter,
+  symlinkJoin,
+  writeTextDir,
+  pythonOlder,
+  # `name`: grammar derivation pname in the format of `tree-sitter-<lang>`
+  name,
+  grammarDrv,
+}:
+let
+  inherit (grammarDrv) version;
+
+  snakeCaseName = lib.replaceStrings [ "-" ] [ "_" ] name;
+  drvPrefix = "python-${name}";
+  # If the name of the grammar attribute differs from the grammar's symbol name,
+  # it could cause a symbol mismatch at load time. This manually curated collection
+  # of overrides ensures the binding can find the correct symbol
+  langIdentOverrides = {
+    tree_sitter_org_nvim = "tree_sitter_org";
+  };
+  langIdent = langIdentOverrides.${snakeCaseName} or snakeCaseName;
+in
+buildPythonPackage {
+  inherit version;
+  pname = drvPrefix;
+
+  # No upstream sources: every file of the Python package is written out below.
+  src = symlinkJoin {
+    name = "${drvPrefix}-source";
+    paths = [
+      (writeTextDir "${snakeCaseName}/__init__.py" ''
+        from ._binding import language
+
+        __all__ = ["language"]
+      '')
+      # Returns the grammar's `TSLanguage *` to Python as an integer.
+      (writeTextDir "${snakeCaseName}/binding.c" ''
+        #include <Python.h>
+
+        typedef struct TSLanguage TSLanguage;
+
+        TSLanguage *${langIdent}(void);
+
+        static PyObject* _binding_language(PyObject *self, PyObject *args) {
+            return PyLong_FromVoidPtr(${langIdent}());
+        }
+
+        static PyMethodDef methods[] = {
+            {"language", _binding_language, METH_NOARGS,
+             "Get the tree-sitter language for this grammar."},
+            {NULL, NULL, 0, NULL}
+        };
+
+        static struct PyModuleDef module = {
+            .m_base = PyModuleDef_HEAD_INIT,
+            .m_name = "_binding",
+            .m_doc = NULL,
+            .m_size = -1,
+            .m_methods = methods
+        };
+
+        PyMODINIT_FUNC PyInit__binding(void) {
+            return PyModule_Create(&module);
+        }
+      '')
+      # `extra_objects` links the grammar's pre-built `parser` into `_binding`.
+      (writeTextDir "setup.py" ''
+        from platform import system
+        from setuptools import Extension, setup
+
+
+        setup(
+          packages=["${snakeCaseName}"],
+          ext_package="${snakeCaseName}",
+          ext_modules=[
+            Extension(
+              name="_binding",
+              sources=["${snakeCaseName}/binding.c"],
+              extra_objects = ["${grammarDrv}/parser"],
+              extra_compile_args=(
+                ["-std=c11"] if system() != 'Windows' else []
+              ),
+            )
+          ],
+        )
+      '')
+      (writeTextDir "pyproject.toml" ''
+        [build-system]
+        requires = ["setuptools", "wheel"]
+        build-backend = "setuptools.build_meta"
+
+        [project]
+        name="${snakeCaseName}"
+        description = "${langIdent} grammar for tree-sitter"
+        version = "${version}"
+        keywords = ["parsing", "incremental", "python"]
+        classifiers = [
+          "Development Status :: 4 - Beta",
+          "Intended Audience :: Developers",
+          "License :: OSI Approved :: MIT License",
+          "Topic :: Software Development :: Compilers",
+          "Topic :: Text Processing :: Linguistic",
+        ]
+
+        requires-python = ">=3.8"
+        license.text = "MIT"
+        readme = "README.md"
+
+        [project.optional-dependencies]
+        core = ["tree-sitter~=0.21"]
+
+        [tool.cibuildwheel]
+        build = "cp38-*"
+        build-frontend = "build"
+      '')
+      (writeTextDir "tests/test_language.py" ''
+        from ${snakeCaseName} import language
+        from tree_sitter import Language, Parser
+
+        # This test only checks that the binding can load the grammar from the compiled shared object.
+        # It does not verify the grammar itself; that is tested in
+        # `pkgs/development/tools/parsing/tree-sitter/grammar.nix`.
+
+        def test_language():
+          lang = Language(language())
+          assert lang is not None
+          parser = Parser()
+          parser.language = lang
+          tree = parser.parse(bytes("", "utf-8"))
+          assert tree is not None
+      '')
+    ];
+  };
+
+  preCheck = ''
+    # https://github.com/NixOS/nixpkgs/issues/255262
+    rm -r ${snakeCaseName}
+  '';
+
+  disabled = pythonOlder "3.8";
+
+  nativeCheckInputs = [
+    tree-sitter
+    pytestCheckHook
+  ];
+  pythonImportsCheck = [ snakeCaseName ];
+
+  meta = {
+    description = "Python bindings for ${name}";
+    license = lib.licenses.mit;
+    maintainers = with lib.maintainers; [
+      a-jay98
+      adfaure
+      mightyiam
+      stepbrobd
+    ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 3ef9e19f46620..bb3e08b8be3e1 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -17386,6 +17386,24 @@ self: super: with self; {
     callPackage ../development/python-modules/tree-sitter-embedded-template
       { };
 
+  tree-sitter-grammars = lib.recurseIntoAttrs (
+    lib.mapAttrs
+      (
+        name: grammarDrv:
+        callPackage ../development/python-modules/tree-sitter-grammars { inherit name grammarDrv; }
+      )
+      (
+        # Filtering grammars not compatible with current py-tree-sitter version
+        lib.filterAttrs (
+          name: value:
+          !(builtins.elem name [
+            "tree-sitter-sql"
+            "tree-sitter-templ"
+          ])
+        ) pkgs.tree-sitter.builtGrammars
+      )
+  );
+
   tree-sitter-html = callPackage ../development/python-modules/tree-sitter-html { };
 
   tree-sitter-javascript = callPackage ../development/python-modules/tree-sitter-javascript { };