Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/packages/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ etc-files.section.md
nginx.section.md
opengl.section.md
shell-helpers.section.md
python-tree-sitter.section.md
steam.section.md
cataclysm-dda.section.md
urxvt.section.md
Expand Down
52 changes: 52 additions & 0 deletions doc/packages/python-tree-sitter.section.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Python Tree Sitter {#python-tree-sitter}

[Tree Sitter](https://tree-sitter.github.io/tree-sitter/) is a parser generator tool and an incremental parsing library. It builds syntax trees from source files using per-language grammars; these trees are useful for code analysis, tooling, and syntax highlighting.

Python bindings for Tree Sitter are provided through the [py-tree-sitter](https://github.com/tree-sitter/py-tree-sitter) module. The `python3Packages.tree-sitter-grammars` attribute set provides Python packages for the pre-built grammars of various languages, one attribute per grammar (for example `python3Packages.tree-sitter-grammars.tree-sitter-rust`).

For example, to experiment with the Rust grammar, you can create a shell environment with the following configuration:

```nix
# `<nixpkgs>` is only a path; it has to be imported before it can be
# called with its (empty) argument set.
{ pkgs ? import <nixpkgs> { } }:

pkgs.mkShell {
  name = "py-tree-sitter-dev-shell";

  # A Python interpreter bundled with py-tree-sitter and the Rust grammar binding.
  buildInputs = with pkgs; [
    (python3.withPackages (ps: with ps; [
      tree-sitter
      tree-sitter-grammars.tree-sitter-rust
    ]))
  ];
}
```

Once inside the shell, the following Python code demonstrates how to parse a Rust code snippet:

```python
# Import the Tree Sitter library and Rust grammar
import tree_sitter
import tree_sitter_rust

# Wrap the compiled Rust grammar and build a parser bound to it
rust_language = tree_sitter.Language(tree_sitter_rust.language())
rust_parser = tree_sitter.Parser(rust_language)

# The Rust snippet to parse; the parser consumes bytes rather than str
rust_source = b"""
fn main() {
println!("Hello, world!");
}
"""
syntax_tree = rust_parser.parse(rust_source)

# Display the resulting syntax tree
print(syntax_tree.root_node)
```

The `tree_sitter_rust.language()` function returns a pointer to the compiled Rust grammar provided by the Nix shell, which `tree_sitter.Language` wraps into a language object. The resulting tree allows you to inspect the structure of the code programmatically.

3 changes: 3 additions & 0 deletions doc/redirects.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@
"chap-packageconfig": [
"index.html#chap-packageconfig"
],
"python-tree-sitter": [
"index.html#python-tree-sitter"
],
"sec-allow-broken": [
"index.html#sec-allow-broken"
],
Expand Down
13 changes: 13 additions & 0 deletions maintainers/maintainer-list.nix
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,13 @@
githubId = 1773511;
name = "Adrien Devresse";
};
adfaure = {
email = "adfaure@pm.me";
matrix = "@adfaure:matrix.org";
github = "adfaure";
githubId = 8026586;
name = "Adrien Faure";
};
adisbladis = {
email = "adisbladis@gmail.com";
matrix = "@adis:blad.is";
Expand Down Expand Up @@ -856,6 +863,12 @@
githubId = 45179933;
name = "Alex Jackson";
};
a-jay98 = {
email = "ali@jamadi.me";
github = "A-jay98";
githubId = 23138252;
name = "Ali Jamadi";
};
ajgon = {
email = "igor@rzegocki.pl";
github = "ajgon";
Expand Down
158 changes: 158 additions & 0 deletions pkgs/development/python-modules/tree-sitter-grammars/default.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
{
lib,
buildPythonPackage,
pytestCheckHook,
tree-sitter,
symlinkJoin,
writeTextDir,
pythonOlder,
# `name`: grammar derivation pname in the format of `tree-sitter-<lang>`
name,
grammarDrv,
}:
let
# The Python package reuses the version of the grammar derivation it wraps.
inherit (grammarDrv) version;

# Grammar name with dashes replaced by underscores; used as the Python package
# directory, the project name and the module name checked by pythonImportsCheck.
snakeCaseName = lib.replaceStrings [ "-" ] [ "_" ] name;
# Used as the derivation pname and as the prefix of the generated source tree's name.
drvPrefix = "python-${name}";
# If the name of the grammar attribute differs from the grammar's symbol name,
# it could cause a symbol mismatch at load time. This manually curated collection
# of overrides ensures the binding can find the correct symbol
langIdentOverrides = {
tree_sitter_org_nvim = "tree_sitter_org";
};
# Name of the grammar's C entry point called from binding.c; falls back to the
# snake-case name when no override is listed above.
langIdent = langIdentOverrides.${snakeCaseName} or snakeCaseName;
in
buildPythonPackage {
inherit version;
pname = drvPrefix;

src = symlinkJoin {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be cleaner keeping the files in-tree, pointing src there and using substituteInPlace --subst-var-by in postPatch to pass the variables.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, that is indeed a solution. However, note that the paths of two files are generated from the variable: `${snakeCaseName}/__init__.py` and `${snakeCaseName}/binding.c`.

This may require some file movement. Perhaps a hybrid solution could work as well.

name = "${drvPrefix}-source";
paths = [
# Package entry point: re-exports `language` from the compiled `_binding` extension.
(writeTextDir "${snakeCaseName}/__init__.py" ''
from ._binding import language

__all__ = ["language"]
'')
# C extension module `_binding` exposing a single `language()` function. It calls
# the grammar's entry point (named by `langIdent`) and returns the resulting
# pointer to Python as an integer via PyLong_FromVoidPtr.
(writeTextDir "${snakeCaseName}/binding.c" ''
#include <Python.h>

typedef struct TSLanguage TSLanguage;

TSLanguage *${langIdent}(void);

static PyObject* _binding_language(PyObject *self, PyObject *args) {
return PyLong_FromVoidPtr(${langIdent}());
}

static PyMethodDef methods[] = {
{"language", _binding_language, METH_NOARGS,
"Get the tree-sitter language for this grammar."},
{NULL, NULL, 0, NULL}
};

static struct PyModuleDef module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_binding",
.m_doc = NULL,
.m_size = -1,
.m_methods = methods
};

PyMODINIT_FUNC PyInit__binding(void) {
return PyModule_Create(&module);
}
'')
# setuptools build script: compiles binding.c into the `_binding` extension inside
# the `snakeCaseName` package and hands the grammar derivation's `parser` file to
# the linker as an extra object.
(writeTextDir "setup.py" ''
from platform import system
from setuptools import Extension, setup


setup(
packages=["${snakeCaseName}"],
ext_package="${snakeCaseName}",
ext_modules=[
Extension(
name="_binding",
sources=["${snakeCaseName}/binding.c"],
extra_objects = ["${grammarDrv}/parser"],
extra_compile_args=(
["-std=c11"] if system() != 'Windows' else []
),
)
],
)
'')
# Project metadata and build-system declaration (setuptools backend); the project
# name and version are interpolated from `snakeCaseName` and `version`.
# NOTE(review): `readme` points at README.md, which is not among the files
# generated in this list - confirm this is intentional.
(writeTextDir "pyproject.toml" ''
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name="${snakeCaseName}"
description = "${langIdent} grammar for tree-sitter"
version = "${version}"
keywords = ["parsing", "incremental", "python"]
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Compilers",
"Topic :: Text Processing :: Linguistic",
]

requires-python = ">=3.8"
license.text = "MIT"
readme = "README.md"

[project.optional-dependencies]
core = ["tree-sitter~=0.21"]

[tool.cibuildwheel]
build = "cp38-*"
build-frontend = "build"
'')
# Smoke test: checks that the value returned by the binding's `language()` can be
# wrapped in a `Language`, assigned to a `Parser`, and used to parse an empty input.
(writeTextDir "tests/test_language.py" ''
from ${snakeCaseName} import language
from tree_sitter import Language, Parser

# This test only checks that the binding can load the grammar from the compiled shared object.
# It does not verify the grammar itself; that is tested in
# `pkgs/development/tools/parsing/tree-sitter/grammar.nix`.

def test_language():
lang = Language(language())
assert lang is not None
parser = Parser()
parser.language = lang
tree = parser.parse(bytes("", "utf-8"))
assert tree is not None
'')
];
};

# Delete the source package directory before the checks run (see the issue linked
# inside the script). Presumably this makes the tests import the installed package,
# which contains the compiled extension, instead of the source tree - confirm
# against the linked issue.
preCheck = ''
# https://github.com/NixOS/nixpkgs/issues/255262
rm -r ${snakeCaseName}
'';

# Same lower bound as `requires-python` in the generated pyproject.toml.
disabled = pythonOlder "3.8";

# py-tree-sitter is needed by the generated test, which imports `tree_sitter`.
nativeCheckInputs = [
tree-sitter
pytestCheckHook
];
# Import check of the generated package under its snake-case name.
pythonImportsCheck = [ snakeCaseName ];

meta = {
description = "Python bindings for ${name}";
license = lib.licenses.mit;
maintainers = with lib.maintainers; [
a-jay98
adfaure
mightyiam
stepbrobd
];
}
18 changes: 18 additions & 0 deletions pkgs/top-level/python-packages.nix
Original file line number Diff line number Diff line change
Expand Up @@ -17386,6 +17386,24 @@ self: super: with self; {
callPackage ../development/python-modules/tree-sitter-embedded-template
{ };

# One Python binding package per built tree-sitter grammar, keyed by the
# grammar's attribute name (`tree-sitter-<lang>`).
tree-sitter-grammars =
  let
    # Grammars not compatible with the current py-tree-sitter version
    incompatibleGrammars = [
      "tree-sitter-sql"
      "tree-sitter-templ"
    ];

    compatibleGrammars = lib.filterAttrs (
      name: _: !(builtins.elem name incompatibleGrammars)
    ) pkgs.tree-sitter.builtGrammars;

    # Build the Python binding for a single grammar derivation
    mkBinding =
      name: grammarDrv:
      callPackage ../development/python-modules/tree-sitter-grammars { inherit name grammarDrv; };
  in
  lib.recurseIntoAttrs (lib.mapAttrs mkBinding compatibleGrammars);

tree-sitter-html = callPackage ../development/python-modules/tree-sitter-html { };

tree-sitter-javascript = callPackage ../development/python-modules/tree-sitter-javascript { };
Expand Down