diff --git a/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix b/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix index 2b8ba7616ae62..4fe658b8207be 100644 --- a/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix +++ b/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix @@ -4,7 +4,7 @@ self: super: let generatedGrammars = callPackage ./generated.nix { - buildGrammar = callPackage ../../../../../development/tools/parsing/tree-sitter/grammar.nix { }; + buildGrammar = callPackage ../../../../../development/tools/parsing/tree-sitter/build-grammar.nix { }; }; generatedDerivations = lib.filterAttrs (_: lib.isDerivation) generatedGrammars; diff --git a/pkgs/development/tools/parsing/tree-sitter/README.md b/pkgs/development/tools/parsing/tree-sitter/README.md new file mode 100644 index 0000000000000..5d4e5a2272a7d --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/README.md @@ -0,0 +1,41 @@ +# tree-sitter libraries, binaries & grammars + +This directory packages tree-sitter and its grammars. + +The grammar descriptions can be found in [./grammars.json](). + +## Updating tree-sitter + +1) Change all hashes at the beginning of [./default.nix](). +2) Update the grammars (see below) + +## Updating all grammars + +First you need a GitHub Personal Access Token, otherwise the update script runs into API rate limits. +Go to https://github.com/settings/tokens and generate a classic token, copy the secret. + +Then build the update script and run it with the token: + +```bash +$ nix-build -A tree-sitter.updater.update-all-grammars +$ env GITHUB_TOKEN=<secret> ./result +``` + +This will prefetch all repos mentioned in [./grammars.json]() and put their new hashes +into the [./grammars]() directory. + +If a new repository was added to the `github.com/tree-sitter` organization, +the update process will throw an error and you need to add the new repo to +either `knownTreeSitterOrgGrammarRepos` (if it’s a grammar) or to +`ignoredTreeSitterOrgRepos`. 
+This is to make sure we always package every official grammar. + +## Adding a third-party grammar + +Add it to the `otherGrammars` section in [./grammars.json](). +The grammar name has to be unique among all grammars (upstream and third party). + +## Deleting a grammar + +In case a grammar needs to be removed, please remove the generated outputs +in the [./grammars]() directory manually. diff --git a/pkgs/development/tools/parsing/tree-sitter/grammar.nix b/pkgs/development/tools/parsing/tree-sitter/build-grammar.nix similarity index 88% rename from pkgs/development/tools/parsing/tree-sitter/grammar.nix rename to pkgs/development/tools/parsing/tree-sitter/build-grammar.nix index a4d8d7324f993..47bbf5e9a7358 100644 --- a/pkgs/development/tools/parsing/tree-sitter/grammar.nix +++ b/pkgs/development/tools/parsing/tree-sitter/build-grammar.nix @@ -12,6 +12,7 @@ , version # source for the language grammar , source + # subdirectory inside of source that contains the actual grammar, or `null` if there is none. , location ? null }: @@ -20,7 +21,10 @@ stdenv.mkDerivation rec { pname = "${language}-grammar"; inherit version; - src = if location == null then source else "${source}/${location}"; + src = + if location == null + then source + else "${source}/${location}"; buildInputs = [ tree-sitter ]; diff --git a/pkgs/development/tools/parsing/tree-sitter/default.nix b/pkgs/development/tools/parsing/tree-sitter/default.nix index dcc8be0640f47..306d697ead2e9 100644 --- a/pkgs/development/tools/parsing/tree-sitter/default.nix +++ b/pkgs/development/tools/parsing/tree-sitter/default.nix @@ -20,16 +20,15 @@ , enableShared ? !stdenv.hostPlatform.isStatic , enableStatic ? stdenv.hostPlatform.isStatic , webUISupport ? false + +# REMOVED , extraGrammars ? { } }: -# TODO: move to carnix or https://github.com/kolloch/crate2nix +assert lib.assertMsg (extraGrammars == {}) "The `extraGrammars` for tree-sitter was removed, because the schema of extraGrammars was underspecified & undocumented. 
If you need support, please open an issue and ping @Profpatsch"; + let - # to update: - # 1) change all these hashes - # 2) nix-build -A tree-sitter.updater.update-all-grammars - # 3) Set GITHUB_TOKEN env variable to avoid api rate limit (Use a Personal Access Token from https://github.com/settings/tokens It does not need any permissions) - # 4) run the ./result script that is output by that (it updates ./grammars) + # to update: see ./README.md version = "0.20.7"; sha256 = "sha256-5ILiN5EfJ7WpeYBiXynfcLucdp8zmxVOj4gLkaFQYts="; cargoSha256 = "sha256-V4frCaU5QzTx3ujdaplw7vNkosbzyXHQvE+T7ntVOtU="; @@ -46,30 +45,38 @@ let fetchGrammar = (v: fetchgit { inherit (v) url rev sha256 fetchSubmodules; }); + # TODO: use linkFarm + + # All grammar definitions’ source repositories. grammars = - runCommand "grammars" { } ('' + runCommand "grammars" { } '' mkdir $out - '' + (lib.concatStrings (lib.mapAttrsToList - (name: grammar: "ln -s ${if grammar ? src then grammar.src else fetchGrammar grammar} $out/${name}\n") - (import ./grammars { inherit lib; })))); + ${lib.pipe + (import ./grammars { inherit lib; }) + [ + (lib.mapAttrsToList + (name: grammar: "ln -s ${fetchGrammar grammar} $out/${name}\n")) + lib.concatStrings + ]} + ''; + builtGrammars = let change = name: grammar: - callPackage ./grammar.nix { } { - language = if grammar ? language then grammar.language else name; + callPackage ./build-grammar.nix { } { + language = name; inherit version; - source = if grammar ? src then grammar.src else fetchGrammar grammar; + source = fetchGrammar grammar; location = if grammar ? 
location then grammar.location else null; }; - grammars' = import ./grammars { inherit lib; } // extraGrammars; + grammars' = import ./grammars { inherit lib; }; grammars = grammars' // { tree-sitter-ocaml = grammars'.tree-sitter-ocaml // { location = "ocaml"; }; } // { tree-sitter-ocaml-interface = grammars'.tree-sitter-ocaml // { location = "interface"; }; } // - { tree-sitter-org-nvim = grammars'.tree-sitter-org-nvim // { language = "org"; }; } // { tree-sitter-typescript = grammars'.tree-sitter-typescript // { location = "typescript"; }; } // { tree-sitter-tsx = grammars'.tree-sitter-typescript // { location = "tsx"; }; } // { tree-sitter-markdown = grammars'.tree-sitter-markdown // { location = "tree-sitter-markdown"; }; } // - { tree-sitter-markdown-inline = grammars'.tree-sitter-markdown // { language = "markdown_inline"; location = "tree-sitter-markdown-inline"; }; }; + { tree-sitter-markdown-inline = grammars'.tree-sitter-markdown // { location = "tree-sitter-markdown-inline"; }; }; in lib.mapAttrs change (grammars); @@ -81,25 +88,23 @@ let # which is equivalent to # pkgs.tree-sitter.withPlugins (p: builtins.attrValues p) withPlugins = grammarFn: - let - grammars = grammarFn builtGrammars; - in - linkFarm "grammars" + lib.pipe builtGrammars [ + grammarFn (map (drv: - let - name = lib.strings.getName drv; - in { - name = - (lib.strings.replaceStrings [ "-" ] [ "_" ] - (lib.strings.removePrefix "tree-sitter-" - (lib.strings.removeSuffix "-grammar" name))) - + ".so"; + name = lib.pipe drv [ + lib.strings.getName + (lib.strings.removeSuffix "-grammar") + (lib.strings.removePrefix "tree-sitter-") + (lib.strings.replaceStrings [ "-" ] [ "_" ]) + (name: name + ".so") + ]; path = "${drv}/parser"; } - ) - grammars); + )) + (linkFarm "grammars") + ]; allGrammars = builtins.attrValues builtGrammars; diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars.json b/pkgs/development/tools/parsing/tree-sitter/grammars.json new file mode 100644 index 
0000000000000..751b93770f072 --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars.json @@ -0,0 +1,492 @@ +{ + "$schema": "./grammars.schema.json", + "ignoredTreeSitterOrgRepos": [ + { + "repo": "tree-sitter", + "reason": "main implementation" + }, + { + "repo": "tree-sitter-cli", + "reason": "cli implementation" + }, + { + "repo": "haskell-tree-sitter", + "reason": "this is the haskell language bindings, tree-sitter-haskell is the grammar" + }, + { + "repo": "ruby-tree-sitter.old", + "reason": "this is the ruby language bindings, tree-sitter-ruby is the grammar" + }, + { + "repo": "rust-tree-sitter", + "reason": "this is the (unmaintained) rust language bindings, tree-sitter-rust is the grammar" + }, + { + "repo": "node-tree-sitter", + "reason": "this is the nodejs language bindings, tree-sitter-javascript is the grammar" + }, + { + "repo": "py-tree-sitter", + "reason": "this is the python language bindings, tree-sitter-python is the grammar" + }, + { + "repo": "afl-tree-sitter", + "reason": "afl fuzzing for tree sitter" + }, + { + "repo": "highlight-schema", + "reason": "archived" + }, + { + "repo": "tree-sitter.github.io", + "reason": "website" + }, + { + "repo": "tree-sitter-razor", + "reason": "not maintained" + }, + { + "repo": "tree-sitter-graph", + "reason": "rust library for constructing arbitrary graph structures from source code" + }, + { + "repo": "tree-sitter-swift", + "reason": "abandoned" + } + ], + "knownTreeSitterOrgGrammarRepos": [ + { + "repo": "tree-sitter-javascript" + }, + { + "repo": "tree-sitter-c" + }, + { + "repo": "tree-sitter-json" + }, + { + "repo": "tree-sitter-cpp" + }, + { + "repo": "tree-sitter-ruby" + }, + { + "repo": "tree-sitter-go" + }, + { + "repo": "tree-sitter-c-sharp" + }, + { + "repo": "tree-sitter-python" + }, + { + "repo": "tree-sitter-typescript" + }, + { + "repo": "tree-sitter-rust" + }, + { + "repo": "tree-sitter-bash" + }, + { + "repo": "tree-sitter-php" + }, + { + "repo": "tree-sitter-java" + }, + { 
+ "repo": "tree-sitter-scala" + }, + { + "repo": "tree-sitter-ocaml" + }, + { + "repo": "tree-sitter-julia" + }, + { + "repo": "tree-sitter-agda" + }, + { + "repo": "tree-sitter-fluent" + }, + { + "repo": "tree-sitter-html" + }, + { + "repo": "tree-sitter-haskell" + }, + { + "repo": "tree-sitter-regex" + }, + { + "repo": "tree-sitter-css" + }, + { + "repo": "tree-sitter-verilog" + }, + { + "repo": "tree-sitter-jsdoc" + }, + { + "repo": "tree-sitter-ql" + }, + { + "repo": "tree-sitter-ql-dbscheme" + }, + { + "repo": "tree-sitter-embedded-template" + }, + { + "repo": "tree-sitter-tsq" + }, + { + "repo": "tree-sitter-toml" + } + ], + "otherGrammars": { + "tree-sitter-beancount": { + "type": "github", + "orga": "polarmutex", + "repo": "tree-sitter-beancount" + }, + "tree-sitter-clojure": { + "type": "github", + "orga": "sogaiu", + "repo": "tree-sitter-clojure" + }, + "tree-sitter-comment": { + "type": "github", + "orga": "stsewd", + "repo": "tree-sitter-comment" + }, + "tree-sitter-dart": { + "type": "github", + "orga": "usernobody14", + "repo": "tree-sitter-dart" + }, + "tree-sitter-elisp": { + "type": "github", + "orga": "wilfred", + "repo": "tree-sitter-elisp" + }, + "tree-sitter-nix": { + "type": "github", + "orga": "cstrahan", + "repo": "tree-sitter-nix" + }, + "tree-sitter-latex": { + "type": "github", + "orga": "latex-lsp", + "repo": "tree-sitter-latex" + }, + "tree-sitter-lua": { + "type": "github", + "orga": "MunifTanjim", + "repo": "tree-sitter-lua" + }, + "tree-sitter-fennel": { + "type": "github", + "orga": "travonted", + "repo": "tree-sitter-fennel" + }, + "tree-sitter-make": { + "type": "github", + "orga": "alemuller", + "repo": "tree-sitter-make" + }, + "tree-sitter-markdown": { + "type": "github", + "orga": "MDeiml", + "repo": "tree-sitter-markdown" + }, + "tree-sitter-rego": { + "type": "github", + "orga": "FallenAngel97", + "repo": "tree-sitter-rego" + }, + "tree-sitter-rst": { + "type": "github", + "orga": "stsewd", + "repo": "tree-sitter-rst" + }, + 
"tree-sitter-svelte": { + "type": "github", + "orga": "Himujjal", + "repo": "tree-sitter-svelte" + }, + "tree-sitter-sql": { + "type": "github", + "orga": "m-novikov", + "repo": "tree-sitter-sql" + }, + "tree-sitter-vim": { + "type": "github", + "orga": "vigoux", + "repo": "tree-sitter-viml" + }, + "tree-sitter-yaml": { + "type": "github", + "orga": "ikatyang", + "repo": "tree-sitter-yaml" + }, + "tree-sitter-zig": { + "type": "github", + "orga": "maxxnino", + "repo": "tree-sitter-zig" + }, + "tree-sitter-fish": { + "type": "github", + "orga": "ram02z", + "repo": "tree-sitter-fish" + }, + "tree-sitter-dot": { + "type": "github", + "orga": "rydesun", + "repo": "tree-sitter-dot" + }, + "tree-sitter-norg": { + "type": "github", + "orga": "nvim-neorg", + "repo": "tree-sitter-norg" + }, + "tree-sitter-commonlisp": { + "type": "github", + "orga": "thehamsta", + "repo": "tree-sitter-commonlisp" + }, + "tree-sitter-cuda": { + "type": "github", + "orga": "thehamsta", + "repo": "tree-sitter-cuda" + }, + "tree-sitter-glsl": { + "type": "github", + "orga": "thehamsta", + "repo": "tree-sitter-glsl" + }, + "tree-sitter-dockerfile": { + "type": "github", + "orga": "camdencheek", + "repo": "tree-sitter-dockerfile" + }, + "tree-sitter-ledger": { + "type": "github", + "orga": "cbarrete", + "repo": "tree-sitter-ledger" + }, + "tree-sitter-gomod": { + "type": "github", + "orga": "camdencheek", + "repo": "tree-sitter-go-mod" + }, + "tree-sitter-gowork": { + "type": "github", + "orga": "omertuc", + "repo": "tree-sitter-go-work" + }, + "tree-sitter-graphql": { + "type": "github", + "orga": "bkegley", + "repo": "tree-sitter-graphql" + }, + "tree-sitter-pgn": { + "type": "github", + "orga": "rolandwalker", + "repo": "tree-sitter-pgn" + }, + "tree-sitter-perl": { + "type": "github", + "orga": "ganezdragon", + "repo": "tree-sitter-perl" + }, + "tree-sitter-kotlin": { + "type": "github", + "orga": "fwcd", + "repo": "tree-sitter-kotlin" + }, + "tree-sitter-scss": { + "type": "github", + 
"orga": "serenadeai", + "repo": "tree-sitter-scss" + }, + "tree-sitter-erlang": { + "type": "github", + "orga": "abstractmachineslab", + "repo": "tree-sitter-erlang" + }, + "tree-sitter-elixir": { + "type": "github", + "orga": "elixir-lang", + "repo": "tree-sitter-elixir" + }, + "tree-sitter-surface": { + "type": "github", + "orga": "connorlay", + "repo": "tree-sitter-surface" + }, + "tree-sitter-heex": { + "type": "github", + "orga": "connorlay", + "repo": "tree-sitter-heex" + }, + "tree-sitter-supercollider": { + "type": "github", + "orga": "madskjeldgaard", + "repo": "tree-sitter-supercollider" + }, + "tree-sitter-tlaplus": { + "type": "github", + "orga": "tlaplus-community", + "repo": "tree-sitter-tlaplus" + }, + "tree-sitter-glimmer": { + "type": "github", + "orga": "alexlafroscia", + "repo": "tree-sitter-glimmer" + }, + "tree-sitter-pug": { + "type": "github", + "orga": "zealot128", + "repo": "tree-sitter-pug" + }, + "tree-sitter-vue": { + "type": "github", + "orga": "ikatyang", + "repo": "tree-sitter-vue" + }, + "tree-sitter-elm": { + "type": "github", + "orga": "elm-tooling", + "repo": "tree-sitter-elm" + }, + "tree-sitter-yang": { + "type": "github", + "orga": "hubro", + "repo": "tree-sitter-yang" + }, + "tree-sitter-query": { + "type": "github", + "orga": "nvim-treesitter", + "repo": "tree-sitter-query" + }, + "tree-sitter-sparql": { + "type": "github", + "orga": "bonabeavis", + "repo": "tree-sitter-sparql" + }, + "tree-sitter-gdscript": { + "type": "github", + "orga": "prestonknopp", + "repo": "tree-sitter-gdscript" + }, + "tree-sitter-godot-resource": { + "type": "github", + "orga": "prestonknopp", + "repo": "tree-sitter-godot-resource" + }, + "tree-sitter-turtle": { + "type": "github", + "orga": "bonabeavis", + "repo": "tree-sitter-turtle" + }, + "tree-sitter-devicetree": { + "type": "github", + "orga": "joelspadin", + "repo": "tree-sitter-devicetree" + }, + "tree-sitter-r": { + "type": "github", + "orga": "r-lib", + "repo": "tree-sitter-r" + }, + 
"tree-sitter-bibtex": { + "type": "github", + "orga": "latex-lsp", + "repo": "tree-sitter-bibtex" + }, + "tree-sitter-fortran": { + "type": "github", + "orga": "stadelmanma", + "repo": "tree-sitter-fortran" + }, + "tree-sitter-cmake": { + "type": "github", + "orga": "uyha", + "repo": "tree-sitter-cmake" + }, + "tree-sitter-janet-simple": { + "type": "github", + "orga": "sogaiu", + "repo": "tree-sitter-janet-simple" + }, + "tree-sitter-json5": { + "type": "github", + "orga": "joakker", + "repo": "tree-sitter-json5" + }, + "tree-sitter-pioasm": { + "type": "github", + "orga": "leo60228", + "repo": "tree-sitter-pioasm" + }, + "tree-sitter-hjson": { + "type": "github", + "orga": "winston0410", + "repo": "tree-sitter-hjson" + }, + "tree-sitter-llvm": { + "type": "github", + "orga": "benwilliamgraham", + "repo": "tree-sitter-llvm" + }, + "tree-sitter-http": { + "type": "github", + "orga": "ntbbloodbath", + "repo": "tree-sitter-http" + }, + "tree-sitter-prisma": { + "type": "github", + "orga": "victorhqc", + "repo": "tree-sitter-prisma" + }, + "tree-sitter-org-nvim": { + "type": "github", + "orga": "milisims", + "repo": "tree-sitter-org" + }, + "tree-sitter-hcl": { + "type": "github", + "orga": "MichaHoffmann", + "repo": "tree-sitter-hcl" + }, + "tree-sitter-scheme": { + "type": "github", + "orga": "6cdh", + "repo": "tree-sitter-scheme" + }, + "tree-sitter-tiger": { + "type": "github", + "orga": "ambroisie", + "repo": "tree-sitter-tiger" + }, + "tree-sitter-nickel": { + "type": "github", + "orga": "nickel-lang", + "repo": "tree-sitter-nickel" + }, + "tree-sitter-smithy": { + "type": "github", + "orga": "indoorvivants", + "repo": "tree-sitter-smithy" + }, + "tree-sitter-jsonnet": { + "type": "github", + "orga": "sourcegraph", + "repo": "tree-sitter-jsonnet" + }, + "tree-sitter-jsonc": { + "type": "gitlab", + "projectId": "24426815" + } + } +} diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars.schema.json 
b/pkgs/development/tools/parsing/tree-sitter/grammars.schema.json new file mode 100644 index 0000000000000..5269c1b1c990d --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars.schema.json @@ -0,0 +1,108 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "description": "Definition of all tree-sitter grammars we know of.\n\nUsed by both the update script and the default.nix to construct the list of grammars.\n\nGrammar list: https://github.com/tree-sitter/tree-sitter/blob/master/docs/index.md\n", + "type": "object", + "required": [ + "ignoredTreeSitterOrgRepos" + ], + "properties": { + "ignoredTreeSitterOrgRepos": { + "description": "Repositories in the tree-sitter github org that we want to ignore.\nThese do not contain grammars.\n\nWe want to have a full overview of the tree-sitter organization, so that we can notice when a new grammar is added.\n", + "type": "array", + "items": { + "type": "object", + "required": [ + "repo", + "reason" + ], + "properties": { + "repo": { + "description": "Repository in the tree-sitter github organization which should be ignored.", + "type": "string" + }, + "reason": { + "description": "Reason why this repository is ignored.", + "type": "string" + } + } + } + }, + "knownTreeSitterOrgGrammarRepos": { + "description": "Grammars we want to fetch from the tree-sitter github orga", + "type": "array", + "items": { + "type": "object", + "required": [ + "repo" + ], + "properties": { + "repo": { + "description": "Repository name in the tree-sitter github organization, containing a grammar. 
If there is no `grammars` field, this name will also be used as the name of the grammar & the nix attribute.", + "type": "string" + } + } + } + }, + "otherGrammars": { + "description": "Additional grammars that are not in the official github orga.\nIf you need a grammar that already exists in the official orga, make sure to give it a different name.\n", + "type": "object", + "additionalProperties": { + "allOf": [ + { + "description": "A grammar that does not live in the official tree-sitter github orga.", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "description": "The type of repo to fetch.", + "enum": [ + "github", + "gitlab" + ] + } + } + }, + { + "oneOf": [ + { + "required": [ + "orga", + "repo" + ], + "properties": { + "type": { + "const": "github" + }, + "orga": { + "description": "The github organization of this grammar’s repo", + "type": "string" + }, + "repo": { + "description": "The github repo", + "type": "string" + } + } + }, + { + "required": [ + "projectId" + ], + "properties": { + "type": { + "const": "gitlab" + }, + "projectId": { + "description": "The Gitlab projectId of this repository. It is displayed below the repository name on the web UI.", + "type": "string" + } + } + } + ] + } + ] + } + } + } +} diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/README.md b/pkgs/development/tools/parsing/tree-sitter/grammars/README.md new file mode 100644 index 0000000000000..c5cb0521a1efa --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/README.md @@ -0,0 +1,4 @@ +# Generated tree-sitter grammars + +These grammars have been autogenerated by the update script. +Please see [../README.md]() for how to run it. 
diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix b/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix index 5b3862082d8f7..7854e69723353 100644 --- a/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix @@ -46,6 +46,7 @@ tree-sitter-jsdoc = lib.importJSON ./tree-sitter-jsdoc.json; tree-sitter-json = lib.importJSON ./tree-sitter-json.json; tree-sitter-json5 = lib.importJSON ./tree-sitter-json5.json; + tree-sitter-jsonc = lib.importJSON ./tree-sitter-jsonc.json; tree-sitter-jsonnet = lib.importJSON ./tree-sitter-jsonnet.json; tree-sitter-julia = lib.importJSON ./tree-sitter-julia.json; tree-sitter-kotlin = lib.importJSON ./tree-sitter-kotlin.json; diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json b/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json new file mode 100644 index 0000000000000..f29175254b485 --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json @@ -0,0 +1,11 @@ +{ + "url": "https://gitlab.com/WhyNotHugo/tree-sitter-jsonc.git", + "rev": "02b01653c8a1c198ae7287d566efa86a135b30d5", + "date": "2021-03-07T20:32:20+01:00", + "path": "/nix/store/hqh4kxw3fp9hr3yglsxv9d2kvcvpzdfa-tree-sitter-jsonc", + "sha256": "0mc68i7shwmn88iv3lcqyjvrhy3b62h02k272is7chk2yiw3crw9", + "fetchLFS": false, + "fetchSubmodules": false, + "deepClone": false, + "leaveDotGit": false +} diff --git a/pkgs/development/tools/parsing/tree-sitter/update.nix b/pkgs/development/tools/parsing/tree-sitter/update.nix index ad62530ee31d7..f27b9aabe9d11 100644 --- a/pkgs/development/tools/parsing/tree-sitter/update.nix +++ b/pkgs/development/tools/parsing/tree-sitter/update.nix @@ -9,367 +9,39 @@ , xe }: -# Grammar list: -# https://github.com/tree-sitter/tree-sitter/blob/master/docs/index.md let - # Grammars we want to fetch from the tree-sitter github orga - 
knownTreeSitterOrgGrammarRepos = [ - "tree-sitter-javascript" - "tree-sitter-c" - "tree-sitter-json" - "tree-sitter-cpp" - "tree-sitter-ruby" - "tree-sitter-go" - "tree-sitter-c-sharp" - "tree-sitter-python" - "tree-sitter-typescript" - "tree-sitter-rust" - "tree-sitter-bash" - "tree-sitter-php" - "tree-sitter-java" - "tree-sitter-scala" - "tree-sitter-ocaml" - "tree-sitter-julia" - "tree-sitter-agda" - "tree-sitter-fluent" - "tree-sitter-html" - "tree-sitter-haskell" - "tree-sitter-regex" - "tree-sitter-css" - "tree-sitter-verilog" - "tree-sitter-jsdoc" - "tree-sitter-ql" - "tree-sitter-ql-dbscheme" - "tree-sitter-embedded-template" - "tree-sitter-tsq" - "tree-sitter-toml" - ]; - knownTreeSitterOrgGrammarReposJson = jsonFile "known-tree-sitter-org-grammar-repos" knownTreeSitterOrgGrammarRepos; + grammarsJson = lib.importJSON ./grammars.json; - # repos of the tree-sitter github orga we want to ignore (not grammars) - ignoredTreeSitterOrgRepos = [ - "tree-sitter" - "tree-sitter-cli" - # this is the haskell language bindings, tree-sitter-haskell is the grammar - "haskell-tree-sitter" - # this is the ruby language bindings, tree-sitter-ruby is the grammar - "ruby-tree-sitter.old" - # this is the (unmaintained) rust language bindings, tree-sitter-rust is the grammar - "rust-tree-sitter" - # this is the nodejs language bindings, tree-sitter-javascript is the grammar - "node-tree-sitter" - # this is the python language bindings, tree-sitter-python is the grammar - "py-tree-sitter" - # afl fuzzing for tree sitter - "afl-tree-sitter" - # archived - "highlight-schema" - # website - "tree-sitter.github.io" - # not maintained - "tree-sitter-razor" - # rust library for constructing arbitrary graph structures from source code - "tree-sitter-graph" - # abandoned - "tree-sitter-swift" - ]; - ignoredTreeSitterOrgReposJson = jsonFile "ignored-tree-sitter-org-repos" ignoredTreeSitterOrgRepos; - - # Additional grammars that are not in the official github orga. 
- # If you need a grammar that already exists in the official orga, - # make sure to give it a different name. - otherGrammars = { - "tree-sitter-beancount" = { - orga = "polarmutex"; - repo = "tree-sitter-beancount"; - }; - "tree-sitter-clojure" = { - orga = "sogaiu"; - repo = "tree-sitter-clojure"; - }; - "tree-sitter-comment" = { - orga = "stsewd"; - repo = "tree-sitter-comment"; - }; - "tree-sitter-dart" = { - orga = "usernobody14"; - repo = "tree-sitter-dart"; - }; - "tree-sitter-elisp" = { - orga = "wilfred"; - repo = "tree-sitter-elisp"; - }; - "tree-sitter-nix" = { - orga = "cstrahan"; - repo = "tree-sitter-nix"; - }; - "tree-sitter-latex" = { - orga = "latex-lsp"; - repo = "tree-sitter-latex"; - }; - "tree-sitter-lua" = { - orga = "MunifTanjim"; - repo = "tree-sitter-lua"; - }; - "tree-sitter-fennel" = { - orga = "travonted"; - repo = "tree-sitter-fennel"; - }; - "tree-sitter-make" = { - orga = "alemuller"; - repo = "tree-sitter-make"; - }; - "tree-sitter-markdown" = { - orga = "MDeiml"; - repo = "tree-sitter-markdown"; - }; - "tree-sitter-rego" = { - orga = "FallenAngel97"; - repo = "tree-sitter-rego"; - }; - "tree-sitter-rst" = { - orga = "stsewd"; - repo = "tree-sitter-rst"; - }; - "tree-sitter-svelte" = { - orga = "Himujjal"; - repo = "tree-sitter-svelte"; - }; - "tree-sitter-sql" = { - orga = "m-novikov"; - repo = "tree-sitter-sql"; - }; - "tree-sitter-vim" = { - orga = "vigoux"; - repo = "tree-sitter-viml"; - }; - "tree-sitter-yaml" = { - orga = "ikatyang"; - repo = "tree-sitter-yaml"; - }; - "tree-sitter-zig" = { - orga = "maxxnino"; - repo = "tree-sitter-zig"; - }; - "tree-sitter-fish" = { - orga = "ram02z"; - repo = "tree-sitter-fish"; - }; - "tree-sitter-dot" = { - orga = "rydesun"; - repo = "tree-sitter-dot"; - }; - "tree-sitter-norg" = { - orga = "nvim-neorg"; - repo = "tree-sitter-norg"; - }; - "tree-sitter-commonlisp" = { - orga = "thehamsta"; - repo = "tree-sitter-commonlisp"; - }; - "tree-sitter-cuda" = { - orga = "thehamsta"; - repo = 
"tree-sitter-cuda"; - }; - "tree-sitter-glsl" = { - orga = "thehamsta"; - repo = "tree-sitter-glsl"; - }; - "tree-sitter-dockerfile" = { - orga = "camdencheek"; - repo = "tree-sitter-dockerfile"; - }; - "tree-sitter-ledger" = { - orga = "cbarrete"; - repo = "tree-sitter-ledger"; - }; - "tree-sitter-gomod" = { - orga = "camdencheek"; - repo = "tree-sitter-go-mod"; - }; - "tree-sitter-gowork" = { - orga = "omertuc"; - repo = "tree-sitter-go-work"; - }; - "tree-sitter-graphql" = { - orga = "bkegley"; - repo = "tree-sitter-graphql"; - }; - "tree-sitter-pgn" = { - orga = "rolandwalker"; - repo = "tree-sitter-pgn"; - }; - "tree-sitter-perl" = { - orga = "ganezdragon"; - repo = "tree-sitter-perl"; - }; - "tree-sitter-kotlin" = { - orga = "fwcd"; - repo = "tree-sitter-kotlin"; - }; - "tree-sitter-scss" = { - orga = "serenadeai"; - repo = "tree-sitter-scss"; - }; - "tree-sitter-erlang" = { - orga = "abstractmachineslab"; - repo = "tree-sitter-erlang"; - }; - "tree-sitter-elixir" = { - orga = "elixir-lang"; - repo = "tree-sitter-elixir"; - }; - "tree-sitter-surface" = { - orga = "connorlay"; - repo = "tree-sitter-surface"; - }; - "tree-sitter-heex" = { - orga = "connorlay"; - repo = "tree-sitter-heex"; - }; - "tree-sitter-supercollider" = { - orga = "madskjeldgaard"; - repo = "tree-sitter-supercollider"; - }; - "tree-sitter-tlaplus" = { - orga = "tlaplus-community"; - repo = "tree-sitter-tlaplus"; - }; - "tree-sitter-glimmer" = { - orga = "alexlafroscia"; - repo = "tree-sitter-glimmer"; - }; - "tree-sitter-pug" = { - orga = "zealot128"; - repo = "tree-sitter-pug"; - }; - "tree-sitter-vue" = { - orga = "ikatyang"; - repo = "tree-sitter-vue"; - }; - "tree-sitter-elm" = { - orga = "elm-tooling"; - repo = "tree-sitter-elm"; - }; - "tree-sitter-yang" = { - orga = "hubro"; - repo = "tree-sitter-yang"; - }; - "tree-sitter-query" = { - orga = "nvim-treesitter"; - repo = "tree-sitter-query"; - }; - "tree-sitter-sparql" = { - orga = "bonabeavis"; - repo = "tree-sitter-sparql"; - }; - 
"tree-sitter-gdscript" = { - orga = "prestonknopp"; - repo = "tree-sitter-gdscript"; - }; - "tree-sitter-godot-resource" = { - orga = "prestonknopp"; - repo = "tree-sitter-godot-resource"; - }; - "tree-sitter-turtle" = { - orga = "bonabeavis"; - repo = "tree-sitter-turtle"; - }; - "tree-sitter-devicetree" = { - orga = "joelspadin"; - repo = "tree-sitter-devicetree"; - }; - "tree-sitter-r" = { - orga = "r-lib"; - repo = "tree-sitter-r"; - }; - "tree-sitter-bibtex" = { - orga = "latex-lsp"; - repo = "tree-sitter-bibtex"; - }; - "tree-sitter-fortran" = { - orga = "stadelmanma"; - repo = "tree-sitter-fortran"; - }; - "tree-sitter-cmake" = { - orga = "uyha"; - repo = "tree-sitter-cmake"; - }; - "tree-sitter-janet-simple" = { - orga = "sogaiu"; - repo = "tree-sitter-janet-simple"; - }; - "tree-sitter-json5" = { - orga = "joakker"; - repo = "tree-sitter-json5"; - }; - "tree-sitter-pioasm" = { - orga = "leo60228"; - repo = "tree-sitter-pioasm"; - }; - "tree-sitter-hjson" = { - orga = "winston0410"; - repo = "tree-sitter-hjson"; - }; - "tree-sitter-llvm" = { - orga = "benwilliamgraham"; - repo = "tree-sitter-llvm"; - }; - "tree-sitter-http" = { - orga = "ntbbloodbath"; - repo = "tree-sitter-http"; - }; - "tree-sitter-prisma" = { - orga = "victorhqc"; - repo = "tree-sitter-prisma"; - }; - "tree-sitter-org-nvim" = { - orga = "milisims"; - repo = "tree-sitter-org"; - }; - "tree-sitter-hcl" = { - orga = "MichaHoffmann"; - repo = "tree-sitter-hcl"; - }; - "tree-sitter-scheme" = { - orga = "6cdh"; - repo = "tree-sitter-scheme"; - }; - "tree-sitter-tiger" = { - orga = "ambroisie"; - repo = "tree-sitter-tiger"; - }; - "tree-sitter-nickel" = { - orga = "nickel-lang"; - repo = "tree-sitter-nickel"; - }; - "tree-sitter-smithy" = { - orga = "indoorvivants"; - repo = "tree-sitter-smithy"; - }; - "tree-sitter-jsonnet" = { - orga = "sourcegraph"; - repo = "tree-sitter-jsonnet"; - }; - }; + # a list of {nixRepoAttrName, type, } allGrammars = let + # All grammars in the tree sitter orga we 
know of treeSitterOrgaGrammars = lib.listToAttrs (map - (repo: + ({repo, ...}: { name = repo; value = { + type = "github"; orga = "tree-sitter"; inherit repo; }; }) - knownTreeSitterOrgGrammarRepos); + grammarsJson.knownTreeSitterOrgGrammarRepos); + merged = + mergeAttrsUnique + grammarsJson.otherGrammars + treeSitterOrgaGrammars; in - mergeAttrsUnique otherGrammars treeSitterOrgaGrammars; + lib.mapAttrsToList + (nixRepoAttrName: attrs: attrs // { + inherit nixRepoAttrName; + }) + merged; + # TODO: move to lib mergeAttrsUnique = left: right: @@ -385,8 +57,6 @@ let - jsonFile = name: val: (formats.json { }).generate name val; - # implementation of the updater updateImpl = passArgs "updateImpl-with-args" { binaries = { @@ -394,10 +64,13 @@ let nix-prefetch-git = "${nix-prefetch-git}/bin/nix-prefetch-git"; printf = "${coreutils}/bin/printf"; }; - inherit - knownTreeSitterOrgGrammarRepos + inherit (grammarsJson) ignoredTreeSitterOrgRepos ; + knownTreeSitterOrgGrammarRepos = + map + ({repo, ...}: repo) + grammarsJson.knownTreeSitterOrgGrammarRepos; } (writers.writePython3 "updateImpl" { flakeIgnore = ["E501"]; @@ -410,11 +83,11 @@ let ${script} "$@" ''; - foreachSh = attrs: f: - lib.concatMapStringsSep "\n" f - (lib.mapAttrsToList (k: v: { name = k; } // v) attrs); - + # a list of nix values as a newline-separated json string, + # one entry per line jsonNewlines = lib.concatMapStringsSep "\n" (lib.generators.toJSON {}); + # a pretty-printed value as json file + jsonFile = name: val: (formats.json { }).generate name val; # Run the given script for each of the attr list. # The attrs are passed to the script as a json value. @@ -427,9 +100,14 @@ let # This will depend on your local environment, but that is intentional. 
outputDir = "${toString ./.}/grammars"; + # final script update-all-grammars = writeShellScript "update-all-grammars.sh" '' set -euo pipefail - ${updateImpl} fetch-and-check-tree-sitter-repos '{}' + + # first make sure we know about all upsteam repos + ${updateImpl} fetch-and-check-tree-sitter-repos '{}' + + # Then write one json file for each prefetched repo, in parallel echo "writing files to ${outputDir}" 1>&2 mkdir -p "${outputDir}" ${forEachParallel @@ -437,22 +115,18 @@ let (writeShellScript "fetch-repo" '' ${updateImpl} fetch-repo "$1" '') - (lib.mapAttrsToList - (nixRepoAttrName: attrs: attrs // { - inherit - nixRepoAttrName - outputDir; - }) + (map + (grammar: grammar // { inherit outputDir; }) allGrammars) } + + # finally, write a default.nix that calls all grammars ${updateImpl} print-all-grammars-nix-file "$(< ${ - jsonFile "all-grammars.json" { - allGrammars = - (lib.mapAttrsToList - (nixRepoAttrName: attrs: attrs // { - inherit nixRepoAttrName; - }) - allGrammars); + jsonFile "repo-attr-names.json" { + repoAttrNames = + map + ({nixRepoAttrName, ...}: nixRepoAttrName) + allGrammars; inherit outputDir; } })" diff --git a/pkgs/development/tools/parsing/tree-sitter/update_impl.py b/pkgs/development/tools/parsing/tree-sitter/update_impl.py index db470617ed9c2..f23c40175aee3 100644 --- a/pkgs/development/tools/parsing/tree-sitter/update_impl.py +++ b/pkgs/development/tools/parsing/tree-sitter/update_impl.py @@ -3,7 +3,7 @@ import subprocess as sub import os import sys -from typing import Iterator, Any, Literal, TypedDict +from typing import Iterator, Any, Literal, NoReturn, TypedDict, cast from tempfile import NamedTemporaryFile debug: bool = True if os.environ.get("DEBUG", False) else False @@ -21,6 +21,10 @@ def log(msg: str) -> None: print(msg, file=sys.stderr) +def critical(msg: str) -> NoReturn: + sys.exit(f"ERROR: {msg}") + + def atomically_write(file_path: str, content: bytes) -> None: """atomically write the content into `file_path`""" with 
NamedTemporaryFile( @@ -51,20 +55,36 @@ def curl_github_args(token: str | None, url: str) -> Args: yield url -def curl_result(output: bytes) -> Any | Literal["not found"]: +def curl_gitlab_args(url: str) -> Args: + """Query the gitlab API via curl""" + yield bins["curl"] + if not debug: + yield "--silent" + # follow redirects + yield "--location" + yield url + + +def github_curl_result(output: bytes) -> Any | Literal["not found"]: """Parse the curl result of the github API""" res: Any = json.loads(output) match res: case dict(res): message: str = res.get("message", "") if "rate limit" in message: - sys.exit("Rate limited by the Github API") + critical("Rate limited by the Github API") if "Not Found" in message: return "not found" # if the result is another type, we can pass it on return res +def gitlab_curl_result(output: bytes) -> Any: + """Parse the curl result of the gitlab API""" + res: Any = json.loads(output) + return res + + def nix_prefetch_git_args(url: str, version_rev: str) -> Args: """Prefetch a git repository""" yield bins["nix-prefetch-git"] @@ -86,50 +106,117 @@ def run_cmd(args: Args) -> bytes: Dir = str +GithubRepo = TypedDict( + "GithubRepo", { + "orga": str, + "repo": str + } +) + +GitlabRepo = TypedDict( + "GitlabRepo", { + "nixRepoAttrName": str, + "projectId": str + } +) + +FetchRepoArg = TypedDict( + "FetchRepoArg", { + "type": str, + "outputDir": Dir, + "nixRepoAttrName": str + } +) + def fetchRepo() -> None: """fetch the given repo and write its nix-prefetch output to the corresponding grammar json file""" - match jsonArg: - case { - "orga": orga, - "repo": repo, - "outputDir": outputDir, - "nixRepoAttrName": nixRepoAttrName, - }: - token: str | None = os.environ.get("GITHUB_TOKEN", None) - out = run_cmd( - curl_github_args( - token, - url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest" - ) - ) - release: str - match curl_result(out): - case "not found": - # github sometimes returns an empty list even tough there 
are releases - log(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD") - release = "HEAD" - case {"tag_name": tag_name}: - release = tag_name - case _: - sys.exit(f"git result for {orga}/{repo} did not have a `tag_name` field") - - log(f"Fetching latest release ({release}) of {orga}/{repo} …") - res = run_cmd( - nix_prefetch_git_args( - url=f"https://github.com/{quote(orga)}/{quote(repo)}", - version_rev=release - ) - ) - atomically_write( - file_path=os.path.join( - outputDir, - f"{nixRepoAttrName}.json" - ), - content=res - ) + arg = cast(FetchRepoArg, jsonArg) + if debug: + log(f"Fetching repo {arg}") + match arg["type"]: + case "github": + res = fetchGithubRepo(cast(GithubRepo, jsonArg)) + case "gitlab": + res = fetchGitlabRepo(cast(GitlabRepo, jsonArg)) + case other: + critical(f'''Do not yet know how to handle the repo type "{other}"''') + attrName = jsonArg["nixRepoAttrName"] + atomically_write( + file_path=os.path.join( + arg["outputDir"], + f"{attrName}.json" + ), + content=res + ) + + +def fetchGithubRepo(r: GithubRepo) -> bytes: + token: str | None = os.environ.get("GITHUB_TOKEN", None) + orga = r["orga"] + repo = r["repo"] + out = run_cmd( + curl_github_args( + token, + url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest" + ) + ) + release: str + match github_curl_result(out): + case "not found": + # github sometimes returns an empty list even tough there are releases + log(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD") + release = "HEAD" + case {"tag_name": tag_name}: + release = tag_name + case _: + critical(f"git result for {orga}/{repo} did not have a `tag_name` field") + + log(f"Fetching latest release ({release}) of {orga}/{repo} …") + return run_cmd( + nix_prefetch_git_args( + url=f"https://github.com/{quote(orga)}/{quote(repo)}", + version_rev=release + ) + ) + + +def fetchGitlabRepo(r: GitlabRepo) -> bytes: + projectId = r["projectId"] + nixRepoAttrName = r["nixRepoAttrName"] + out = run_cmd( + 
curl_gitlab_args( + url=f"https://gitlab.com/api/v4/projects/{quote(projectId)}/repository/tags?order_by=version&sort=desc" + ) + ) + release: str + projectName = f'''"{nixRepoAttrName}" (Gitlab projectId: {projectId})''' + match gitlab_curl_result(out): + case list([]): + log(f"uh-oh, no release find for for {projectName}, using HEAD") + release = "HEAD" + case list([{"name": tag_name}, *_]): + release = tag_name + case _: + critical(f"tag list for {projectName} did not have a `name` field: {out.decode()}") + out = run_cmd( + curl_gitlab_args( + url=f"https://gitlab.com/api/v4/projects/{quote(projectId)}" + ) + ) + url: str + match gitlab_curl_result(out): + case {"http_url_to_repo": url}: + url = url case _: - sys.exit("input json must have `orga` and `repo` keys") + critical(f"repository result for {projectName} did not have a `http_url_to_repo` field: {out.decode()}") + log(f"Fetching latest release ({release}) of {projectName} …") + return run_cmd( + nix_prefetch_git_args( + url, + version_rev=release + ) + ) def fetchOrgaLatestRepos(orga: str) -> set[str]: @@ -141,9 +228,9 @@ def fetchOrgaLatestRepos(orga: str) -> set[str]: url=f"https://api.github.com/orgs/{quote(orga)}/repos?per_page=100" ) ) - match curl_result(out): + match github_curl_result(out): case "not found": - sys.exit(f"github organization {orga} not found") + critical(f"github organization {orga} not found") case list(repos): res: list[str] = [] for repo in repos: @@ -151,8 +238,8 @@ def fetchOrgaLatestRepos(orga: str) -> set[str]: if name: res.append(name) return set(res) - case _: - sys.exit("github result was not a list of repos, but {other}") + case other: + critical(f"github result was not a list of repos, but {other}") def checkTreeSitterRepos(latest_github_repos: set[str]) -> None: @@ -163,29 +250,21 @@ def checkTreeSitterRepos(latest_github_repos: set[str]) -> None: unknown = latest_github_repos - (known | ignored) if unknown: - sys.exit(f"These repositories are neither known nor 
ignored:\n{unknown}") + critical(f"These repositories are neither known nor ignored:\n{unknown}") -Grammar = TypedDict( - "Grammar", - { - "nixRepoAttrName": str, - "orga": str, - "repo": str - } -) +NixRepoAttrName = str def printAllGrammarsNixFile() -> None: """Print a .nix file that imports all grammars.""" - allGrammars: list[dict[str, Grammar]] = jsonArg["allGrammars"] + repoAttrNames: list[NixRepoAttrName] = jsonArg["repoAttrNames"] outputDir: Dir = jsonArg["outputDir"] def file() -> Iterator[str]: yield "{ lib }:" yield "{" - for grammar in allGrammars: - n = grammar["nixRepoAttrName"] + for n in repoAttrNames: yield f" {n} = lib.importJSON ./{n}.json;" yield "}" yield "" @@ -214,4 +293,4 @@ def fetchAndCheckTreeSitterRepos() -> None: case "print-all-grammars-nix-file": printAllGrammarsNixFile() case _: - sys.exit(f"mode {mode} unknown") + critical(f"mode {mode} unknown")