diff --git a/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix b/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix index 2b8ba7616ae62..4fe658b8207be 100644 --- a/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix +++ b/pkgs/applications/editors/vim/plugins/nvim-treesitter/overrides.nix @@ -4,7 +4,7 @@ self: super: let generatedGrammars = callPackage ./generated.nix { - buildGrammar = callPackage ../../../../../development/tools/parsing/tree-sitter/grammar.nix { }; + buildGrammar = callPackage ../../../../../development/tools/parsing/tree-sitter/build-grammar.nix { }; }; generatedDerivations = lib.filterAttrs (_: lib.isDerivation) generatedGrammars; diff --git a/pkgs/development/tools/parsing/tree-sitter/README.md b/pkgs/development/tools/parsing/tree-sitter/README.md new file mode 100644 index 0000000000000..5d4e5a2272a7d --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/README.md @@ -0,0 +1,41 @@ +# tree-sitter libraries, binaries & grammars + +This packages tree sitter and its grammars. + +The grammar descriptions can be found in [./grammars.toml](). + +## Updating tree-sitter + +1) change all hashes at the beginning of [./default.nix](). +2) Update the grammars (see below) + +## Updating all grammars + +First you need a github Personal Access Token, otherwise it runs into rate limits. +Go to https://github.com/settings/tokens and generate a classic token, copy the secret. + +You generate the update script and run it: + +```bash +$ nix-build -A tree-sitter.updater.update-all-grammars +$ env GITHUB_TOKEN= ./result +``` + +This will prefetch all repos mentioned in [./grammars.toml]() and put their new hashes +into the [./grammars]() directory. + +If a new repository was added to the `github.com/tree-sitter` organization, +the update process will throw an error and you need to add the new repo to +either `knownTreeSitterOrgGrammarRepos` (if it’s a grammar) or to +`ignoredTreeSitterOrgRepos`. 
+This is to make sure we always package every official grammar. + +## Adding a third-party grammar + +Add it to the `otherGrammars` section in [./grammars.toml](). +The grammar name has to be unique among all grammars (upstream and third party). + +## Deleting a grammar + +In case a grammar needs to be removed, please remove the generated outputs +in the [./grammars]() directory manually. diff --git a/pkgs/development/tools/parsing/tree-sitter/grammar.nix b/pkgs/development/tools/parsing/tree-sitter/build-grammar.nix similarity index 100% rename from pkgs/development/tools/parsing/tree-sitter/grammar.nix rename to pkgs/development/tools/parsing/tree-sitter/build-grammar.nix diff --git a/pkgs/development/tools/parsing/tree-sitter/default.nix b/pkgs/development/tools/parsing/tree-sitter/default.nix index dcc8be0640f47..3c59e03db9e9b 100644 --- a/pkgs/development/tools/parsing/tree-sitter/default.nix +++ b/pkgs/development/tools/parsing/tree-sitter/default.nix @@ -23,13 +23,8 @@ , extraGrammars ? { } }: -# TODO: move to carnix or https://github.com/kolloch/crate2nix let - # to update: - # 1) change all these hashes - # 2) nix-build -A tree-sitter.updater.update-all-grammars - # 3) Set GITHUB_TOKEN env variable to avoid api rate limit (Use a Personal Access Token from https://github.com/settings/tokens It does not need any permissions) - # 4) run the ./result script that is output by that (it updates ./grammars) + # to update: see ./README.md version = "0.20.7"; sha256 = "sha256-5ILiN5EfJ7WpeYBiXynfcLucdp8zmxVOj4gLkaFQYts="; cargoSha256 = "sha256-V4frCaU5QzTx3ujdaplw7vNkosbzyXHQvE+T7ntVOtU="; @@ -55,7 +50,7 @@ let builtGrammars = let change = name: grammar: - callPackage ./grammar.nix { } { + callPackage ./build-grammar.nix { } { language = if grammar ? language then grammar.language else name; inherit version; source = if grammar ? 
src then grammar.src else fetchGrammar grammar; diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars.toml b/pkgs/development/tools/parsing/tree-sitter/grammars.toml new file mode 100644 index 0000000000000..9f9ecd308e3f6 --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars.toml @@ -0,0 +1,346 @@ +# Grammar list: +# https://github.com/tree-sitter/tree-sitter/blob/master/docs/index.md + +# Grammars we want to fetch from the tree-sitter github orga +knownTreeSitterOrgGrammarRepos = [ + "tree-sitter-javascript", + "tree-sitter-c", + "tree-sitter-json", + "tree-sitter-cpp", + "tree-sitter-ruby", + "tree-sitter-go", + "tree-sitter-c-sharp", + "tree-sitter-python", + "tree-sitter-typescript", + "tree-sitter-rust", + "tree-sitter-bash", + "tree-sitter-php", + "tree-sitter-java", + "tree-sitter-scala", + "tree-sitter-ocaml", + "tree-sitter-julia", + "tree-sitter-agda", + "tree-sitter-fluent", + "tree-sitter-html", + "tree-sitter-haskell", + "tree-sitter-regex", + "tree-sitter-css", + "tree-sitter-verilog", + "tree-sitter-jsdoc", + "tree-sitter-ql", + "tree-sitter-ql-dbscheme", + "tree-sitter-embedded-template", + "tree-sitter-tsq", + "tree-sitter-toml", +] +# repos of the tree-sitter github orga we want to ignore (not grammars) +ignoredTreeSitterOrgRepos = [ + "tree-sitter", + "tree-sitter-cli", + # this is the haskell language bindings, tree-sitter-haskell is the grammar + "haskell-tree-sitter", + # this is the ruby language bindings, tree-sitter-ruby is the grammar + "ruby-tree-sitter.old", + # this is the (unmaintained) rust language bindings, tree-sitter-rust is the grammar + "rust-tree-sitter", + # this is the nodejs language bindings, tree-sitter-javascript is the grammar + "node-tree-sitter", + # this is the python language bindings, tree-sitter-python is the grammar + "py-tree-sitter", + # afl fuzzing for tree sitter + "afl-tree-sitter", + # archived + "highlight-schema", + # website + "tree-sitter.github.io", + # not maintained + 
"tree-sitter-razor", + # rust library for constructing arbitrary graph structures from source code + "tree-sitter-graph", + # abandoned + "tree-sitter-swift", +] + +# Additional grammars that are not in the official github orga. +# If you need a grammar that already exists in the official orga, +# make sure to give it a different name. +# +# This can be type == "github", then it needs an orga and a repo. +# +# Or it can be type == "gitlab", then it needs a projectId, +# which you can find under the repository name on gitlab. +[otherGrammars.tree-sitter-beancount] +type = "github" +orga = "polarmutex" +repo = "tree-sitter-beancount" +[otherGrammars.tree-sitter-clojure] +type = "github" +orga = "sogaiu" +repo = "tree-sitter-clojure" +[otherGrammars.tree-sitter-comment] +type = "github" +orga = "stsewd" +repo = "tree-sitter-comment" +[otherGrammars.tree-sitter-dart] +type = "github" +orga = "usernobody14" +repo = "tree-sitter-dart" +[otherGrammars.tree-sitter-elisp] +type = "github" +orga = "wilfred" +repo = "tree-sitter-elisp" +[otherGrammars.tree-sitter-nix] +type = "github" +orga = "cstrahan" +repo = "tree-sitter-nix" +[otherGrammars.tree-sitter-latex] +type = "github" +orga = "latex-lsp" +repo = "tree-sitter-latex" +[otherGrammars.tree-sitter-lua] +type = "github" +orga = "MunifTanjim" +repo = "tree-sitter-lua" +[otherGrammars.tree-sitter-fennel] +type = "github" +orga = "travonted" +repo = "tree-sitter-fennel" +[otherGrammars.tree-sitter-make] +type = "github" +orga = "alemuller" +repo = "tree-sitter-make" +[otherGrammars.tree-sitter-markdown] +type = "github" +orga = "MDeiml" +repo = "tree-sitter-markdown" +[otherGrammars.tree-sitter-rego] +type = "github" +orga = "FallenAngel97" +repo = "tree-sitter-rego" +[otherGrammars.tree-sitter-rst] +type = "github" +orga = "stsewd" +repo = "tree-sitter-rst" +[otherGrammars.tree-sitter-svelte] +type = "github" +orga = "Himujjal" +repo = "tree-sitter-svelte" +[otherGrammars.tree-sitter-sql] +type = "github" +orga = 
"m-novikov" +repo = "tree-sitter-sql" +[otherGrammars.tree-sitter-vim] +type = "github" +orga = "vigoux" +repo = "tree-sitter-viml" +[otherGrammars.tree-sitter-yaml] +type = "github" +orga = "ikatyang" +repo = "tree-sitter-yaml" +[otherGrammars.tree-sitter-zig] +type = "github" +orga = "maxxnino" +repo = "tree-sitter-zig" +[otherGrammars.tree-sitter-fish] +type = "github" +orga = "ram02z" +repo = "tree-sitter-fish" +[otherGrammars.tree-sitter-dot] +type = "github" +orga = "rydesun" +repo = "tree-sitter-dot" +[otherGrammars.tree-sitter-norg] +type = "github" +orga = "nvim-neorg" +repo = "tree-sitter-norg" +[otherGrammars.tree-sitter-commonlisp] +type = "github" +orga = "thehamsta" +repo = "tree-sitter-commonlisp" +[otherGrammars.tree-sitter-cuda] +type = "github" +orga = "thehamsta" +repo = "tree-sitter-cuda" +[otherGrammars.tree-sitter-glsl] +type = "github" +orga = "thehamsta" +repo = "tree-sitter-glsl" +[otherGrammars.tree-sitter-dockerfile] +type = "github" +orga = "camdencheek" +repo = "tree-sitter-dockerfile" +[otherGrammars.tree-sitter-ledger] +type = "github" +orga = "cbarrete" +repo = "tree-sitter-ledger" +[otherGrammars.tree-sitter-gomod] +type = "github" +orga = "camdencheek" +repo = "tree-sitter-go-mod" +[otherGrammars.tree-sitter-gowork] +type = "github" +orga = "omertuc" +repo = "tree-sitter-go-work" +[otherGrammars.tree-sitter-graphql] +type = "github" +orga = "bkegley" +repo = "tree-sitter-graphql" +[otherGrammars.tree-sitter-pgn] +type = "github" +orga = "rolandwalker" +repo = "tree-sitter-pgn" +[otherGrammars.tree-sitter-perl] +type = "github" +orga = "ganezdragon" +repo = "tree-sitter-perl" +[otherGrammars.tree-sitter-kotlin] +type = "github" +orga = "fwcd" +repo = "tree-sitter-kotlin" +[otherGrammars.tree-sitter-scss] +type = "github" +orga = "serenadeai" +repo = "tree-sitter-scss" +[otherGrammars.tree-sitter-erlang] +type = "github" +orga = "abstractmachineslab" +repo = "tree-sitter-erlang" +[otherGrammars.tree-sitter-elixir] +type = "github" 
+orga = "elixir-lang" +repo = "tree-sitter-elixir" +[otherGrammars.tree-sitter-surface] +type = "github" +orga = "connorlay" +repo = "tree-sitter-surface" +[otherGrammars.tree-sitter-heex] +type = "github" +orga = "connorlay" +repo = "tree-sitter-heex" +[otherGrammars.tree-sitter-supercollider] +type = "github" +orga = "madskjeldgaard" +repo = "tree-sitter-supercollider" +[otherGrammars.tree-sitter-tlaplus] +type = "github" +orga = "tlaplus-community" +repo = "tree-sitter-tlaplus" +[otherGrammars.tree-sitter-glimmer] +type = "github" +orga = "alexlafroscia" +repo = "tree-sitter-glimmer" +[otherGrammars.tree-sitter-pug] +type = "github" +orga = "zealot128" +repo = "tree-sitter-pug" +[otherGrammars.tree-sitter-vue] +type = "github" +orga = "ikatyang" +repo = "tree-sitter-vue" +[otherGrammars.tree-sitter-elm] +type = "github" +orga = "elm-tooling" +repo = "tree-sitter-elm" +[otherGrammars.tree-sitter-yang] +type = "github" +orga = "hubro" +repo = "tree-sitter-yang" +[otherGrammars.tree-sitter-query] +type = "github" +orga = "nvim-treesitter" +repo = "tree-sitter-query" +[otherGrammars.tree-sitter-sparql] +type = "github" +orga = "bonabeavis" +repo = "tree-sitter-sparql" +[otherGrammars.tree-sitter-gdscript] +type = "github" +orga = "prestonknopp" +repo = "tree-sitter-gdscript" +[otherGrammars.tree-sitter-godot-resource] +type = "github" +orga = "prestonknopp" +repo = "tree-sitter-godot-resource" +[otherGrammars.tree-sitter-turtle] +type = "github" +orga = "bonabeavis" +repo = "tree-sitter-turtle" +[otherGrammars.tree-sitter-devicetree] +type = "github" +orga = "joelspadin" +repo = "tree-sitter-devicetree" +[otherGrammars.tree-sitter-r] +type = "github" +orga = "r-lib" +repo = "tree-sitter-r" +[otherGrammars.tree-sitter-bibtex] +type = "github" +orga = "latex-lsp" +repo = "tree-sitter-bibtex" +[otherGrammars.tree-sitter-fortran] +type = "github" +orga = "stadelmanma" +repo = "tree-sitter-fortran" +[otherGrammars.tree-sitter-cmake] +type = "github" +orga = "uyha" +repo 
= "tree-sitter-cmake" +[otherGrammars.tree-sitter-janet-simple] +type = "github" +orga = "sogaiu" +repo = "tree-sitter-janet-simple" +[otherGrammars.tree-sitter-json5] +type = "github" +orga = "joakker" +repo = "tree-sitter-json5" +[otherGrammars.tree-sitter-pioasm] +type = "github" +orga = "leo60228" +repo = "tree-sitter-pioasm" +[otherGrammars.tree-sitter-hjson] +type = "github" +orga = "winston0410" +repo = "tree-sitter-hjson" +[otherGrammars.tree-sitter-llvm] +type = "github" +orga = "benwilliamgraham" +repo = "tree-sitter-llvm" +[otherGrammars.tree-sitter-http] +type = "github" +orga = "ntbbloodbath" +repo = "tree-sitter-http" +[otherGrammars.tree-sitter-prisma] +type = "github" +orga = "victorhqc" +repo = "tree-sitter-prisma" +[otherGrammars.tree-sitter-org-nvim] +type = "github" +orga = "milisims" +repo = "tree-sitter-org" +[otherGrammars.tree-sitter-hcl] +type = "github" +orga = "MichaHoffmann" +repo = "tree-sitter-hcl" +[otherGrammars.tree-sitter-scheme] +type = "github" +orga = "6cdh" +repo = "tree-sitter-scheme" +[otherGrammars.tree-sitter-tiger] +type = "github" +orga = "ambroisie" +repo = "tree-sitter-tiger" +[otherGrammars.tree-sitter-nickel] +type = "github" +orga = "nickel-lang" +repo = "tree-sitter-nickel" +[otherGrammars.tree-sitter-smithy] +type = "github" +orga = "indoorvivants" +repo = "tree-sitter-smithy" +[otherGrammars.tree-sitter-jsonnet] +type = "github" +orga = "sourcegraph" +repo = "tree-sitter-jsonnet" +[otherGrammars.tree-sitter-jsonc] +type = "gitlab" +projectId = "24426815" diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/README.md b/pkgs/development/tools/parsing/tree-sitter/grammars/README.md new file mode 100644 index 0000000000000..c5cb0521a1efa --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/README.md @@ -0,0 +1,4 @@ +# Generated tree-sitter grammars + +These grammars have been autogenerated by the update script. +Please see [../README.md]() for how to run it. 
diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix b/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix index 5b3862082d8f7..7854e69723353 100644 --- a/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/default.nix @@ -46,6 +46,7 @@ tree-sitter-jsdoc = lib.importJSON ./tree-sitter-jsdoc.json; tree-sitter-json = lib.importJSON ./tree-sitter-json.json; tree-sitter-json5 = lib.importJSON ./tree-sitter-json5.json; + tree-sitter-jsonc = lib.importJSON ./tree-sitter-jsonc.json; tree-sitter-jsonnet = lib.importJSON ./tree-sitter-jsonnet.json; tree-sitter-julia = lib.importJSON ./tree-sitter-julia.json; tree-sitter-kotlin = lib.importJSON ./tree-sitter-kotlin.json; diff --git a/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json b/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json new file mode 100644 index 0000000000000..f29175254b485 --- /dev/null +++ b/pkgs/development/tools/parsing/tree-sitter/grammars/tree-sitter-jsonc.json @@ -0,0 +1,11 @@ +{ + "url": "https://gitlab.com/WhyNotHugo/tree-sitter-jsonc.git", + "rev": "02b01653c8a1c198ae7287d566efa86a135b30d5", + "date": "2021-03-07T20:32:20+01:00", + "path": "/nix/store/hqh4kxw3fp9hr3yglsxv9d2kvcvpzdfa-tree-sitter-jsonc", + "sha256": "0mc68i7shwmn88iv3lcqyjvrhy3b62h02k272is7chk2yiw3crw9", + "fetchLFS": false, + "fetchSubmodules": false, + "deepClone": false, + "leaveDotGit": false +} diff --git a/pkgs/development/tools/parsing/tree-sitter/update.nix b/pkgs/development/tools/parsing/tree-sitter/update.nix index ad62530ee31d7..4f140c9497334 100644 --- a/pkgs/development/tools/parsing/tree-sitter/update.nix +++ b/pkgs/development/tools/parsing/tree-sitter/update.nix @@ -9,367 +9,39 @@ , xe }: -# Grammar list: -# https://github.com/tree-sitter/tree-sitter/blob/master/docs/index.md let - # Grammars we want to fetch from the tree-sitter github orga - 
knownTreeSitterOrgGrammarRepos = [ - "tree-sitter-javascript" - "tree-sitter-c" - "tree-sitter-json" - "tree-sitter-cpp" - "tree-sitter-ruby" - "tree-sitter-go" - "tree-sitter-c-sharp" - "tree-sitter-python" - "tree-sitter-typescript" - "tree-sitter-rust" - "tree-sitter-bash" - "tree-sitter-php" - "tree-sitter-java" - "tree-sitter-scala" - "tree-sitter-ocaml" - "tree-sitter-julia" - "tree-sitter-agda" - "tree-sitter-fluent" - "tree-sitter-html" - "tree-sitter-haskell" - "tree-sitter-regex" - "tree-sitter-css" - "tree-sitter-verilog" - "tree-sitter-jsdoc" - "tree-sitter-ql" - "tree-sitter-ql-dbscheme" - "tree-sitter-embedded-template" - "tree-sitter-tsq" - "tree-sitter-toml" - ]; - knownTreeSitterOrgGrammarReposJson = jsonFile "known-tree-sitter-org-grammar-repos" knownTreeSitterOrgGrammarRepos; + grammarsToml = builtins.fromTOML (builtins.readFile ./grammars.toml); - # repos of the tree-sitter github orga we want to ignore (not grammars) - ignoredTreeSitterOrgRepos = [ - "tree-sitter" - "tree-sitter-cli" - # this is the haskell language bindings, tree-sitter-haskell is the grammar - "haskell-tree-sitter" - # this is the ruby language bindings, tree-sitter-ruby is the grammar - "ruby-tree-sitter.old" - # this is the (unmaintained) rust language bindings, tree-sitter-rust is the grammar - "rust-tree-sitter" - # this is the nodejs language bindings, tree-sitter-javascript is the grammar - "node-tree-sitter" - # this is the python language bindings, tree-sitter-python is the grammar - "py-tree-sitter" - # afl fuzzing for tree sitter - "afl-tree-sitter" - # archived - "highlight-schema" - # website - "tree-sitter.github.io" - # not maintained - "tree-sitter-razor" - # rust library for constructing arbitrary graph structures from source code - "tree-sitter-graph" - # abandoned - "tree-sitter-swift" - ]; - ignoredTreeSitterOrgReposJson = jsonFile "ignored-tree-sitter-org-repos" ignoredTreeSitterOrgRepos; - - # Additional grammars that are not in the official github orga. 
- # If you need a grammar that already exists in the official orga, - # make sure to give it a different name. - otherGrammars = { - "tree-sitter-beancount" = { - orga = "polarmutex"; - repo = "tree-sitter-beancount"; - }; - "tree-sitter-clojure" = { - orga = "sogaiu"; - repo = "tree-sitter-clojure"; - }; - "tree-sitter-comment" = { - orga = "stsewd"; - repo = "tree-sitter-comment"; - }; - "tree-sitter-dart" = { - orga = "usernobody14"; - repo = "tree-sitter-dart"; - }; - "tree-sitter-elisp" = { - orga = "wilfred"; - repo = "tree-sitter-elisp"; - }; - "tree-sitter-nix" = { - orga = "cstrahan"; - repo = "tree-sitter-nix"; - }; - "tree-sitter-latex" = { - orga = "latex-lsp"; - repo = "tree-sitter-latex"; - }; - "tree-sitter-lua" = { - orga = "MunifTanjim"; - repo = "tree-sitter-lua"; - }; - "tree-sitter-fennel" = { - orga = "travonted"; - repo = "tree-sitter-fennel"; - }; - "tree-sitter-make" = { - orga = "alemuller"; - repo = "tree-sitter-make"; - }; - "tree-sitter-markdown" = { - orga = "MDeiml"; - repo = "tree-sitter-markdown"; - }; - "tree-sitter-rego" = { - orga = "FallenAngel97"; - repo = "tree-sitter-rego"; - }; - "tree-sitter-rst" = { - orga = "stsewd"; - repo = "tree-sitter-rst"; - }; - "tree-sitter-svelte" = { - orga = "Himujjal"; - repo = "tree-sitter-svelte"; - }; - "tree-sitter-sql" = { - orga = "m-novikov"; - repo = "tree-sitter-sql"; - }; - "tree-sitter-vim" = { - orga = "vigoux"; - repo = "tree-sitter-viml"; - }; - "tree-sitter-yaml" = { - orga = "ikatyang"; - repo = "tree-sitter-yaml"; - }; - "tree-sitter-zig" = { - orga = "maxxnino"; - repo = "tree-sitter-zig"; - }; - "tree-sitter-fish" = { - orga = "ram02z"; - repo = "tree-sitter-fish"; - }; - "tree-sitter-dot" = { - orga = "rydesun"; - repo = "tree-sitter-dot"; - }; - "tree-sitter-norg" = { - orga = "nvim-neorg"; - repo = "tree-sitter-norg"; - }; - "tree-sitter-commonlisp" = { - orga = "thehamsta"; - repo = "tree-sitter-commonlisp"; - }; - "tree-sitter-cuda" = { - orga = "thehamsta"; - repo = 
"tree-sitter-cuda"; - }; - "tree-sitter-glsl" = { - orga = "thehamsta"; - repo = "tree-sitter-glsl"; - }; - "tree-sitter-dockerfile" = { - orga = "camdencheek"; - repo = "tree-sitter-dockerfile"; - }; - "tree-sitter-ledger" = { - orga = "cbarrete"; - repo = "tree-sitter-ledger"; - }; - "tree-sitter-gomod" = { - orga = "camdencheek"; - repo = "tree-sitter-go-mod"; - }; - "tree-sitter-gowork" = { - orga = "omertuc"; - repo = "tree-sitter-go-work"; - }; - "tree-sitter-graphql" = { - orga = "bkegley"; - repo = "tree-sitter-graphql"; - }; - "tree-sitter-pgn" = { - orga = "rolandwalker"; - repo = "tree-sitter-pgn"; - }; - "tree-sitter-perl" = { - orga = "ganezdragon"; - repo = "tree-sitter-perl"; - }; - "tree-sitter-kotlin" = { - orga = "fwcd"; - repo = "tree-sitter-kotlin"; - }; - "tree-sitter-scss" = { - orga = "serenadeai"; - repo = "tree-sitter-scss"; - }; - "tree-sitter-erlang" = { - orga = "abstractmachineslab"; - repo = "tree-sitter-erlang"; - }; - "tree-sitter-elixir" = { - orga = "elixir-lang"; - repo = "tree-sitter-elixir"; - }; - "tree-sitter-surface" = { - orga = "connorlay"; - repo = "tree-sitter-surface"; - }; - "tree-sitter-heex" = { - orga = "connorlay"; - repo = "tree-sitter-heex"; - }; - "tree-sitter-supercollider" = { - orga = "madskjeldgaard"; - repo = "tree-sitter-supercollider"; - }; - "tree-sitter-tlaplus" = { - orga = "tlaplus-community"; - repo = "tree-sitter-tlaplus"; - }; - "tree-sitter-glimmer" = { - orga = "alexlafroscia"; - repo = "tree-sitter-glimmer"; - }; - "tree-sitter-pug" = { - orga = "zealot128"; - repo = "tree-sitter-pug"; - }; - "tree-sitter-vue" = { - orga = "ikatyang"; - repo = "tree-sitter-vue"; - }; - "tree-sitter-elm" = { - orga = "elm-tooling"; - repo = "tree-sitter-elm"; - }; - "tree-sitter-yang" = { - orga = "hubro"; - repo = "tree-sitter-yang"; - }; - "tree-sitter-query" = { - orga = "nvim-treesitter"; - repo = "tree-sitter-query"; - }; - "tree-sitter-sparql" = { - orga = "bonabeavis"; - repo = "tree-sitter-sparql"; - }; - 
"tree-sitter-gdscript" = { - orga = "prestonknopp"; - repo = "tree-sitter-gdscript"; - }; - "tree-sitter-godot-resource" = { - orga = "prestonknopp"; - repo = "tree-sitter-godot-resource"; - }; - "tree-sitter-turtle" = { - orga = "bonabeavis"; - repo = "tree-sitter-turtle"; - }; - "tree-sitter-devicetree" = { - orga = "joelspadin"; - repo = "tree-sitter-devicetree"; - }; - "tree-sitter-r" = { - orga = "r-lib"; - repo = "tree-sitter-r"; - }; - "tree-sitter-bibtex" = { - orga = "latex-lsp"; - repo = "tree-sitter-bibtex"; - }; - "tree-sitter-fortran" = { - orga = "stadelmanma"; - repo = "tree-sitter-fortran"; - }; - "tree-sitter-cmake" = { - orga = "uyha"; - repo = "tree-sitter-cmake"; - }; - "tree-sitter-janet-simple" = { - orga = "sogaiu"; - repo = "tree-sitter-janet-simple"; - }; - "tree-sitter-json5" = { - orga = "joakker"; - repo = "tree-sitter-json5"; - }; - "tree-sitter-pioasm" = { - orga = "leo60228"; - repo = "tree-sitter-pioasm"; - }; - "tree-sitter-hjson" = { - orga = "winston0410"; - repo = "tree-sitter-hjson"; - }; - "tree-sitter-llvm" = { - orga = "benwilliamgraham"; - repo = "tree-sitter-llvm"; - }; - "tree-sitter-http" = { - orga = "ntbbloodbath"; - repo = "tree-sitter-http"; - }; - "tree-sitter-prisma" = { - orga = "victorhqc"; - repo = "tree-sitter-prisma"; - }; - "tree-sitter-org-nvim" = { - orga = "milisims"; - repo = "tree-sitter-org"; - }; - "tree-sitter-hcl" = { - orga = "MichaHoffmann"; - repo = "tree-sitter-hcl"; - }; - "tree-sitter-scheme" = { - orga = "6cdh"; - repo = "tree-sitter-scheme"; - }; - "tree-sitter-tiger" = { - orga = "ambroisie"; - repo = "tree-sitter-tiger"; - }; - "tree-sitter-nickel" = { - orga = "nickel-lang"; - repo = "tree-sitter-nickel"; - }; - "tree-sitter-smithy" = { - orga = "indoorvivants"; - repo = "tree-sitter-smithy"; - }; - "tree-sitter-jsonnet" = { - orga = "sourcegraph"; - repo = "tree-sitter-jsonnet"; - }; - }; + # a list of {nixRepoAttrName, type, } allGrammars = let + # All grammars in the tree sitter orga we 
know of treeSitterOrgaGrammars = lib.listToAttrs (map (repo: { name = repo; value = { + type = "github"; orga = "tree-sitter"; inherit repo; }; }) - knownTreeSitterOrgGrammarRepos); + grammarsToml.knownTreeSitterOrgGrammarRepos); + merged = + mergeAttrsUnique + grammarsToml.otherGrammars + treeSitterOrgaGrammars; in - mergeAttrsUnique otherGrammars treeSitterOrgaGrammars; + lib.mapAttrsToList + (nixRepoAttrName: attrs: attrs // { + inherit nixRepoAttrName; + }) + merged; + # TODO: move to lib mergeAttrsUnique = left: right: @@ -385,8 +57,6 @@ let - jsonFile = name: val: (formats.json { }).generate name val; - # implementation of the updater updateImpl = passArgs "updateImpl-with-args" { binaries = { @@ -394,7 +64,7 @@ let nix-prefetch-git = "${nix-prefetch-git}/bin/nix-prefetch-git"; printf = "${coreutils}/bin/printf"; }; - inherit + inherit (grammarsToml) knownTreeSitterOrgGrammarRepos ignoredTreeSitterOrgRepos ; @@ -410,11 +80,11 @@ let ${script} "$@" ''; - foreachSh = attrs: f: - lib.concatMapStringsSep "\n" f - (lib.mapAttrsToList (k: v: { name = k; } // v) attrs); - + # a list of nix values as a newline-separated json string, + # one entry per line jsonNewlines = lib.concatMapStringsSep "\n" (lib.generators.toJSON {}); + # a pretty-printed value as json file + jsonFile = name: val: (formats.json { }).generate name val; # Run the given script for each of the attr list. # The attrs are passed to the script as a json value. @@ -427,9 +97,14 @@ let # This will depend on your local environment, but that is intentional. 
outputDir = "${toString ./.}/grammars"; + # final script update-all-grammars = writeShellScript "update-all-grammars.sh" '' set -euo pipefail - ${updateImpl} fetch-and-check-tree-sitter-repos '{}' + + # first make sure we know about all upsteam repos + ${updateImpl} fetch-and-check-tree-sitter-repos '{}' + + # Then write one json file for each prefetched repo, in parallel echo "writing files to ${outputDir}" 1>&2 mkdir -p "${outputDir}" ${forEachParallel @@ -437,22 +112,18 @@ let (writeShellScript "fetch-repo" '' ${updateImpl} fetch-repo "$1" '') - (lib.mapAttrsToList - (nixRepoAttrName: attrs: attrs // { - inherit - nixRepoAttrName - outputDir; - }) + (map + (grammar: grammar // { inherit outputDir; }) allGrammars) } + + # finally, write a default.nix that calls all grammars ${updateImpl} print-all-grammars-nix-file "$(< ${ - jsonFile "all-grammars.json" { - allGrammars = - (lib.mapAttrsToList - (nixRepoAttrName: attrs: attrs // { - inherit nixRepoAttrName; - }) - allGrammars); + jsonFile "repo-attr-names.json" { + repoAttrNames = + map + ({nixRepoAttrName, ...}: nixRepoAttrName) + allGrammars; inherit outputDir; } })" diff --git a/pkgs/development/tools/parsing/tree-sitter/update_impl.py b/pkgs/development/tools/parsing/tree-sitter/update_impl.py index db470617ed9c2..f23c40175aee3 100644 --- a/pkgs/development/tools/parsing/tree-sitter/update_impl.py +++ b/pkgs/development/tools/parsing/tree-sitter/update_impl.py @@ -3,7 +3,7 @@ import subprocess as sub import os import sys -from typing import Iterator, Any, Literal, TypedDict +from typing import Iterator, Any, Literal, NoReturn, TypedDict, cast from tempfile import NamedTemporaryFile debug: bool = True if os.environ.get("DEBUG", False) else False @@ -21,6 +21,10 @@ def log(msg: str) -> None: print(msg, file=sys.stderr) +def critical(msg: str) -> NoReturn: + sys.exit(f"ERROR: {msg}") + + def atomically_write(file_path: str, content: bytes) -> None: """atomically write the content into `file_path`""" with 
NamedTemporaryFile( @@ -51,20 +55,36 @@ def curl_github_args(token: str | None, url: str) -> Args: yield url -def curl_result(output: bytes) -> Any | Literal["not found"]: +def curl_gitlab_args(url: str) -> Args: + """Query the gitlab API via curl""" + yield bins["curl"] + if not debug: + yield "--silent" + # follow redirects + yield "--location" + yield url + + +def github_curl_result(output: bytes) -> Any | Literal["not found"]: """Parse the curl result of the github API""" res: Any = json.loads(output) match res: case dict(res): message: str = res.get("message", "") if "rate limit" in message: - sys.exit("Rate limited by the Github API") + critical("Rate limited by the Github API") if "Not Found" in message: return "not found" # if the result is another type, we can pass it on return res +def gitlab_curl_result(output: bytes) -> Any: + """Parse the curl result of the gitlab API""" + res: Any = json.loads(output) + return res + + def nix_prefetch_git_args(url: str, version_rev: str) -> Args: """Prefetch a git repository""" yield bins["nix-prefetch-git"] @@ -86,50 +106,117 @@ def run_cmd(args: Args) -> bytes: Dir = str +GithubRepo = TypedDict( + "GithubRepo", { + "orga": str, + "repo": str + } +) + +GitlabRepo = TypedDict( + "GitlabRepo", { + "nixRepoAttrName": str, + "projectId": str + } +) + +FetchRepoArg = TypedDict( + "FetchRepoArg", { + "type": str, + "outputDir": Dir, + "nixRepoAttrName": str + } +) + def fetchRepo() -> None: """fetch the given repo and write its nix-prefetch output to the corresponding grammar json file""" - match jsonArg: - case { - "orga": orga, - "repo": repo, - "outputDir": outputDir, - "nixRepoAttrName": nixRepoAttrName, - }: - token: str | None = os.environ.get("GITHUB_TOKEN", None) - out = run_cmd( - curl_github_args( - token, - url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest" - ) - ) - release: str - match curl_result(out): - case "not found": - # github sometimes returns an empty list even tough there 
are releases - log(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD") - release = "HEAD" - case {"tag_name": tag_name}: - release = tag_name - case _: - sys.exit(f"git result for {orga}/{repo} did not have a `tag_name` field") - - log(f"Fetching latest release ({release}) of {orga}/{repo} …") - res = run_cmd( - nix_prefetch_git_args( - url=f"https://github.com/{quote(orga)}/{quote(repo)}", - version_rev=release - ) - ) - atomically_write( - file_path=os.path.join( - outputDir, - f"{nixRepoAttrName}.json" - ), - content=res - ) + arg = cast(FetchRepoArg, jsonArg) + if debug: + log(f"Fetching repo {arg}") + match arg["type"]: + case "github": + res = fetchGithubRepo(cast(GithubRepo, jsonArg)) + case "gitlab": + res = fetchGitlabRepo(cast(GitlabRepo, jsonArg)) + case other: + critical(f'''Do not yet know how to handle the repo type "{other}"''') + attrName = jsonArg["nixRepoAttrName"] + atomically_write( + file_path=os.path.join( + arg["outputDir"], + f"{attrName}.json" + ), + content=res + ) + + +def fetchGithubRepo(r: GithubRepo) -> bytes: + token: str | None = os.environ.get("GITHUB_TOKEN", None) + orga = r["orga"] + repo = r["repo"] + out = run_cmd( + curl_github_args( + token, + url=f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest" + ) + ) + release: str + match github_curl_result(out): + case "not found": + # github sometimes returns an empty list even tough there are releases + log(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD") + release = "HEAD" + case {"tag_name": tag_name}: + release = tag_name + case _: + critical(f"git result for {orga}/{repo} did not have a `tag_name` field") + + log(f"Fetching latest release ({release}) of {orga}/{repo} …") + return run_cmd( + nix_prefetch_git_args( + url=f"https://github.com/{quote(orga)}/{quote(repo)}", + version_rev=release + ) + ) + + +def fetchGitlabRepo(r: GitlabRepo) -> bytes: + projectId = r["projectId"] + nixRepoAttrName = r["nixRepoAttrName"] + out = run_cmd( + 
curl_gitlab_args( + url=f"https://gitlab.com/api/v4/projects/{quote(projectId)}/repository/tags?order_by=version&sort=desc" + ) + ) + release: str + projectName = f'''"{nixRepoAttrName}" (Gitlab projectId: {projectId})''' + match gitlab_curl_result(out): + case list([]): + log(f"uh-oh, no release find for for {projectName}, using HEAD") + release = "HEAD" + case list([{"name": tag_name}, *_]): + release = tag_name + case _: + critical(f"tag list for {projectName} did not have a `name` field: {out.decode()}") + out = run_cmd( + curl_gitlab_args( + url=f"https://gitlab.com/api/v4/projects/{quote(projectId)}" + ) + ) + url: str + match gitlab_curl_result(out): + case {"http_url_to_repo": url}: + url = url case _: - sys.exit("input json must have `orga` and `repo` keys") + critical(f"repository result for {projectName} did not have a `http_url_to_repo` field: {out.decode()}") + log(f"Fetching latest release ({release}) of {projectName} …") + return run_cmd( + nix_prefetch_git_args( + url, + version_rev=release + ) + ) def fetchOrgaLatestRepos(orga: str) -> set[str]: @@ -141,9 +228,9 @@ def fetchOrgaLatestRepos(orga: str) -> set[str]: url=f"https://api.github.com/orgs/{quote(orga)}/repos?per_page=100" ) ) - match curl_result(out): + match github_curl_result(out): case "not found": - sys.exit(f"github organization {orga} not found") + critical(f"github organization {orga} not found") case list(repos): res: list[str] = [] for repo in repos: @@ -151,8 +238,8 @@ def fetchOrgaLatestRepos(orga: str) -> set[str]: if name: res.append(name) return set(res) - case _: - sys.exit("github result was not a list of repos, but {other}") + case other: + critical(f"github result was not a list of repos, but {other}") def checkTreeSitterRepos(latest_github_repos: set[str]) -> None: @@ -163,29 +250,21 @@ def checkTreeSitterRepos(latest_github_repos: set[str]) -> None: unknown = latest_github_repos - (known | ignored) if unknown: - sys.exit(f"These repositories are neither known nor 
ignored:\n{unknown}") + critical(f"These repositories are neither known nor ignored:\n{unknown}") -Grammar = TypedDict( - "Grammar", - { - "nixRepoAttrName": str, - "orga": str, - "repo": str - } -) +NixRepoAttrName = str def printAllGrammarsNixFile() -> None: """Print a .nix file that imports all grammars.""" - allGrammars: list[dict[str, Grammar]] = jsonArg["allGrammars"] + repoAttrNames: list[NixRepoAttrName] = jsonArg["repoAttrNames"] outputDir: Dir = jsonArg["outputDir"] def file() -> Iterator[str]: yield "{ lib }:" yield "{" - for grammar in allGrammars: - n = grammar["nixRepoAttrName"] + for n in repoAttrNames: yield f" {n} = lib.importJSON ./{n}.json;" yield "}" yield "" @@ -214,4 +293,4 @@ def fetchAndCheckTreeSitterRepos() -> None: case "print-all-grammars-nix-file": printAllGrammarsNixFile() case _: - sys.exit(f"mode {mode} unknown") + critical(f"mode {mode} unknown")