diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..ffcc0d16 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,18 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +indent_style = space +trim_trailing_whitespace = true + +[site/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +indent_style = unset +trim_trailing_whitespace = unset + +[*.{proto,yaml,yml}] +indent_size = 2 diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..6c94fd85 --- /dev/null +++ b/.flake8 @@ -0,0 +1,4 @@ +[flake8] +ignore = E203, E266, E501, W503, F403, F401 +max-line-length = 88 +select = B,C,E,F,W,T4,B9 diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..8f8c97e6 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +proto/buf.lock linguist-generated=true diff --git a/.github/actions/dev-tool-python/action.yml b/.github/actions/dev-tool-python/action.yml new file mode 100644 index 00000000..f16b8774 --- /dev/null +++ b/.github/actions/dev-tool-python/action.yml @@ -0,0 +1,24 @@ +name: 'Install Python' +inputs: + python-version: + required: true + default: '3.9' +runs: + using: "composite" + steps: + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ inputs.python-version }} + - uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + - name: Install dependencies + shell: bash + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + working-directory: ${{env.working-directory}} diff --git a/.github/workflows/c.yml b/.github/workflows/c.yml new file mode 100644 index 00000000..6aa7f629 --- /dev/null +++ b/.github/workflows/c.yml @@ -0,0 +1,36 @@ +name: C + +on: + pull_request: + push: + branches: [ main ] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Configure + run: mkdir -p build && cd build && cmake ../c -DSUBSTRAIT_VALIDATOR_BUILD_TESTS=ON + - name: Build + run: cmake --build build + - name: Test + run: ctest --output-on-failure --test-dir build + + style: + name: Style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: jidicula/clang-format-action@v4.5.0 + with: + clang-format-version: '13' + check-path: c diff --git a/.github/workflows/misc.yml b/.github/workflows/misc.yml new file mode 100644 index 00000000..dbd1083d --- /dev/null +++ b/.github/workflows/misc.yml @@ -0,0 +1,23 @@ +name: Misc + +on: + pull_request: + push: + branches: [ main ] + +jobs: + license: + name: Check license headers + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Check License Header + uses: enarx/spdx@master + with: + licenses: Apache-2.0 MIT + + editorconfig: + name: Check editorconfig + runs-on: ubuntu-latest + steps: + - uses: editorconfig-checker/action-editorconfig-checker@v1 diff --git a/.github/workflows/proto.yml b/.github/workflows/proto.yml new file mode 100644 index 00000000..a9537e72 --- /dev/null +++ b/.github/workflows/proto.yml @@ -0,0 +1,30 @@ +name: Protobuf + +on: + pull_request: + push: + branches: [ main ] + +jobs: + style: + name: Style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: 
bufbuild/buf-setup-action@v1.4.0 + - run: buf format --diff --exit-code + + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: arduino/setup-protoc@v1 + - uses: bufbuild/buf-setup-action@v0.7.0 + - uses: bufbuild/buf-lint-action@v1 + - name: Compile protobuf + run: buf generate diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml new file mode 100644 index 00000000..cdc7ea16 --- /dev/null +++ b/.github/workflows/python.yml @@ -0,0 +1,113 @@ +name: Python + +on: + pull_request: + push: + branches: [ main ] + +jobs: + build: + name: Test + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + type: [wheel] + include: + - os: ubuntu-latest + type: sdist + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - name: Install sdist-only dependencies + if: ${{ matrix.type == 'sdist' }} + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: sdist-only build prep + if: ${{ matrix.type == 'sdist' }} + working-directory: rs + run: cargo check + # ^ this ensures that its build.rs is run, which is all we need. Sadly + # there doesn't seem to be a way to do this that doesn't pull in all + # normal dependencies: https://github.com/rust-lang/cargo/issues/7178 + - name: Install build dependencies + run: python3 -m pip install --upgrade pip maturin + - name: Prepare build environment + working-directory: py + run: python3 prepare_build.py populate + - name: Create sdist + if: ${{ matrix.type == 'sdist' }} + uses: messense/maturin-action@v1 + with: + command: sdist + args: -o dist -m py/Cargo.toml + - name: Build manylinux wheels + if: ${{ matrix.type == 'wheel' && matrix.os == 'ubuntu-latest' }} + uses: messense/maturin-action@v1 + with: + manylinux: auto + command: build + args: --release --no-sdist -o dist -m py/Cargo.toml + - name: Build Windows wheels + if: ${{ matrix.type == 'wheel' && matrix.os == 'windows-latest' }} + uses: messense/maturin-action@v1 + with: + command: build + args: --release --no-sdist -o dist -m py/Cargo.toml + - name: Build MacOS wheels + if: ${{ matrix.type == 'wheel' && matrix.os == 'macos-latest' }} + uses: messense/maturin-action@v1 + with: + command: build + args: --release --no-sdist -o dist --universal2 -m py/Cargo.toml + - name: Install runtime dependencies + run: python3 -m pip install --upgrade protobuf pytest click pyyaml jdot + - name: Install generated sdist + if: ${{ matrix.type == 'sdist' }} + run: python3 -m pip install dist/substrait_validator-*.tar.gz + - name: Install generated wheel + if: ${{ matrix.type == 'wheel' }} + run: python3 -m pip install --no-index --find-links=dist substrait-validator + - name: Test + working-directory: py/tests + run: python3 -m pytest + - name: Upload wheels + if: "startsWith(github.ref, 'refs/tags/')" + uses: actions/upload-artifact@v2 + with: + name: wheels + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [ build ] + steps: + - uses: actions/download-artifact@v2 + with: + name: wheels + - name: Publish to PyPI + uses: messense/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} + with: + command: upload + args: --skip-existing * + + fmt-lint: + name: Style & lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: python3 -m pip install --upgrade pip 
black==22.3.0 flake8==4.0.1 + - name: Black + run: python3 -m black --diff --check . + - name: Flake8 + run: python3 -m flake8 . diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml new file mode 100644 index 00000000..aa5edbeb --- /dev/null +++ b/.github/workflows/rust.yml @@ -0,0 +1,103 @@ +name: Rust + +on: + pull_request: + push: + branches: [ main ] + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: Swatinem/rust-cache@v1 + - name: Check + run: cargo check --all-features + + test: + name: Test + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: Swatinem/rust-cache@v1 + - name: Run unit tests + run: cargo test --all-features + - name: Install test runner dependencies + run: python3 -m pip install --upgrade pip protobuf pyyaml + - name: Run validation tests + # No need to run validation tests for all operating systems, and Linux + # runners are the fastest of the bunch. + if: ${{ matrix.os == 'ubuntu-latest' }} + working-directory: tests + run: python3 runner.py run --no-html + + style: + name: Style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + components: rustfmt + - uses: Swatinem/rust-cache@v1 + - name: Rustfmt + run: cargo fmt --all -- --check + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + # Clippy 1.60 crashes on the codebase, see + # https://github.com/rust-lang/rust-clippy/issues/8527 + toolchain: "1.59.0" + override: true + components: clippy + - uses: Swatinem/rust-cache@v1 + - name: Clippy + run: cargo clippy --all-features -- -D warnings + + doc: + name: Doc + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: Swatinem/rust-cache@v1 + - name: Doc + run: RUSTDOCFLAGS="-Dwarnings" cargo doc --workspace --all-features diff --git a/.github/workflows/yaml.yml b/.github/workflows/yaml.yml new file mode 100644 index 00000000..bc7132b4 --- /dev/null +++ b/.github/workflows/yaml.yml @@ -0,0 +1,15 @@ +name: YAML + +on: + pull_request: + push: + branches: [ main ] + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run yamllint + run: yamllint . 
diff --git a/.gitignore b/.gitignore index 088ba6ba..78bf0d69 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,8 @@ -# Generated by Cargo -# will have compiled files and executables +**/*.rs.bk +**/target +**/.gradle +**/.idea +**/build +gen /target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -Cargo.lock - -# These are backup files generated by rustfmt **/*.rs.bk diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..592be67a --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "substrait"] + path = substrait + url = https://github.com/substrait-io/substrait.git diff --git a/.licenserc.yaml b/.licenserc.yaml new file mode 100644 index 00000000..70a60952 --- /dev/null +++ b/.licenserc.yaml @@ -0,0 +1,13 @@ +header: + license: + spdx-id: Apache-2.0 + + paths: + - 'proto/substrait/**' + - 'derive/**' + - 'rs/**' + - 'py/**' + - 'c/**' + - 'tests/**' + + comment: never diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..4c93f9ed --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,29 @@ +repos: +- repo: https://github.com/doublify/pre-commit-rust + rev: v1.0 + hooks: + - id: fmt + - id: clippy + - id: cargo-check +- repo: https://github.com/pre-commit/mirrors-clang-format + rev: v13.0.1 + hooks: + - id: clang-format + types_or: [c, c++] +- repo: https://github.com/nametake/pre-commit-buf + rev: v2.0.0 + hooks: + - id: buf-lint +- repo: https://github.com/adrienverge/yamllint.git + rev: v1.26.0 + hooks: + - id: yamllint + args: [-c=.yamllint.yaml] +- repo: https://github.com/psf/black + rev: 22.3.0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 diff --git a/.yamllint.yaml b/.yamllint.yaml new file mode 100644 index 00000000..35e3e8ea --- /dev/null +++ b/.yamllint.yaml @@ -0,0 +1,9 @@ +rules: + line-length: + max: 120 + brackets: + forbid: false + min-spaces-inside: 0 + max-spaces-inside: 1 + min-spaces-inside-empty: 0 + max-spaces-inside-empty: 0 diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..657b9220 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1446 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "serde", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "anyhow" +version = "1.0.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "159bb86af3a200e19a068f4224eae4c8bb2d0fa054c7e5d1cacd5cef95e684cd" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bytecount" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e" + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cbindgen" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51e3973b165dc0f435831a9e426de67e894de532754ff7a3f307c03ee5dec7dc" +dependencies = [ + "clap", + "heck 0.3.3", + "indexmap", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn", + "tempfile", + "toml", +] + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time 0.1.43", + "winapi", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38" +dependencies = [ + "cfg-if", + "lazy_static", +] + +[[package]] +name = "curl" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7de97b894edd5b5bcceef8b78d7da9b75b1d2f2f9a910569d0bde3dd31d84939" +dependencies = [ + "curl-sys", + "libc", + "openssl-probe", + "openssl-sys", + "schannel", + "socket2", + "winapi", +] + +[[package]] +name = "curl-sys" +version = "0.4.52+curl-7.81.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b8c2d1023ea5fded5b7b892e4b8e95f70038a421126a056761a84246a28971" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", + "winapi", +] + +[[package]] +name = "dunce" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "453440c271cf5577fd2a40e4942540cb7d0d2f85e27c8d07dd0023c925a67541" + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "fancy-regex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6b8560a05112eb52f04b00e5d3790c0dd75d9d980eb8a122fb23b92a623ccf" +dependencies = [ + "bit-set", + "regex", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "fixedbitset" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "279fb028e20b3c4c320317955b77c5e0c9701f05a1d309905d6fc702cdc5053e" + +[[package]] +name = "float-pretty-print" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae1cdc50a756244008a19d313827537e5e18d55f76779e8d5f9aa00769ca231" + +[[package]] +name = "fnv" +version = "1.0.7" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "fraction" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aba3510011eee8825018be07f08d9643421de007eaf62a3bde58d89b058abfa7" +dependencies = [ + "lazy_static", + "num", +] + +[[package]] +name = "getrandom" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" +dependencies = [ + "indoc-impl", + "proc-macro-hack", +] + +[[package]] +name = "indoc-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", + "unindent", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "iso8601" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a59a3f2be6271b2a844cd0dd13bf8ccc88a9540482d872c7ce58ab1c4db9fab" +dependencies = [ + "nom", +] + +[[package]] +name = "itertools" +version = "0.8.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "jsonschema" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4be404426c47c9b868fc9b6ddda07f84e2885d12b17066036717db2cd4e5d77" +dependencies = [ + "ahash", + "anyhow", + "base64", + "bytecount", + "fancy-regex", + "fraction", + "iso8601", + "itoa", + "lazy_static", + "memchr", + "num-cmp", + "parking_lot 0.12.0", + "percent-encoding", + "regex", + "serde", + "serde_json", + "time 0.3.7", + "url", + "uuid", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.119" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf2e165bb3457c8e098ea76f3e3bc9db55f87aa90d52d0e6be741470916aaa4" + +[[package]] +name = "libz-sys" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de5435b8549c16d423ed0c03dbaafe57cf6c3344744f1242520d59c9d8ecec66" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linked-hash-map" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" + +[[package]] +name = "lock_api" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "multimap" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" + +[[package]] +name = "nom" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1b1d11e1ef389c76fe5b81bcaf2ea32cf88b62bc494e19f493d0b30e7a930109" +dependencies = [ + "memchr", + "minimal-lexical", + "version_check", +] + +[[package]] +name = "num" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8536030f9fea7127f841b45bb6243b27255787fb4eb83958aa1ef9d2fdc0c36" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "090c7f9998ee0ff65aa5b723e4009f7b217707f1fb5ea551329cc4d6231fb304" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6b19411a9719e753aff12e5187b74d60d3dc449ec3f4dc21e3989c3f554bc95" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876a53fff98e03a936a674b29568b0e605f06b29372c2489ff4de23f1949743d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c000134b5dbf44adc5cb772486d335293351644b801551abe8f75c84cfa4aef" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "num_threads" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97ba99ba6393e2c3734791401b66902d981cb03bf190af674ca69949b6d5fb15" +dependencies = [ + "libc", +] + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.5", +] + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core 0.9.1", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "petgraph" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a13a2fa9d0b63e5f22328828741e523766fff0ee9e779316902290dff3f824f" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "pkg-config" +version = "0.3.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "prost" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "444879275cb4fd84958b1a1d5420d15e6fcf7c235fe47f053c9c2a80aceb6001" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62941722fb675d463659e49c4f3fe1fe792ff24fe5bbaa9c08cd3b98a1c354f5" +dependencies = [ + "bytes", + "heck 0.3.3", + "itertools 0.10.3", + "lazy_static", + "log", + "multimap", + "petgraph", + "prost", + "prost-types", + "regex", + "tempfile", + "which", +] + +[[package]] +name = 
"prost-derive" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9cc1a3263e07e0bf68e96268f37665207b49560d98739662cdfaae215c720fe" +dependencies = [ + "anyhow", + "itertools 0.10.3", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534b7a0e836e3c482d2693070f982e39e7611da9695d4d1f5a4b186b51faef0a" +dependencies = [ + "bytes", + "prost", +] + +[[package]] +name = "pyo3" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cf01dbf1c05af0a14c7779ed6f3aa9deac9c3419606ac9de537a2d649005720" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "parking_lot 0.11.2", + "paste", + "pyo3-build-config", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf9e4d128bfbddc898ad3409900080d8d5095c379632fbbfbb9c8cfb1fb852b" +dependencies = [ + "once_cell", +] + +[[package]] +name = "pyo3-macros" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67701eb32b1f9a9722b4bc54b548ff9d7ebfded011c12daece7b9063be1fd755" +dependencies = [ + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f44f09e825ee49a105f2c7b23ebee50886a9aee0746f4dd5a704138a64b0218a" +dependencies = [ + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "schannel" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" +dependencies = [ + "lazy_static", + "winapi", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.136" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "socket2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck 0.3.3", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "substrait-validator" +version = "0.0.1" +dependencies = [ + "base64", + "chrono", + "curl", + "float-pretty-print", + "glob", + "heck 0.4.0", + "itertools 0.8.2", + "jsonschema", + "num-derive", + "num-traits", + "once_cell", + "percent-encoding", + "prost", + "prost-build", + "prost-types", + "regex", + "serde_json", + "strum", + "strum_macros", + "substrait-validator-derive", + "thiserror", + "uriparse", + "url", + "walkdir", + "yaml-rust", +] + +[[package]] +name = "substrait-validator-c" +version = "0.0.1" +dependencies = [ + "cbindgen", + "libc", + "substrait-validator", + "thiserror", +] + +[[package]] +name = "substrait-validator-derive" +version = "0.0.1" +dependencies = [ + "heck 
0.4.0", + "quote", + "syn", +] + +[[package]] +name = "substrait-validator-py" +version = "0.0.1" +dependencies = [ + "dunce", + "prost-build", + "pyo3", + "substrait-validator", + "walkdir", +] + +[[package]] +name = "syn" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "test-runner" +version = "0.0.1" +dependencies = [ + "glob", + "prost-build", + "rayon", + "serde", + "serde_json", + "substrait-validator", + "walkdir", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "004cbc98f30fa233c61a38bc77e96a9106e65c88f2d3bef182ae952027e5753d" +dependencies = [ + "libc", + "num_threads", + "time-macros", +] + +[[package]] +name = "time-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25eb0ca3468fc0acc11828786797f6ef9aa1555e4a211a60d64cc8e4d1be47d6" + +[[package]] +name = "tinyvec" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "toml" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31142970826733df8241ef35dc040ef98c679ab14d7c3e54d827099b3acecaa" +dependencies = [ + "serde", +] + +[[package]] +name = "unicode-bidi" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" + +[[package]] +name = "unicode-normalization" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = 
"1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "unindent" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "514672a55d7380da379785a4d70ca8386c8883ff7eaae877be4d2081cebe73d8" + +[[package]] +name = "uriparse" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0200d0fc04d809396c2ad43f3c95da3582a2556eba8d453c1087f4120ee352ff" +dependencies = [ + "fnv", + "lazy_static", +] + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "which" +version = "4.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a5a7e487e921cf220206864a94a89b6c6905bfc19f1057fa26a4cb360e5c1d2" +dependencies = [ + "either", + "lazy_static", + "libc", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] 
+name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5" + +[[package]] +name = "windows_i686_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615" + +[[package]] +name = "windows_i686_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..c026aa4b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[workspace] + +members = [ + "derive", + "rs", + "c", + "py", + "tests" +] diff --git a/LICENSE b/LICENSE index 261eeb9e..67db8588 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,4 @@ + Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ @@ -172,30 +173,3 @@ defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..0f93e115
--- /dev/null
+++ b/README.md
@@ -0,0 +1,152 @@
+Substrait validator
+===================
+
+This repository contains a validator for
+[Substrait](https://github.com/substrait-io/substrait) plans. It's written in
+Rust, but bindings are available for Python and C. Other languages may use the
+C API via their respective foreign function interface systems.
+
+Command-line interface
+----------------------
+
+The easiest way to play around with the validator is via the command-line
+interface provided by the Python `substrait-validator` module. At the time of
+writing, the package is not yet available on PyPI, but it should be easy
+enough to build from source (see the `py` subdirectory). After installing,
+you should be able to run:
+
+```console
+user@host:~$ substrait-validator
+Missing input file. Try --help for usage information.
+```
+
+If that doesn't work, try `python3 -m substrait-validator`.
+
+Without any options, the validator will decode the given input file based on
+the format implied by the file extension, validate the plan, print any
+diagnostics encountered, and fail with code 1 if the validator determines
+that the plan is invalid. Here's a valid YAML plan as a starting point for
+playing around with it:
+
+```yaml
+relations:
+- rel:
+    read:
+      namedTable:
+        names:
+        - person
+      baseSchema:
+        names:
+        - name
+        struct:
+          nullability: NULLABILITY_REQUIRED
+          types:
+          - string:
+              nullability: NULLABILITY_REQUIRED
+```
+
+When you save that as a `.yaml` file and pass it to the validator, it will
+simply exit with code 0 without printing anything. Of course, it's more
+interesting to try a plan that *isn't* valid, but we'll leave that as an
+exercise for the reader.
+
+It's also more interesting to have the validator tell you how it interpreted
+the plan. Let's change the command line to do that:
+
+```console
+user@host:~$ substrait-validator input.yaml --out-file output.html --mode ignore
+```
+
+This generates `output.html`, a self-contained HTML file describing the plan.
+
+Just like the input file, the output file format is derived from the file
+extension, so the `.html` part is significant. If you don't want to rely on
+this, you can also just specify the formats you want manually using
+`--in-type` and `--out-type`.
+
+`--mode ignore` tells the validator to emit a file and exit with code 0
+regardless of the validation result. The full list of modes is:
+
+ - `strict`: fail unless the plan was proven to be valid;
+ - `loose` (default): fail if the plan was proven to be invalid;
+ - `ignore`: ignore the validation result, though the plan still needs some
+   level of sanity to succeed; for example, the file must exist and must
+   decode according to the specified file format; and
+ - `convert`: don't run validation at all; simply convert between different
+   representations of the given `substrait.Plan` message. For example, you
+   can use this to convert between the binary protobuf serialization format
+   and any of the text-based formats supported by the validator.
+
+Note that, without `--mode convert`, the output message type will be
+`substrait.validator.ParseResult` rather than `substrait.Plan` if you use any
+of the protobuf-like serialization formats. This message type is a meta
+description of the incoming `substrait.Plan` message, with all the
+information gathered by the validator annotated to the nodes. The HTML format
+is pretty much just a pretty-printed version of this format. More information
+about this type is available in the associated `.proto` file.
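+
+As a quick sketch of `--mode convert` in action (the `plan.bin` input file is
+hypothetical, but the flags are the ones described above), converting a
+binary `substrait.Plan` to YAML would look roughly like this:
+
+```console
+user@host:~$ substrait-validator plan.bin --out-file plan.yaml --mode convert
+```
+
+As before, the input and output formats are derived from the file extensions;
+pass `--in-type` and `--out-type` explicitly if you'd rather not rely on that.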
+For more information, use the `--help` option.
+
+Library usage
+-------------
+
+For library usage information, refer to the readme files for the language
+that you want to use the library from.
+
+Diagnostics
+-----------
+
+The primary output of the validator (beyond its validity verdict) is a list
+of diagnostics. In fact, the validator derives its verdict from this list.
+Each diagnostic consists of the following bits of information:
+
+ - a severity, being either info, warning, or error;
+ - a classification, represented using a 4-digit diagnostic code;
+ - a cause description; and
+ - a path into the protobuf/YAML tree, pointing to the node that the
+   diagnostic originated from.
+
+The severity levels strictly map as follows:
+
+ - an error means that something is invalid;
+ - a warning means that something may or may not be invalid (i.e. validity
+   could not be determined for some reason); and
+ - an info message has no effect on validity.
+
+Once the validator has gathered all diagnostics, the validity of the plan is
+simply determined by the above mapping applied to the highest severity level
+encountered.
+
+Note that the command-line interface specifically could be said to have an
+extra "fatal" level. Fatal diagnostics are not diagnostics in the strict
+sense, as they are not validation results; they simply indicate that the CLI
+returned a non-zero exit code and why.
+
+Severity levels can be clamped to a certain range on a per-classification
+basis. This allows you to, for example, disable warnings of a certain type by
+clamping them down to info when you know that those particular warnings are
+not of interest to your application, or raise the severity to error if you
+want the validator to be extra pedantic about something. Because the
+validator derives its verdict from the highest-severity diagnostic
+encountered, clamping severity levels may also change the verdict.
+
+You can request the list of diagnostic codes from the command-line interface
+using the `--help-diagnostics` flag:
+
+```console
+user@host:~$ substrait-validator --help-diagnostics
+The following diagnostic codes are defined:
+
+0000 (Unclassified): unclassified diagnostic.
+  |- 0001 (NotYetImplemented): not yet implemented.
+  |- 0002 (IllegalValue): illegal value.
+...
+```
+
+Diagnostic codes are organized in a tree. When you configure the severity
+range of a diagnostic code with children, its children will inherit this
+configuration, unless they themselves are also explicitly configured. For
+example, you can disable all warnings and errors except for those
+corresponding to one particular diagnostic by clamping code 0000 down to info
+only, and then overriding the configuration for the diagnostic you're
+interested in back to the full info-to-error range.
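+
+As an illustration of that last example, such an invocation could look
+something like the sketch below. Note that the `--diagnostic-level` flag name
+is hypothetical here; check `--help` for the exact option your version
+provides.
+
+```console
+user@host:~$ # --diagnostic-level is a hypothetical flag name; see --help
+user@host:~$ substrait-validator input.yaml \
+      --diagnostic-level 0000 info info \
+      --diagnostic-level 0002 info error
+```
+
+The first option clamps all diagnostics down to info; the second restores the
+full info-to-error range for code 0002 (IllegalValue) and its children.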
diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 00000000..6b064cc1
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,76 @@
+Release process
+===============
+
+Note: this is only intended for maintainers. See `README.md` for general
+usage information.
+
+Incrementing version numbers
+----------------------------
+
+There are version numbers all over the place, though some of them aren't that
+important:
+
+ - `derive/Cargo.toml` and its reference as dependency in `rs/Cargo.toml`:
+   these two version numbers must be kept in sync, but only need to be
+   incremented when anything changes in `substrait-validator-derive`.
+ - `rs/Cargo.toml` and its references as dependencies in `py/Cargo.toml`,
+   `c/Cargo.toml`, and `tests/Cargo.toml`, as well as in `rs/README.md` for
+   the Cargo dependency copy-paste snippet: these must be kept in sync and
+   incremented when the `substrait-validator` sources, the protobuf files, OR
+   the YAML schema files are updated.
+ - `py/Cargo.toml` and `py/pyproject.toml`: must be kept in sync, and must be
+   incremented whenever the `substrait-validator` crate is updated OR the
+   Python bindings are modified.
+ - `c/Cargo.toml`: not very important, as it should always be built from
+   source by corrosion, but good to synchronize with the version of the main
+   crate.
+ - `tests/Cargo.toml`: can be ignored.
+
+The relation of the `substrait-validator` crate version to the Substrait
+specification version is TBD.
+
+Pushing to crates.io
+--------------------
+
+Note in advance: the crates in the `py`, `c`, and `tests` directories should
+NOT be pushed to `crates.io`:
+
+ - the Python bindings crate is either embedded as sources in Python source
+   distributions or is shipped pre-built from the git repo in binary wheels;
+ - the C bindings should be built by CMake/Corrosion after it obtains the
+   complete git repo or a tarball thereof; and
+ - the `tests` crate is just a test runner that serves no purpose outside of
+   this repository.
+
+Only the crates in the `derive` and `rs` directories, respectively
+`substrait-validator-derive` and `substrait-validator`, should be released.
+
+The release steps are as follows.
+
+ - Update version numbers (see section above).
+ - If `substrait-validator-derive` changed, release it per normal procedures.
+ - Remove the `rs/src/resources` directory, if one exists.
+ - Run `cargo build` locally for `substrait-validator` to recreate the above
+   directory using the protobuf and schema files from outside the validator
+   folder.
+ - Run `cargo package`. Verify that it ONLY complains about files in
+   `src/resources` not being committed yet. This is unavoidable without
+   checking in the protobuf files in multiple places.
+ - Release `substrait-validator` per normal procedures, but using
+   `--allow-dirty` to suppress the above.
+
+Pushing to PyPI
+---------------
+
+The release steps are as follows, though they should probably be performed by
+CI to use the appropriate environment.
+
+ - Update version numbers (see section above).
+ - Remove the `rs/src/resources` directory, if one exists.
+ - Run `cargo build` to recreate the above directory using the protobuf and
+   schema files from outside the validator folder.
+ - Run `python3 prepare_build.py clean`.
+ - Run `python3 prepare_build.py populate`. This makes a local copy of the
+   protobuf files for inclusion in an sdist.
+ - Run `maturin sdist` to build the source distribution.
+ - Run `maturin build` in the appropriate environments to build binary
+   distributions.
diff --git a/buf.gen.yaml b/buf.gen.yaml
new file mode 100644
index 00000000..d413e58c
--- /dev/null
+++ b/buf.gen.yaml
@@ -0,0 +1,10 @@
+version: v1
+plugins:
+  - name: cpp
+    out: gen/proto/cpp
+  - name: csharp
+    out: gen/proto/csharp
+  - name: java
+    out: gen/proto/java
+  - name: python
+    out: gen/proto/python
diff --git a/buf.work.yaml b/buf.work.yaml
new file mode 100644
index 00000000..4ca5887e
--- /dev/null
+++ b/buf.work.yaml
@@ -0,0 +1,4 @@
+version: v1
+directories:
+  - proto
+  - substrait/proto
diff --git a/c/.gitignore b/c/.gitignore
new file mode 100644
index 00000000..1ece1ed6
--- /dev/null
+++ b/c/.gitignore
@@ -0,0 +1,2 @@
+/include/
+/build/
diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt
new file mode 100644
index 00000000..82b3f6e6
--- /dev/null
+++ b/c/CMakeLists.txt
@@ -0,0 +1,59 @@
+cmake_minimum_required(VERSION 3.11)
+project(substrait-validator)
+
+include(FetchContent)
+
+# Use Corrosion to make a shared library with target name
+# "substrait-validator-c". Note that static linking is not (easily) possible
+# due to the many dependencies introduced by libcurl.
+FetchContent_Declare(
+  Corrosion
+  GIT_REPOSITORY https://github.com/corrosion-rs/corrosion.git
+  GIT_TAG v0.1.0
+)
+FetchContent_MakeAvailable(Corrosion)
+corrosion_import_crate(
+  MANIFEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Cargo.toml
+)
+
+# Add the include directory with the header file generated by build.rs.
+target_include_directories(
+  substrait-validator-c
+  INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# Handle testing.
+option(
+  SUBSTRAIT_VALIDATOR_BUILD_TESTS
+  "Whether to build tests for the Substrait validator C bindings"
+  OFF
+)
+if(SUBSTRAIT_VALIDATOR_BUILD_TESTS)
+
+  # GoogleTest requires at least C++11
+  set(CMAKE_CXX_STANDARD 11)
+
+  include(FetchContent)
+  FetchContent_Declare(
+    googletest
+    URL https://github.com/google/googletest/archive/609281088cfefc76f9d0ce82e1ff6c30cc3591e5.zip
+  )
+  set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
+  FetchContent_MakeAvailable(googletest)
+
+  enable_testing()
+
+  add_executable(
+    substrait-validator-c-test
+    ${CMAKE_CURRENT_SOURCE_DIR}/tests/test.cc
+  )
+  target_link_libraries(
+    substrait-validator-c-test
+    gtest_main
+    substrait-validator-c
+  )
+
+  include(GoogleTest)
+  gtest_discover_tests(substrait-validator-c-test)
+
+endif()
diff --git a/c/Cargo.toml b/c/Cargo.toml
new file mode 100644
index 00000000..0e9b476a
--- /dev/null
+++ b/c/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "substrait-validator-c"
+version = "0.0.1"
+edition = "2021"
+license = "Apache-2.0"
+
+[lib]
+crate-type = ["cdylib", "staticlib"]
+doc = false
+
+[build-dependencies]
+cbindgen = "0.20.0"
+
+[dependencies]
+substrait-validator = { path = "../rs", version = "0.0.1" }
+libc = "0.2"
+thiserror = "1.0"
diff --git a/c/README.md b/c/README.md
new file mode 100644
index 00000000..312310a4
--- /dev/null
+++ b/c/README.md
@@ -0,0 +1,55 @@
+# C bindings for validator
+
+This directory contains a Rust/cbindgen project to generate C bindings for
+the validator crate.
+
+## Installation
+
+No binaries are published yet.
+
+### Building manually
+
+To build manually, you will need:
+
+ - [rust](https://www.rust-lang.org/tools/install)
+
+At which point you can run:
+
+```console
+user@host:/path/to/substrait-validator/c$ cargo build --release
+```
+
+This will generate a static and shared library at
+`/path/to/substrait-validator/target/release/libsubstrait_validator_c.[a|so|lib|dll|dylib]`,
+and a header at `/path/to/substrait-validator/c/include`.
+
+### Building using CMake
+
+You can also build via CMake, and in doing so use the validator from within a
+CMake-based project. You should be able to simply add this directory as a
+subdirectory and link against the `substrait-validator-c` target. This will
+refer to the static or shared library based on `BUILD_SHARED_LIBS`.
+
+You can also run tests as follows:
+
+```console
+user@host:/path/to/substrait-validator/c$ mkdir build
+user@host:/path/to/substrait-validator/c$ cd build
+user@host:/path/to/substrait-validator/c/build$ cmake .. -DSUBSTRAIT_VALIDATOR_BUILD_TESTS=ON
+...
+user@host:/path/to/substrait-validator/c/build$ cmake --build .
+...
+user@host:/path/to/substrait-validator/c/build$ ctest .
+Test project /path/to/substrait-validator/c/build
+    Start 1: BasicTest.BasicTest
+1/1 Test #1: BasicTest.BasicTest ..............   Passed    0.00 sec
+
+100% tests passed, 0 tests failed out of 1
+
+Total Test time (real) =   0.00 sec
+```
+
+## Usage
+
+The generated header file includes docstrings that should be fairly
+self-explanatory.
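+
+As a minimal sketch of the parse/check/export lifecycle (the function names
+below come from `src/lib.rs`; the handle type names are assumptions based on
+the cbindgen renames in `build.rs`):
+
+```c
+#include <stdint.h>
+#include <stdio.h>
+#include "substrait_validator.h"
+
+int main(void) {
+  uint8_t plan[] = {0}; // placeholder; real substrait.Plan bytes go here
+
+  // Parse and validate with the default configuration (null config handle).
+  substrait_validator_result_handle *result =
+      substrait_validator_parse(plan, sizeof(plan), NULL);
+  if (result == NULL) {
+    fprintf(stderr, "%s\n", substrait_validator_get_last_error());
+    return 1;
+  }
+
+  // Positive = proven valid, zero = maybe valid, negative = proven invalid.
+  int verdict = substrait_validator_check(result);
+
+  // Print the diagnostics, one per line, then free the exported buffer.
+  uint64_t size = 0;
+  uint8_t *diags = substrait_validator_export_diagnostics(result, &size);
+  if (diags != NULL) {
+    printf("%s", (const char *)diags);
+    substrait_validator_free_exported(diags);
+  }
+
+  substrait_validator_free(result);
+  return verdict < 0 ? 1 : 0;
+}
+```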
diff --git a/c/build.rs b/c/build.rs
new file mode 100644
index 00000000..9fbb2f53
--- /dev/null
+++ b/c/build.rs
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: Apache-2.0
+
+use std::env;
+
+fn main() {
+    let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
+
+    let mut config = cbindgen::Config {
+        cpp_compat: true,
+        language: cbindgen::Language::C,
+        ..Default::default()
+    };
+    config.export.prefix = Some("substrait_validator_".to_string());
+    config
+        .export
+        .rename
+        .insert("ConfigHandle".to_string(), "config_handle".to_string());
+    config
+        .export
+        .rename
+        .insert("ResultHandle".to_string(), "result_handle".to_string());
+    config
+        .export
+        .rename
+        .insert("Resolver".to_string(), "resolver".to_string());
+    config
+        .export
+        .rename
+        .insert("Deleter".to_string(), "deleter".to_string());
+    config.header = Some("// SPDX-License-Identifier: Apache-2.0".to_string());
+
+    cbindgen::Builder::new()
+        .with_crate(crate_dir)
+        .with_config(config)
+        .generate()
+        .expect("Unable to generate bindings")
+        .write_to_file("include/substrait_validator.h");
+}
diff --git a/c/src/lib.rs b/c/src/lib.rs
new file mode 100644
index 00000000..af46872e
--- /dev/null
+++ b/c/src/lib.rs
@@ -0,0 +1,796 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Functions dereferencing raw pointers are kind of par for the course in a C
+// interface, and if we have to mark effectively all functions unsafe here, we
+// can no longer selectively place unsafe {} blocks (there is no way to mark a
+// function as unsafe to use without implicitly allowing unsafe code to be used
+// in its implementation).
+#![allow(clippy::not_unsafe_ptr_arg_deref)]
+
+use std::cell::RefCell;
+
+thread_local! {
+    /// Most recent error message, stored in thread-local storage for
+    /// thread-safety.
+    pub static LAST_ERROR: RefCell<std::ffi::CString> =
+        RefCell::new(std::ffi::CString::default());
+}
+
+/// Sets the most recent error message.
+fn set_last_error<S: AsRef<str>>(s: S) {
+    LAST_ERROR.with(|f| {
+        *f.borrow_mut() = std::ffi::CString::new(s.as_ref()).unwrap_or_default();
+    });
+}
+
+/// Returns the most recent error message. Note that the returned pointer is
+/// only valid until the next call that the current thread makes to this
+/// library.
+#[no_mangle]
+pub extern "C" fn substrait_validator_get_last_error() -> *const libc::c_char {
+    LAST_ERROR.with(|f| {
+        let reference = f.borrow();
+        reference.as_bytes_with_nul().as_ptr() as *const libc::c_char
+    })
+}
+
+/// Parser/validator configuration handle.
+pub struct ConfigHandle {
+    pub config: substrait_validator::Config,
+}
+
+/// Creates a parser/validator configuration structure.
+#[no_mangle]
+pub extern "C" fn substrait_validator_config_new() -> *mut ConfigHandle {
+    // Create a box to store the return value handle on the heap.
+    let handle = Box::new(ConfigHandle {
+        config: substrait_validator::Config::new(),
+    });
+
+    // Convert the box to its raw pointer and relinquish ownership.
+    Box::into_raw(handle)
+}
+
+/// Frees memory associated with a configuration handle. No-op if given a
+/// nullptr.
+#[no_mangle]
+pub extern "C" fn substrait_validator_config_free(handle: *mut ConfigHandle) {
+    // Ignore null pointers.
+    if handle.is_null() {
+        return;
+    }
+
+    // UNSAFE: recover the box that we created the handle with and drop it.
+    // Assumes that the pointer was created by substrait_validator_config_new().
+    let config = unsafe { Box::from_raw(handle) };
+    drop(config);
+}
+
+/// Queries which diagnostic codes are defined. If buf is non-null and size is
+/// nonzero, up to size entries in buf are filled with valid diagnostic codes.
+/// Regardless of how many entries were populated, the number of defined
+/// diagnostic codes is returned.
+#[no_mangle]
+pub extern "C" fn substrait_validator_diag_codes(buf: *mut u32, size: usize) -> usize {
+    if !buf.is_null() && size > 0 {
+        // UNSAFE: assumes that buf is properly aligned, that there is
+        // read/write access to a region of size u32s from buf onwards, and
+        // that nothing else is mutating the buffer.
+        let slice = unsafe { std::slice::from_raw_parts_mut(buf, size) };
+
+        for (code, class) in slice
+            .iter_mut()
+            .zip(substrait_validator::iter_diagnostics())
+        {
+            *code = class.code();
+        }
+    }
+
+    substrait_validator::iter_diagnostics().count()
+}
+
+/// For the given diagnostic code, returns the code for the group it belongs
+/// to. Configuring a level override for the parent of a group of diagnostic
+/// codes will set the default override for all diagnostics contained within
+/// that group.
+#[no_mangle]
+pub extern "C" fn substrait_validator_diag_parent(code: u32) -> u32 {
+    substrait_validator::Classification::parent(code)
+}
+
+/// Returns the name of the given diagnostic code. If buf is non-null and size
+/// is nonzero, up to size-1 characters in buf are filled with this name,
+/// followed by a null termination character. The null termination character is
+/// considered to be part of size. If buf is non-null, size is nonzero, and
+/// code is valid, it is always written, even if this means that the name is
+/// cut short. Bytes in buf beyond the resulting string length but within the
+/// size limit may be clobbered.
+///
+/// If code is valid, the function returns the minimum buffer size needed to
+/// contain the complete name (being its string length + 1), regardless of the
+/// supplied buffer. If code is invalid, 0 is returned, and an error message
+/// can be retrieved with substrait_validator_get_last_error().
+#[no_mangle]
+pub extern "C" fn substrait_validator_diag_name(
+    code: u32,
+    buf: *mut libc::c_char,
+    size: usize,
+) -> usize {
+    if let Some(class) = substrait_validator::Classification::from_code(code) {
+        let name = class.name();
+        let name_bytes = name.as_bytes();
+
+        if !buf.is_null() && size > 0 {
+            // UNSAFE: assumes that buf is properly aligned, that there is
+            // read/write access to a region of size bytes from buf onwards,
+            // and that nothing else is mutating the buffer.
+            let slice = unsafe { std::slice::from_raw_parts_mut(buf as *mut u8, size) };
+
+            // Try to write the name followed by a 0 to the first size-1 bytes
+            // of the buffer.
+            for (buf_byte, name_byte) in slice[..size - 1]
+                .iter_mut()
+                .zip(name_bytes.iter().cloned().chain(std::iter::once(0)))
+            {
+                *buf_byte = name_byte;
+            }
+
+            // Pessimistically always write a 0 to the last byte of the buffer,
+            // even though we may already have written an early termination
+            // character.
+            slice[size - 1] = 0;
+        }
+
+        // Return the minimum buffer size.
+        name_bytes.len() + 1
+    } else {
+        set_last_error(format!("{code} is not a valid diagnostic code"));
+        0
+    }
+}
+
+/// Returns the description of the given diagnostic code. If buf is non-null
+/// and size is nonzero, up to size-1 characters in buf are filled with this
+/// description, followed by a null termination character. The null
+/// termination character is considered to be part of size. If buf is
+/// non-null, size is nonzero, and code is valid, it is always written, even
+/// if this means that the description is cut short. Bytes in buf beyond the
+/// resulting string length but within the size limit may be clobbered.
+///
+/// If code is valid, the function returns the minimum buffer size needed to
+/// contain the complete description (being its string length + 1), regardless
+/// of the supplied buffer. If code is invalid, 0 is returned, and an error
+/// message can be retrieved with substrait_validator_get_last_error().
+#[no_mangle]
+pub extern "C" fn substrait_validator_diag_desc(
+    code: u32,
+    buf: *mut libc::c_char,
+    size: usize,
+) -> usize {
+    if let Some(class) = substrait_validator::Classification::from_code(code) {
+        let description = class.description();
+        let description_bytes = description.as_bytes();
+
+        if !buf.is_null() && size > 0 {
+            // UNSAFE: assumes that buf is properly aligned, that there is
+            // read/write access to a region of size bytes from buf onwards,
+            // and that nothing else is mutating the buffer.
+            let slice = unsafe { std::slice::from_raw_parts_mut(buf as *mut u8, size) };
+
+            // Try to write the description followed by a 0 to the first
+            // size-1 bytes of the buffer.
+            for (buf_byte, name_byte) in slice[..size - 1]
+                .iter_mut()
+                .zip(description_bytes.iter().cloned().chain(std::iter::once(0)))
+            {
+                *buf_byte = name_byte;
+            }
+
+            // Pessimistically always write a 0 to the last byte of the buffer,
+            // even though we may already have written an early termination
+            // character.
+            slice[size - 1] = 0;
+        }
+
+        // Return the minimum buffer size.
+        description_bytes.len() + 1
+    } else {
+        set_last_error(format!("{code} is not a valid diagnostic code"));
+        0
+    }
+}
+
+/// Instructs the validator to ignore protobuf fields that it doesn't know
+/// about yet (i.e., that have been added to the Substrait protobuf
+/// descriptions, but haven't yet been implemented in the validator) if the
+/// fields are set to their default value. If this option isn't set, or if an
+/// unknown field is not set to its default value, a warning is emitted.
+///
+/// Returns whether the function was successful. If false is returned, retrieve
+/// the error message with substrait_validator_get_last_error().
+#[no_mangle]
+pub extern "C" fn substrait_validator_config_ignore_unknown_fields(
+    config: *mut ConfigHandle,
+) -> bool {
+    // Check for null.
+    if config.is_null() {
+        set_last_error("received null configuration handle");
+        return false;
+    }
+
+    // UNSAFE: unpack configuration handle.
Assumes that the pointer was + // created by substrait_validator_config_new(), or behavior is undefined. + let config = unsafe { &mut (*config).config }; + + // Update configuration and return success. + config.ignore_unknown_fields(); + true +} + +/// Explicitly allows a protobuf message type for use in advanced extensions, +/// despite the fact that the validator can't validate it. If an advanced +/// extension is encountered that isn't explicitly allowed, a warning is +/// emitted. The type URL pattern may include * and ? wildcards for glob-like +/// matching (see https://docs.rs/glob/latest/glob/struct.Pattern.html for the +/// complete syntax). +/// +/// Returns whether the function was successful. If false is returned, retrieve +/// the error message with substrait_validator_get_last_error(). +#[no_mangle] +pub extern "C" fn substrait_validator_config_allow_proto_any_url( + config: *mut ConfigHandle, + pattern: *const libc::c_char, +) -> bool { + // Check for nulls. + if config.is_null() { + set_last_error("received null configuration handle"); + return false; + } + if pattern.is_null() { + set_last_error("received null pattern"); + return false; + } + + // UNSAFE: unpack configuration handle. Assumes that the pointer was + // created by substrait_validator_config_new(), or behavior is undefined. + let config = unsafe { &mut (*config).config }; + + // UNSAFE: unpack pattern string. Assumes that the pointer points to a + // null-terminated string. + let pattern = unsafe { std::ffi::CStr::from_ptr(pattern) }; + + // Parse the pattern. + let pattern = match pattern.to_str() { + Ok(u) => u, + Err(e) => { + set_last_error(format!("received invalid pattern: {e}")); + return false; + } + }; + let pattern = match substrait_validator::Pattern::new(pattern) { + Ok(p) => p, + Err(e) => { + set_last_error(format!("received invalid pattern: {e}")); + return false; + } + }; + + // Update configuration and return success. + config.allow_proto_any_url(pattern); + true +} + +/// Converts a positive/zero/negative integer into Info/Warning/Error +/// respectively. +fn int_to_level(x: i32) -> substrait_validator::Level { + match x { + 1..=i32::MAX => substrait_validator::Level::Info, + 0 => substrait_validator::Level::Warning, + i32::MIN..=-1 => substrait_validator::Level::Error, + } +} + +/// Sets a minimum and/or maximum error level for the given class of diagnostic +/// messages. Any previous settings for this class are overridden. The levels +/// are encoded as integers, where any positive value means info, zero means +/// warning, and negative means error. +/// +/// Returns whether the function was successful. If false is returned, retrieve +/// the error message with substrait_validator_get_last_error(). +#[no_mangle] +pub extern "C" fn substrait_validator_config_override_diagnostic_level( + config: *mut ConfigHandle, + class: u32, + minimum: i32, + maximum: i32, +) -> bool { + // Check for null. + if config.is_null() { + set_last_error("received null configuration handle"); + return false; + } + + // UNSAFE: unpack configuration handle. Assumes that the pointer was + // created by substrait_validator_config_new(), or behavior is undefined. + let config = unsafe { &mut (*config).config }; + + // Parse the diagnostic class/code. + let class = match substrait_validator::Classification::from_code(class) { + Some(c) => c, + None => { + set_last_error(format!("unknown diagnostic class {class}")); + return false; + } + }; + + // Parse the minimum and maximum levels. 
+    let minimum = int_to_level(minimum);
+    let maximum = int_to_level(maximum);
+
+    // Update configuration and return success.
+    config.override_diagnostic_level(class, minimum, maximum);
+    true
+}
+
+/// Overrides the resolution behavior for (YAML) URIs matching the given
+/// pattern. The pattern may include * and ? wildcards for glob-like matching
+/// (see https://docs.rs/glob/latest/glob/struct.Pattern.html for the complete
+/// syntax). If resolve_as is null, the URI will not be resolved; otherwise, it
+/// will be resolved as if the URI in the plan had been that string.
+///
+/// Returns whether the function was successful. If false is returned, retrieve
+/// the error message with substrait_validator_get_last_error().
+#[no_mangle]
+pub extern "C" fn substrait_validator_config_override_uri(
+    config: *mut ConfigHandle,
+    pattern: *const libc::c_char,
+    resolve_as: *const libc::c_char,
+) -> bool {
+    // Check for nulls.
+    if config.is_null() {
+        set_last_error("received null configuration handle");
+        return false;
+    }
+    if pattern.is_null() {
+        set_last_error("received null pattern");
+        return false;
+    }
+
+    // UNSAFE: unpack configuration handle. Assumes that the pointer was
+    // created by substrait_validator_config_new(), or behavior is undefined.
+    let config = unsafe { &mut (*config).config };
+
+    // UNSAFE: unpack pattern string. Assumes that the pointer points to a
+    // null-terminated string.
+    let pattern = unsafe { std::ffi::CStr::from_ptr(pattern) };
+
+    // Parse the pattern.
+    let pattern = match pattern.to_str() {
+        Ok(p) => p,
+        Err(e) => {
+            set_last_error(format!("received invalid pattern: {e}"));
+            return false;
+        }
+    };
+    let pattern = match substrait_validator::Pattern::new(pattern) {
+        Ok(p) => p,
+        Err(e) => {
+            set_last_error(format!("received invalid pattern: {e}"));
+            return false;
+        }
+    };
+
+    // Unpack and parse resolve_as.
+    let resolve_as = if resolve_as.is_null() {
+        None
+    } else {
+        // UNSAFE: unpack resolve_as string. Assumes that the pointer points to
+        // a null-terminated string.
+        let resolve_as = unsafe { std::ffi::CStr::from_ptr(resolve_as) };
+
+        Some(match resolve_as.to_str() {
+            Ok(p) => p,
+            Err(e) => {
+                set_last_error(format!("received invalid replacement URI: {e}"));
+                return false;
+            }
+        })
+    };
+
+    // Update configuration and return success.
+    config.override_uri(pattern, resolve_as);
+    true
+}
+
+/// Callback function for deleting a buffer allocated by the user application.
+pub type Deleter = Option<
+    unsafe extern "C" fn(deleter_user: *mut libc::c_void, buf: *const u8, size: usize),
+>;
+
+/// (YAML) URI resolution callback function.
+///
+/// The first argument (uri) is set to a null-terminated UTF-8 string
+/// representing the URI that is to be resolved. If resolution succeeds, the
+/// function must return the binary result buffer via buf and size and return
+/// true. If it fails, it should instead write a UTF-8 error message to this
+/// buffer (but it may also set buf to nullptr or leave it unchanged) and
+/// return false.
+///
+/// The buffer must remain valid only until the validator library returns
+/// control to the application. Thus, the application may keep track of the
+/// current buffer via thread-local storage or a global. It may also assign a
+/// deleter function to the deleter parameter, which will be called by the
+/// validator library when it is done with the buffer. deleter_user may be
+/// used to pass additional contextual information to the deleter; it is not
+/// used by the validator library for any purpose other than calling the
+/// deleter function.
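+///
+/// As an illustrative sketch, an application-side resolver written in C might
+/// look like this (`read_whole_file` is a hypothetical helper, and the
+/// `substrait_validator_deleter` typedef name assumes the cbindgen renames
+/// configured in build.rs):
+///
+/// ```c
+/// static void my_deleter(void *user, const uint8_t *buf, size_t size) {
+///     (void)user;
+///     (void)size;
+///     free((void *)buf);
+/// }
+///
+/// static bool my_resolver(const char *uri, const uint8_t **buf, size_t *size,
+///                         substrait_validator_deleter *deleter,
+///                         void **deleter_user) {
+///     uint8_t *data = read_whole_file(uri, size); // hypothetical helper
+///     if (data == NULL) return false;
+///     *buf = data;
+///     *deleter = my_deleter;
+///     return true;
+/// }
+/// ```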
+///
+/// All output parameters will be set to zero by the validator library before
+/// the callback is called.
+pub type Resolver = Option<
+    unsafe extern "C" fn(
+        uri: *const libc::c_char,
+        buf: *mut *const u8,
+        size: *mut usize,
+        deleter: *mut Deleter,
+        deleter_user: *mut *mut libc::c_void,
+    ) -> bool,
+>;
+
+/// Wraps a buffer returned by Resolver.
+struct ApplicationBuffer {
+    pub buf: *const u8,
+    pub size: usize,
+    pub deleter: Deleter,
+    pub deleter_user: *mut libc::c_void,
+}
+
+impl Default for ApplicationBuffer {
+    fn default() -> Self {
+        Self {
+            buf: std::ptr::null(),
+            size: 0,
+            deleter: None,
+            deleter_user: std::ptr::null_mut(),
+        }
+    }
+}
+
+impl Drop for ApplicationBuffer {
+    fn drop(&mut self) {
+        if let Some(deleter) = self.deleter {
+            // UNSAFE: assumes that the deleter function passed by the user is
+            // valid.
+            unsafe { deleter(self.deleter_user, self.buf, self.size) }
+        }
+    }
+}
+
+impl AsRef<[u8]> for ApplicationBuffer {
+    fn as_ref(&self) -> &[u8] {
+        // UNSAFE: assumes that the pointer to the buffer returned by the
+        // application is non-null, and that everything from that byte up to
+        // that byte plus self.size bytes can be dereferenced.
+        unsafe { std::slice::from_raw_parts(self.buf, self.size) }
+    }
+}
+
+/// Rust representation of an error returned by the Resolver callback function.
+#[derive(Debug, thiserror::Error)]
+struct ApplicationError {
+    msg: String,
+}
+
+impl ApplicationError {
+    fn new<S: Into<String>>(msg: S) -> Self {
+        ApplicationError { msg: msg.into() }
+    }
+}
+
+impl From<ApplicationBuffer> for ApplicationError {
+    fn from(buf: ApplicationBuffer) -> Self {
+        ApplicationError {
+            msg: match std::str::from_utf8(buf.as_ref()) {
+                Ok(e) => e.to_string(),
+                Err(e) => format!("unknown error (failed to decode error message: {e})"),
+            },
+        }
+    }
+}
+
+impl std::fmt::Display for ApplicationError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.msg)
+    }
+}
+
+/// Registers a URI resolution function with this configuration. If the given
+/// function fails, any previously registered function will be used as a
+/// fallback.
+///
+/// See the documentation for the substrait_validator_resolver typedef for
+/// more information about the semantics of the callback function.
+///
+/// Returns whether the function was successful. If false is returned, retrieve
+/// the error message with substrait_validator_get_last_error().
+#[no_mangle]
+pub extern "C" fn substrait_validator_config_uri_resolver(
+    config: *mut ConfigHandle,
+    resolver: Resolver,
+) -> bool {
+    // Check for nulls.
+    if config.is_null() {
+        set_last_error("received null configuration handle");
+        return false;
+    }
+
+    // UNSAFE: unpack configuration handle. Assumes that the pointer was
+    // created by substrait_validator_config_new(), or behavior is undefined.
+    let config = unsafe { &mut (*config).config };
+
+    // Unpack resolution function.
+    let resolver = match resolver {
+        Some(r) => r,
+        None => {
+            set_last_error("received null resolution function pointer");
+            return false;
+        }
+    };
+
+    // Update configuration and return success.
+    config.add_uri_resolver(move |uri| {
+        let uri = match std::ffi::CString::new(uri) {
+            Ok(u) => u,
+            Err(_) => {
+                return Err(ApplicationError::new(
+                    "cannot resolve URI with embedded nul characters",
+                ))
+            }
+        };
+        let mut buffer = ApplicationBuffer::default();
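+
+        // Note that default() above zeroes all of the out-parameters, which
+        // is the contract documented on the Resolver typedef.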
+        // UNSAFE: assumes that the resolver function passed by the user is
+        // valid.
+        let result = unsafe {
+            resolver(
+                uri.as_ptr(),
+                &mut buffer.buf,
+                &mut buffer.size,
+                &mut buffer.deleter,
+                &mut buffer.deleter_user,
+            )
+        };
+
+        if result {
+            if buffer.buf.is_null() {
+                Err(ApplicationError::new(
+                    "URI resolver callback returned success but also a null buffer",
+                ))
+            } else {
+                Ok(buffer)
+            }
+        } else if buffer.buf.is_null() {
+            Err(ApplicationError::new("URI resolver callback failed"))
+        } else {
+            Err(ApplicationError::from(buffer))
+        }
+    });
+    true
+}
+
+/// Parse/validation result handle.
+pub struct ResultHandle {
+    pub result: substrait_validator::ParseResult,
+}
+
+/// Parses the given byte buffer as a substrait.Plan message, using the given
+/// configuration. If a null pointer is passed for the configuration, the
+/// default configuration is used.
+///
+/// Returns a handle to the parse result. This handle must be freed using
+/// substrait_validator_free() when it is no longer needed. Fails and returns
+/// nullptr only if the incoming buffer is nullptr; any other failure to parse
+/// or validate the buffer is embedded in the handle.
+#[no_mangle]
+pub extern "C" fn substrait_validator_parse(
+    data: *const u8,
+    size: u64,
+    config: *const ConfigHandle,
+) -> *mut ResultHandle {
+    // Catch null pointers.
+    if data.is_null() {
+        set_last_error("received null input buffer");
+        return std::ptr::null_mut();
+    }
+
+    // UNSAFE: convert the incoming buffer information into a slice.
+    let data = unsafe { std::slice::from_raw_parts(data, size.try_into().unwrap()) };
+
+    // Perform the actual parsing.
+    let result = if config.is_null() {
+        substrait_validator::parse(data, &substrait_validator::Config::default())
+    } else {
+        // UNSAFE: unpack configuration handle. Assumes that the pointer was
+        // created by substrait_validator_config_new(), or behavior is undefined.
+        substrait_validator::parse(data, unsafe { &(*config).config })
+    };
+
+    // Create a box to store the return value handle on the heap.
+    let handle = Box::new(ResultHandle { result });
+
+    // Convert the box to its raw pointer and relinquish ownership.
+    Box::into_raw(handle)
+}
+
+/// Frees memory associated with a parse result handle. No-op if given a
+/// nullptr.
+#[no_mangle]
+pub extern "C" fn substrait_validator_free(handle: *mut ResultHandle) {
+    // Ignore null pointers.
+    if handle.is_null() {
+        return;
+    }
+
+    // UNSAFE: recover the box that we created the handle with and drop it.
+    // Assumes that the pointer was created by substrait_validator_parse().
+    let handle = unsafe { Box::from_raw(handle) };
+    drop(handle);
+}
+
+/// Returns whether the given parse result handle refers to a valid (positive
+/// return value), invalid (negative return value), or possibly valid plan
+/// (0 return value).
+#[no_mangle]
+pub extern "C" fn substrait_validator_check(handle: *const ResultHandle) -> i32 {
+    // UNSAFE: dereference the result handle. Assumes that the pointer was
+    // created by substrait_validator_parse(), or that it is null (in which
+    // case the null is detected and handled safely).
+    let handle = unsafe { handle.as_ref() };
+    if handle.is_none() {
+        return -1;
+    }
+    let result = &handle.as_ref().unwrap().result;
+
+    // Perform the check.
+    match result.check() {
+        substrait_validator::Validity::Valid => 1,
+        substrait_validator::Validity::MaybeValid => 0,
+        substrait_validator::Validity::Invalid => -1,
+    }
+}
+
+/// The guts for the export functions.
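+///
+/// Exported buffer layout (the pointer handed to the caller points just past
+/// the 16-byte header):
+///
+///   bytes [0..8):  vector length   (u64, native endian)
+///   bytes [8..16): vector capacity (u64, native endian)
+///   bytes [16..):  payload, followed by a terminating null byte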
+fn export(
+    format: substrait_validator::export::Format,
+    handle: *const ResultHandle,
+    size: *mut u64,
+) -> *mut u8 {
+    // UNSAFE: dereference the result handle. Assumes that the pointer was
+    // created by substrait_validator_parse(), or that it is null (in which
+    // case the null is detected and handled safely).
+    let handle = unsafe { handle.as_ref() };
+    if handle.is_none() {
+        set_last_error("received null handle");
+        return std::ptr::null_mut();
+    }
+    let result = &handle.as_ref().unwrap().result;
+
+    // Create a byte vector as output. The first 16 bytes are reserved: we'll
+    // store the length and capacity of the vector in there, and advance the
+    // pointer beyond this length before passing the data to the user. This
+    // allows us to fully recover the vector from just the returned pointer
+    // later, which we need in order to drop it safely.
+    let mut data: Vec<u8> = vec![0; 16];
+
+    // Perform the actual export function.
+    if let Err(e) = result.export(&mut data, format) {
+        set_last_error(e.to_string());
+        return std::ptr::null_mut();
+    }
+
+    // UNSAFE: pass the length to the user, if they wanted to know about it.
+    // Assumes that the size pointer, if non-null, points to a writable and
+    // appropriately aligned memory location.
+    if let Some(size) = unsafe { size.as_mut() } {
+        *size = (data.len() - 16).try_into().unwrap();
+    }
+
+    // Append a null character, to prevent pain and misery if the user treats
+    // the buffer as a null-terminated string.
+    data.push(0);
+
+    // Save the length and capacity of the vector to the start of said
+    // vector, so we can recover them later.
+    let len: u64 = data.len().try_into().unwrap();
+    data[..8].clone_from_slice(&len.to_ne_bytes());
+    let capacity: u64 = data.capacity().try_into().unwrap();
+    data[8..16].clone_from_slice(&capacity.to_ne_bytes());
+
+    // Get the pointer to the vector, and relinquish ownership.
+    let ptr = data.as_mut_ptr();
+    std::mem::forget(data);
+
+    // UNSAFE: advance the pointer beyond the bytes that we're using to store
+    // the size of the vector. This assumes that advancing by 16 bytes doesn't
+    // advance beyond the end of the buffer, which should not be possible, as
+    // the buffer is at least 17 bytes long (8 bytes length, 8 bytes capacity,
+    // and null termination byte).
+    unsafe { ptr.add(16) }
+}
+
+/// Frees memory associated with an exported buffer. No-op if given a nullptr.
+#[no_mangle]
+pub extern "C" fn substrait_validator_free_exported(data: *mut u8) {
+    // Don't do anything if the user passed nullptr.
+    if data.is_null() {
+        return;
+    }
+
+    // UNSAFE: recover the pointer to the vector data. Assumes that the pointer
+    // was (ultimately) created using export(), in which case this just
+    // reverses the pointer arithmetic done at the end of its body.
+    let buffer_ptr = unsafe { data.sub(16) };
+
+    // UNSAFE: recover the vector length from the first 8 bytes. Assumes that
+    // these 8 bytes are readable.
+    let length_ptr = buffer_ptr;
+    let length = u64::from_ne_bytes(
+        unsafe { std::slice::from_raw_parts(length_ptr, 8) }
+            .try_into()
+            .unwrap(),
+    );
+    let length = usize::try_from(length).unwrap();
+
+    // UNSAFE: recover the vector capacity from the next 8 bytes. Assumes that
+    // these 8 bytes are readable.
+    let capacity_ptr = unsafe { buffer_ptr.add(8) };
+    let capacity = u64::from_ne_bytes(
+        unsafe { std::slice::from_raw_parts(capacity_ptr, 8) }
+            .try_into()
+            .unwrap(),
+    );
+    let capacity = usize::try_from(capacity).unwrap();
+
+    // UNSAFE: recover the vector and drop it. Assumes that the recovered
+    // pointer, length, and capacity do indeed form the raw parts of a valid
+    // Vec.
+    let vec = unsafe { Vec::from_raw_parts(buffer_ptr, length, capacity) };
+    drop(vec);
+}
+
+/// Converts the given parse result to a multiline, null-terminated string,
+/// where each line represents a diagnostic message. If size is non-null, the
+/// length of the string (excluding null-termination byte) will be written to
+/// it. The function will return nullptr upon failure, in which case
+/// substrait_validator_get_last_error() can be used to retrieve an error
+/// message. If the function succeeds, the returned pointer must eventually be
+/// freed using substrait_validator_free_exported() in order to not leak
+/// memory.
+#[no_mangle]
+pub extern "C" fn substrait_validator_export_diagnostics(
+    handle: *const ResultHandle,
+    size: *mut u64,
+) -> *mut u8 {
+    export(
+        substrait_validator::export::Format::Diagnostics,
+        handle,
+        size,
+    )
+}
+
+/// Same as substrait_validator_export_diagnostics(), but instead returns a
+/// buffer filled with an HTML-based human-readable description of the parsed
+/// plan.
+#[no_mangle]
+pub extern "C" fn substrait_validator_export_html(
+    handle: *const ResultHandle,
+    size: *mut u64,
+) -> *mut u8 {
+    export(substrait_validator::export::Format::Html, handle, size)
+}
+
+/// Same as substrait_validator_export_diagnostics(), but instead returns a
+/// substrait.validator.Node message in its binary serialization format. The
+/// buffer is null-terminated, but note that protobuf serialization is a binary
+/// format, so you'll need to use the size argument to get an accurate size.
+#[no_mangle]
+pub extern "C" fn substrait_validator_export_proto(
+    handle: *const ResultHandle,
+    size: *mut u64,
+) -> *mut u8 {
+    export(substrait_validator::export::Format::Proto, handle, size)
+}
diff --git a/c/tests/test.cc b/c/tests/test.cc
new file mode 100644
index 00000000..c9d3933c
--- /dev/null
+++ b/c/tests/test.cc
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+#include <cstring>
+#include <substrait_validator.h>
+
+TEST(BasicTest, BasicTest) {
+
+  // To not depend on the Substrait format, just throw garbage at the parser.
+  // It should immediately fail to parse that, of course, but we can still
+  // do some basic interface testing that way.
+  std::string nonsense = "protobuf bytes normally go here";
+
+  // Try parsing nonsense.
+  auto handle = substrait_validator_parse(
+      reinterpret_cast<const uint8_t *>(nonsense.c_str()), nonsense.size(),
+      nullptr);
+  ASSERT_NE(handle, nullptr);
+
+  // That should fail.
+  EXPECT_EQ(substrait_validator_check(handle), -1);
+
+  // Try getting a list of error messages.
+  uint64_t data_size = 0;
+  auto data_ptr = substrait_validator_export_diagnostics(handle, &data_size);
+
+  // Those messages should still be valid after freeing the handle.
+  substrait_validator_free(handle);
+
+  // Check sanity.
+  ASSERT_NE(data_ptr, nullptr);
+  EXPECT_GT(data_size, 0);
+  EXPECT_EQ(strlen(reinterpret_cast<const char *>(data_ptr)), data_size);
+  EXPECT_EQ(reinterpret_cast<const char *>(data_ptr),
+            std::string("Error at plan: failed to decode Protobuf message: "
+                        "invalid wire type value: 7 (code 1001)\n"));
+
+  // Free the buffer.
+ substrait_validator_free_exported(data_ptr); +} diff --git a/derive/Cargo.toml b/derive/Cargo.toml new file mode 100644 index 00000000..7a8af000 --- /dev/null +++ b/derive/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "substrait-validator-derive" +description = "Procedural macros for substrait-validator" +homepage = "https://substrait.io/" +repository = "https://github.com/substrait-io/substrait" +readme = "README.md" +version = "0.0.1" +edition = "2021" +license = "Apache-2.0" + +[lib] +proc-macro = true + +[dependencies] +quote = "1.0" +syn = "1.0" +heck = "0.4" diff --git a/derive/README.md b/derive/README.md new file mode 100644 index 00000000..4ebb4ce8 --- /dev/null +++ b/derive/README.md @@ -0,0 +1,9 @@ +Procedural macro crate for substrait-validator +============================================== + +This crate defines some `#[derive]` macros for +[substrait-validator](https://crates.io/crates/substrait-validator), +specifically for the types generated by `prost-build`. This is needed because +`prost-build` on its own doesn't generate any introspection-like information +for the protobuf structures, such as message type names as strings, which we +want to be able to use in our parse tree. diff --git a/derive/src/lib.rs b/derive/src/lib.rs new file mode 100644 index 00000000..38dc2e39 --- /dev/null +++ b/derive/src/lib.rs @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Procedural macro crate for `substrait-validator-core`. +//! +//! The derive macros defined here are essentially an ugly workaround for the +//! lack of any protobuf introspection functionality provided by prost. +//! Basically, they take (the AST of) the code generated by prost and try to +//! recover the needed protobuf message metadata from there. Things would have +//! been a *LOT* simpler and a *LOT* less brittle if prost would simply +//! provide this information via traits of its own, but alas, there doesn't +//! seem to be a way to do this without forking prost, and introspection +//! seems to be a non-goal of that project. +//! +//! Besides being ugly, this method is rather brittle and imprecise when it +//! comes to recovering field names, due to the various case conversions +//! automatically done by protoc and prost. Some known issues are: +//! +//! - The recovered type name for messages defined within messages uses +//! incorrect case conventions, as the procedural macros have no way +//! of distinguishing packages from message definition scopes in the +//! type path. +//! - If the .proto source files use unexpected case conventions for +//! various things, the resulting case conventions for types, field names, +//! oneof variants, and enum variants will be wrong. +//! - Whenever the .proto source files name a field using something that is +//! a reserved word in Rust (notably `type`), prost will use a raw +//! identifier to represent the name. This syntax is currently not filtered +//! out from the recovered names, so a field named `type` becomes `r#type`. +//! This is probably not a fundamental problem, though. +//! +//! Ultimately, however, these names are only used for diagnostic messages and +//! the likes. In the worst case, the above inconsistencies may confuse the +//! user, but they should not affect the valid/invalid/maybe-valid result of +//! the validator or cause compile- or runtime errors. 
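+//!
+//! For reference, the derive is meant to be applied to prost-generated types,
+//! roughly like so (a sketch only; the real annotation is presumably attached
+//! via prost-build's type-attribute mechanism in the main crate):
+//!
+//! ```ignore
+//! #[derive(ProtoMeta)]
+//! pub struct Plan {
+//!     pub relations: Vec<PlanRel>,
+//! }
+//! ```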
+ +extern crate proc_macro; + +use heck::{ToShoutySnakeCase, ToSnakeCase}; +use proc_macro::TokenStream; +use quote::quote; + +/// Converts a Rust identifier string generated via stringify!() to the +/// original identifier by "cooking" raw identifiers. +fn cook_ident(ident: &syn::Ident) -> String { + let ident = ident.to_string(); + if let Some((_, keyword)) = ident.split_once('#') { + keyword.to_string() + } else { + ident + } +} + +#[doc(hidden)] +#[proc_macro_derive(ProtoMeta, attributes(proto_meta))] +pub fn proto_meta(input: TokenStream) -> TokenStream { + proto_meta_derive(syn::parse_macro_input!(input)) +} + +fn proto_meta_derive(ast: syn::DeriveInput) -> TokenStream { + match ast.data { + syn::Data::Struct(ref struct_data) => proto_meta_derive_message(&ast, struct_data), + syn::Data::Enum(ref enum_data) => match enum_data.variants.iter().next().unwrap().fields { + syn::Fields::Unit => { + for variant in enum_data.variants.iter() { + if !matches!(variant.fields, syn::Fields::Unit) { + panic!("all variants of a protobuf oneof enum must have a single, unnamed field"); + } + } + + proto_meta_derive_enum(&ast, enum_data) + } + syn::Fields::Unnamed(..) => { + for variant in enum_data.variants.iter() { + if let syn::Fields::Unnamed(fields) = &variant.fields { + if fields.unnamed.len() != 1 { + panic!("all variants of a protobuf oneof enum must have a single, unnamed field"); + } + } else { + panic!("all variants of a protobuf oneof enum must have a single, unnamed field"); + } + } + + proto_meta_derive_oneof(&ast, enum_data) + } + _ => panic!("enum with named elements don't map to protobuf constructs"), + }, + syn::Data::Union(_) => panic!("unions don't map to protobuf constructs"), + } +} + +enum FieldType { + Optional, + BoxedOptional, + Repeated, + Primitive, +} + +fn is_repeated(typ: &syn::Type) -> FieldType { + if let syn::Type::Path(path) = typ { + if let Some(last) = path.path.segments.last() { + if last.ident == "Option" { + if let syn::PathArguments::AngleBracketed(ref args) = last.arguments { + if let syn::GenericArgument::Type(syn::Type::Path(path2)) = + args.args.first().unwrap() + { + if path2.path.segments.last().unwrap().ident == "Box" { + return FieldType::BoxedOptional; + } else { + return FieldType::Optional; + } + } + } + panic!("Option without type argument?"); + } else if last.ident == "Vec" { + if let syn::PathArguments::AngleBracketed(ref args) = last.arguments { + if let syn::GenericArgument::Type(syn::Type::Path(path2)) = + args.args.first().unwrap() + { + if path2.path.segments.last().unwrap().ident == "u8" { + return FieldType::Primitive; + } else { + return FieldType::Repeated; + } + } + } + panic!("Vec without type argument?"); + } + } + } + FieldType::Primitive +} + +fn proto_meta_derive_message(ast: &syn::DeriveInput, data: &syn::DataStruct) -> TokenStream { + let name = &ast.ident; + let name_str = cook_ident(name); + let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl(); + + let parse_unknown_matches: Vec<_> = data + .fields + .iter() + .map(|field| { + if let Some(ident) = &field.ident { + let ident_str = cook_ident(ident); + let action = match is_repeated(&field.ty) { + FieldType::Optional => quote! { + crate::parse::traversal::push_proto_field( + y, + &self.#ident.as_ref(), + #ident_str, + true, + |_, _| Ok(()), + ); + }, + FieldType::BoxedOptional => quote! { + crate::parse::traversal::push_proto_field( + y, + &self.#ident, + #ident_str, + true, + |_, _| Ok(()), + ); + }, + FieldType::Repeated => quote! 
{ + crate::parse::traversal::push_proto_repeated_field( + y, + &self.#ident.as_ref(), + #ident_str, + true, + |_, _| Ok(()), + |_, _, _, _, _| (), + ); + }, + FieldType::Primitive => quote! { + use crate::input::traits::ProtoPrimitive; + if !y.config.ignore_unknown_fields || !self.#ident.proto_primitive_is_default() { + crate::parse::traversal::push_proto_field( + y, + &Some(&self.#ident), + #ident_str, + true, + |_, _| Ok(()), + ); + } + }, + }; + quote! { + if !y.field_parsed(#ident_str) { + unknowns = true; + #action + } + } + } else { + panic!("protobuf message fields must have names"); + } + }) + .collect(); + + quote!( + impl #impl_generics crate::input::traits::ProtoMessage for #name #ty_generics #where_clause { + fn proto_message_type() -> &'static str { + use ::once_cell::sync::Lazy; + static TYPE_NAME: Lazy<::std::string::String> = Lazy::new(|| { + crate::input::proto::cook_path(module_path!(), #name_str) + }); + &TYPE_NAME + } + } + + impl #impl_generics crate::input::traits::InputNode for #name #ty_generics #where_clause { + fn type_to_node() -> crate::output::tree::Node { + use crate::input::traits::ProtoMessage; + crate::output::tree::NodeType::ProtoMessage(Self::proto_message_type()).into() + } + + fn data_to_node(&self) -> crate::output::tree::Node { + use crate::input::traits::ProtoMessage; + crate::output::tree::NodeType::ProtoMessage(Self::proto_message_type()).into() + } + + fn oneof_variant(&self) -> Option<&'static str> { + None + } + + fn parse_unknown( + &self, + y: &mut crate::parse::context::Context<'_>, + ) -> bool { + let mut unknowns = false; + #(#parse_unknown_matches)* + unknowns + } + } + ) + .into() +} + +fn proto_meta_derive_oneof(ast: &syn::DeriveInput, data: &syn::DataEnum) -> TokenStream { + let name = &ast.ident; + let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl(); + + let variant_matches: Vec<_> = data + .variants + .iter() + .map(|variant| { + let ident = &variant.ident; + let proto_name = cook_ident(ident).to_snake_case(); + quote! { #name::#ident (_) => #proto_name } + }) + .collect(); + + let node_matches: Vec<_> = data + .variants + .iter() + .map(|variant| { + let ident = &variant.ident; + quote! { #name::#ident (x) => x.data_to_node() } + }) + .collect(); + + let parse_unknown_matches: Vec<_> = data + .variants + .iter() + .map(|variant| { + let ident = &variant.ident; + quote! 
{ #name::#ident (x) => x.parse_unknown(y) }
+        })
+        .collect();
+
+    quote!(
+        impl #impl_generics crate::input::traits::ProtoOneOf for #name #ty_generics #where_clause {
+            fn proto_oneof_variant(&self) -> &'static str {
+                match self {
+                    #(#variant_matches),*
+                }
+            }
+        }
+
+        impl #impl_generics crate::input::traits::InputNode for #name #ty_generics #where_clause {
+            fn type_to_node() -> crate::output::tree::Node {
+                crate::output::tree::NodeType::ProtoMissingOneOf.into()
+            }
+
+            fn data_to_node(&self) -> crate::output::tree::Node {
+                match self {
+                    #(#node_matches),*
+                }
+            }
+
+            fn oneof_variant(&self) -> Option<&'static str> {
+                use crate::input::traits::ProtoOneOf;
+                Some(self.proto_oneof_variant())
+            }
+
+            fn parse_unknown(
+                &self,
+                y: &mut crate::parse::context::Context<'_>,
+            ) -> bool {
+                match self {
+                    #(#parse_unknown_matches),*
+                }
+            }
+        }
+    )
+    .into()
+}
+
+fn proto_meta_derive_enum(ast: &syn::DeriveInput, data: &syn::DataEnum) -> TokenStream {
+    let name = &ast.ident;
+    let name_str = cook_ident(name);
+    let (impl_generics, ty_generics, where_clause) = ast.generics.split_for_impl();
+
+    let upper_name = name_str.to_shouty_snake_case();
+
+    let variant_names: Vec<_> = data
+        .variants
+        .iter()
+        .map(|variant| {
+            let ident = &variant.ident;
+            let proto_name = format!(
+                "{}_{}",
+                upper_name,
+                cook_ident(ident).to_shouty_snake_case()
+            );
+            (ident, proto_name)
+        })
+        .collect();
+
+    let variant_matches: Vec<_> = variant_names
+        .iter()
+        .map(|(ident, proto_name)| {
+            quote! { #name::#ident => #proto_name }
+        })
+        .collect();
+
+    let (_, first_variant_name) = &variant_names[0];
+
+    quote!(
+        impl #impl_generics crate::input::traits::ProtoEnum for #name #ty_generics #where_clause {
+            fn proto_enum_type() -> &'static str {
+                use ::once_cell::sync::Lazy;
+                static TYPE_NAME: Lazy<::std::string::String> = Lazy::new(|| {
+                    crate::input::proto::cook_path(module_path!(), #name_str)
+                });
+                &TYPE_NAME
+            }
+
+            fn proto_enum_default_variant() -> &'static str {
+                #first_variant_name
+            }
+
+            fn proto_enum_variant(&self) -> &'static str {
+                match self {
+                    #(#variant_matches),*
+                }
+            }
+
+            fn proto_enum_from_i32(x: i32) -> Option<Self> {
+                Self::from_i32(x)
+            }
+        }
+    )
+    .into()
+}
diff --git a/proto/buf.lock b/proto/buf.lock
new file mode 100644
index 00000000..c91b5810
--- /dev/null
+++ b/proto/buf.lock
@@ -0,0 +1,2 @@
+# Generated by buf. DO NOT EDIT.
+version: v1
diff --git a/proto/buf.yaml b/proto/buf.yaml
new file mode 100644
index 00000000..8b9ee4f7
--- /dev/null
+++ b/proto/buf.yaml
@@ -0,0 +1,11 @@
+version: v1
+name: buf.build/substrait-io/substrait
+lint:
+  use:
+    - DEFAULT
+  ignore_only:
+    PACKAGE_VERSION_SUFFIX:
+      - substrait
+breaking:
+  use:
+    - FILE
diff --git a/proto/substrait/validator/validator.proto b/proto/substrait/validator/validator.proto
new file mode 100644
index 00000000..0af8796a
--- /dev/null
+++ b/proto/substrait/validator/validator.proto
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: Apache-2.0
+syntax = "proto3";
+
+package substrait.validator;
+
+import "google/protobuf/any.proto";
+
+option csharp_namespace = "Substrait.Validator.Protobuf";
+option java_multiple_files = true;
+option java_package = "io.substrait.validator.proto";
+
+// One of the functions of the validator is to convert Substrait plans to a
+// format that is easier to consume for software geared toward making
+// human-readable representations of Substrait.
The validator has a few +// builtin text-based exporters, but it can also emit the complete parse +// result via the binary serialization of this message type. +message ParseResult { + // Root node of the parse result tree. + Node root = 1; +} + +// Nodes of the validator parse result tree. +// +// Note that, unlike substrait.Plan and its children, the nodes in this tree +// are intentionally devoid of typing information: all nodes are of type Node. +// The purpose of this is to allow a consumer of these trees to walk over the +// entire tree without needing in-depth knowledge of how Substrait works (and, +// with that, to decouple them from changes to the Substrait specification): +// they are intended as an intermediate format for converting Substrait plans +// into more human-friendly representations after all, not for programmatically +// dealing with the semantics of Substrait itself. That's what the validator is +// for, in this case. +// +// In particular, gathering all diagnostics emitted by the validator only +// requires the consumer to use the Node, Node.Data, Node.Child, and of course +// the Diagnostic message types. +// +// In case the consumer does need additional information from the original +// substrait.Plan, every node can be related back to its corresponding message +// via the path information associated with the nodes. +message Node { + // The type of node. + oneof node_type { + // This node represents a protobuf message. The fields are described using + // Field, RepeatedField, and OneOfField messages in data. + ProtoMessage proto_message = 1; + + // This node represents a protobuf primitive or enum. + ProtoPrimitive proto_primitive = 2; + + // This node is inserted as a placeholder when a required oneof field was + // not populated in the input. + Empty proto_missing_oneof = 3; + + // Special case of proto_primitive for references to anchors defined + // elsewhere in the plan. + NodeReference node_reference = 4; + + // Special case for proto_primitive for references to YAML files via a URI. + // If resolved, the keys in the toplevel YAML map are represented using + // Field messages in data. + YamlReference yaml_reference = 5; + + // This node represents a YAML map/object. The keys are represented using + // Field messages in data. + Empty yaml_map = 6; + + // This node represents a YAML array. The elements are represented using + // ArrayElement messages in data. + Empty yaml_array = 7; + + // This node represents a YAML primitive. + PrimitiveData yaml_primitive = 8; + } + + // Semantic classification of this node. + Class class = 13; + + // Semantic classification of a node. + enum Class { + CLASS_UNSPECIFIED = 0; + + // This node represents a data type. + CLASS_TYPE = 1; + + // This node represents an expression. + CLASS_EXPRESSION = 2; + + // This node represents a relation. + CLASS_RELATION = 3; + } + + // Optional brief description of the node. Should not contain newlines or + // other non-span formatting information. + Comment brief = 14; + + // Optional summary of the node. Unlike brief, this may contain + // paragraph-level formatting information. + Comment summary = 15; + + // For the following types of nodes, the validator will try to do type + // resolution: + // - type-like nodes resolve to said type; + // - expression-like nodes resolve to the type returned by the expression; + // - relation-like nodes resolve to the schema (as a named struct) returned + // by the relation. 
+  // This field will be populated for such nodes even if resolution fails, to
+  // indicate that there is supposed to be a type. In that case, the type kind
+  // will be set to "unresolved." The field will not be populated for nodes
+  // that don't have a logical Substrait type.
+  DataType data_type = 16;
+
+  // Data associated with this node.
+  repeated Data data = 31;
+
+  // Data associated with the node. Note that some variants are illegal based
+  // on the node type (for example, a primitive does not have fields, so it
+  // makes no sense for Field data to appear).
+  message Data {
+    oneof kind {
+      // Represents a child node in the tree.
+      Child child = 1;
+
+      // Represents a diagnostic message.
+      Diagnostic diagnostic = 2;
+
+      // Represents an (intermediate) data type.
+      DataType data_type = 3;
+
+      // Unstructured additional information about the node or something in it.
+      Comment comment = 4;
+    }
+  }
+
+  // Representation of a child node in the tree.
+  message Child {
+    // Path element identifying the relation of this child node to its
+    // parent.
+    Path.Element path = 1;
+
+    // The child node.
+    Node node = 2;
+
+    // Whether the validator recognized/expected the field or element that
+    // this child represents. Fields/elements may be unrecognized simply
+    // because validation is not implemented for them yet. In any case, this
+    // flag indicates that the subtree represented by this node could not be
+    // validated.
+    bool recognized = 3;
+  }
+
+  // Information about a protobuf message.
+  message ProtoMessage {
+    // The full protobuf path for the type, for example "substrait.Plan".
+    string path = 1;
+  }
+
+  // Information about a protobuf primitive.
+  message ProtoPrimitive {
+    // Logically compatible protobuf name of the primitive type, for example
+    // uint32 for any 32-bit unsigned data storage type.
+    string path = 1;
+
+    // Value of the primitive.
+    PrimitiveData data = 2;
+  }
+
+  // Information about the reference part of a reference/anchor pair.
+  message NodeReference {
+    // Integer value of the reference and anchor.
+    uint64 value = 1;
+
+    // Absolute path to the referenced node, i.e. the node containing the
+    // anchor field.
+    Path path = 2;
+  }
+
+  // Information about a reference to a YAML file.
+  message YamlReference {
+    // URI to the YAML file.
+    string uri = 1;
+  }
+
+  // Value for a primitive data element.
+  message PrimitiveData {
+    // Note: to represent a YAML null, this field is simply not populated.
+    oneof data {
+      bool boolean = 1;
+      uint64 unsigned = 2;
+      int64 signed = 3;
+      double real = 4;
+      string unicode = 5;
+      bytes binary = 6;
+      string variant = 7;
+      google.protobuf.Any any = 8;
+    }
+  }
+}
+
+// An absolute path to a node in the tree.
+message Path {
+  // Name of the root node. Currently always set to `plan`.
+  string root = 1;
+
+  // Elements of the path. The first element selects a child node of the root
+  // node, the second selects one of its children, etc.
+  repeated Element elements = 2;
+
+  // Path element structure.
+  message Element {
+    oneof kind {
+      Field field = 1;
+      RepeatedField repeated_field = 2;
+      OneOfField oneof_field = 3;
+      ArrayElement array_element = 4;
+    }
+  }
+
+  // Path element used for protobuf fields and YAML maps.
+  // Canonically represented as `.<field>` if field matches
+  // [a-zA-Z_][a-zA-Z0-9_]*, or as `."<field>"` using \\ and \" escape
+  // sequences if not (note that this can only happen for YAML map keys).
+  message Field {
+    string field = 1;
+  }
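+
+  // As an illustrative (hypothetical) example of the canonical notation, a
+  // complete path combining these element kinds might render as:
+  //
+  //   plan.relations[0].rel_type{read}.base_schema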
+
+  // Path element structure.
+  message Element {
+    oneof kind {
+      Field field = 1;
+      RepeatedField repeated_field = 2;
+      OneOfField oneof_field = 3;
+      ArrayElement array_element = 4;
+    }
+  }
+
+  // Path element used for protobuf fields and YAML maps.
+  // Canonically represented as `.<field>` if field matches
+  // [a-zA-Z_][a-zA-Z0-9_]*, or as `."<field>"` using \\ and \" escape
+  // sequences if not (note that this can only happen for YAML map keys).
+  message Field {
+    string field = 1;
+  }
+
+  // Path element used for protobuf repeated field elements.
+  // Canonically represented as `.<field>[<index>]`.
+  message RepeatedField {
+    string field = 1;
+    uint64 index = 2;
+  }
+
+  // Path element used for protobuf oneof fields.
+  // Canonically represented as `.<field>{<variant>}`.
+  message OneOfField {
+    string field = 1;
+    string variant = 2;
+  }
+
+  // Path element used for YAML arrays.
+  // Canonically represented as `[<index>]`.
+  message ArrayElement {
+    uint64 index = 2;
+  }
+}
+
+// Representation of a diagnostic message.
+message Diagnostic {
+  // The original error level/severity for this diagnostic.
+  Level original_level = 1;
+
+  // The machine-readable cause code for this diagnostic.
+  uint32 cause = 3;
+
+  // The error level/severity for this diagnostic after adjustment according
+  // to the validator configuration.
+  Level adjusted_level = 2;
+
+  // The human-readable message for this diagnostic.
+  string msg = 4;
+
+  // A path associated with this diagnostic. This is usually the path for
+  // the node it is associated with, but not necessarily: for example, a
+  // diagnostic message relating to a duplicate definition may refer back
+  // to the previous or first definition.
+  Path path = 5;
+
+  // Error level.
+  enum Level {
+    LEVEL_UNSPECIFIED = 0;
+
+    // Information diagnostic. Has no bearing on the validity of the plan.
+    LEVEL_INFO = 1;
+
+    // Warning diagnostic. The presence of warning diagnostics indicates
+    // that the plan may or may not be valid, for example because the
+    // validator was unable to access a referenced YAML file, or because
+    // enhancements using protobuf's Any type were used.
+    LEVEL_WARNING = 2;
+
+    // Error diagnostic. The presence of error diagnostics indicates that
+    // the plan is invalid.
+    LEVEL_ERROR = 3;
+  }
+}
+
+// Representation of a comment made by the validator that is only intended
+// to be interpreted by people.
+message Comment {
+  // Comments consist of one or more "elements," defining formatting
+  // information.
+  repeated Element elements = 1;
+
+  // A comment element.
+  message Element {
+    oneof kind {
+      // A span of text.
+      Span span = 1;
+
+      // A newline, i.e. the next span should start on the next line.
+      Empty new_line = 2;
+
+      // Opens a new unordered list. The next span is the start of the text for
+      // the next item. list_next elements are used to advance to the next list
+      // item; newlines can be used to add paragraphs without bullet points.
+      // Each list_open should be matched with a list_close. Lists may be
+      // nested.
+      Empty list_open = 3;
+
+      // Advances to the next list item.
+      Empty list_next = 4;
+
+      // Closes the current list.
+      Empty list_close = 5;
+    }
+  }
+
+  // A span of text.
+  message Span {
+    // Text for this span. Should not include newlines.
+    string text = 1;
+
+    // Specified if this span of text should link to something.
+    oneof link {
+      // Link to a path in the tree.
+      Path path = 2;
+
+      // Link to a web page.
+      string url = 3;
+    }
+  }
+}
+
+// Representation of a resolved data type.
+message DataType {
+  // Type class.
+  Class class = 1;
+
+  // Nullability.
+  bool nullable = 8;
+
+  // Type variation, if any.
+  oneof variation {
+    UserDefinedVariation user_defined_variation = 9;
+    Empty unresolved_variation = 15;
+  }
+
+  // Type parameters for non-simple types.
+  repeated Parameter parameters = 16;
+
+  // A type class.
+  message Class {
+    oneof kind {
+      Simple simple = 1;
+      Compound compound = 2;
+      UserDefinedType user_defined_type = 3;
+      Empty unresolved_type = 7;
+    }
+  }
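+
+  // For example, a VARCHAR<40> would be represented with class set to
+  // COMPOUND_VAR_CHAR and a single unsigned integer parameter of 40
+  // (illustrative; see Parameter below).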
+
+  // Enumeration of simple types. Message numbers correspond to the ones in
+  // substrait.Type. Note that UNSPECIFIED should never be emitted by the
+  // validator.
+  enum Simple {
+    SIMPLE_UNSPECIFIED = 0;
+    SIMPLE_BOOLEAN = 1;
+    SIMPLE_I8 = 2;
+    SIMPLE_I16 = 3;
+    SIMPLE_I32 = 5;
+    SIMPLE_I64 = 7;
+    SIMPLE_FP32 = 10;
+    SIMPLE_FP64 = 11;
+    SIMPLE_STRING = 12;
+    SIMPLE_BINARY = 13;
+    SIMPLE_TIMESTAMP = 14;
+    SIMPLE_DATE = 16;
+    SIMPLE_TIME = 17;
+    SIMPLE_INTERVAL_YEAR = 19;
+    SIMPLE_INTERVAL_DAY = 20;
+    SIMPLE_TIMESTAMP_TZ = 29;
+    SIMPLE_UUID = 32;
+  }
+
+  // Enumeration of compound types. Message numbers correspond to the ones in
+  // substrait.Type. Note that UNSPECIFIED should never be emitted by the
+  // validator.
+  enum Compound {
+    COMPOUND_UNSPECIFIED = 0;
+    COMPOUND_FIXED_CHAR = 21;
+    COMPOUND_VAR_CHAR = 22;
+    COMPOUND_FIXED_BINARY = 23;
+    COMPOUND_DECIMAL = 24;
+    COMPOUND_STRUCT = 25;
+    COMPOUND_NAMED_STRUCT = 26;
+    COMPOUND_LIST = 27;
+    COMPOUND_MAP = 28;
+  }
+
+  // Information about a user-defined type.
+  message UserDefinedType {
+    // URI of the YAML file that the type is (supposed to be) defined in, if
+    // known.
+    string uri = 1;
+
+    // Name of the type within the scope of that YAML file.
+    string name = 2;
+
+    // Type definition information from the YAML file, if resolution
+    // succeeded.
+    Definition definition = 3;
+
+    // Type definition information from a YAML file for a user-defined type.
+    message Definition {
+      // The primitive structure of the type.
+      repeated Element structure = 1;
+    }
+
+    // Primitive structure element for a user-defined type.
+    message Element {
+      // Name of the element.
+      string name = 1;
+
+      // Type of data.
+      Simple kind = 2;
+    }
+  }
+
+  // Information about a type variation.
+  message UserDefinedVariation {
+    // URI of the YAML file that the type variation is (supposed to be) defined
+    // in, if known.
+    string uri = 1;
+
+    // Name of the type variation within the scope of that YAML file.
+    string name = 2;
+
+    // Type variation definition information from the YAML file, if resolution
+    // succeeded.
+    Definition definition = 3;
+
+    // Definition information from a YAML file for a user-defined type
+    // variation.
+    message Definition {
+      // Base type.
+      oneof base_type {
+        Class physical = 1;
+        UserDefinedVariation logical = 2;
+        Empty unresolved = 7;
+      }
+
+      // Function behavior for this type variation.
+      FunctionBehavior function_behavior = 8;
+    }
+
+    // Function behavior for a type variation.
+    enum FunctionBehavior {
+      FUNCTION_BEHAVIOR_UNSPECIFIED = 0;
+      FUNCTION_BEHAVIOR_INHERITS = 1;
+      FUNCTION_BEHAVIOR_SEPARATE = 2;
+    }
+  }
+
+  // Type parameter.
+  message Parameter {
+    // Type of parameter.
+    oneof kind {
+      // Anonymous data type parameter, for example the T in LIST<T>.
+      DataType data_type = 1;
+
+      // Named data type parameter, for example N:T in NSTRUCT<N:T>.
+      Named named_type = 2;
+
+      // Unsigned integer parameter, for example the L in VARCHAR<L>.
+      uint64 unsigned = 3;
+    }
+  }
+
+  // A named type, used for NSTRUCT (meta)types.
+  message Named {
+    // Name of the struct element.
+    string name = 1;
+
+    // Data type of the struct element.
+    DataType data_type = 2;
+  }
+}
+
+// Used for oneof field variants that have no data associated with them.
+message Empty {}
diff --git a/py/.gitignore b/py/.gitignore
new file mode 100644
index 00000000..a0bd3051
--- /dev/null
+++ b/py/.gitignore
@@ -0,0 +1,79 @@
+/target
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.pytest_cache/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+.venv/
+env/
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+include/
+man/
+venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+pip-selfcheck.json
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+.DS_Store
+
+# Sphinx documentation
+docs/_build/
+
+# PyCharm
+.idea/
+
+# VSCode
+.vscode/
+
+# Pyenv
+.python-version
+
+# Generated files
+/text/
+/proto/
+/LICENSE
+/protoc_out/
+/substrait_validator/substrait/
diff --git a/py/Cargo.toml b/py/Cargo.toml
new file mode 100644
index 00000000..7bd45c67
--- /dev/null
+++ b/py/Cargo.toml
@@ -0,0 +1,38 @@
+[package]
+name = "substrait-validator-py"
+version = "0.0.1"
+edition = "2018"
+license = "Apache-2.0"
+include = [
+    "/LICENSE",
+    "/README.md",
+    "/build.rs",
+    "/pyproject.toml",
+    "/proto",
+    "/text",
+    "/src",
+    "/substrait_validator/*.py",
+    "/substrait_validator_build/*.py",
+    "/tests"
+]
+
+[lib]
+crate-type = ["cdylib"]
+
+# NOTE: we need to set the crate name to substrait_validator, because maturin
+# seems to insist on also using this name for the Python module, and it'd be
+# rather unfortunate if the Python module would need to be named
+# substrait_validator_py...
+name = "substrait_validator"
+
+# cargo doc especially can't deal with name collisions.
+doc = false
+
+[dependencies]
+substrait-validator = { path = "../rs", version = "0.0.1" }
+pyo3 = { version = "0.15.1", features = ["extension-module"] }
+
+[build-dependencies]
+prost-build = "0.9"
+walkdir = "2"
+dunce = "1"
diff --git a/py/README.md b/py/README.md
new file mode 100644
index 00000000..9f624c8e
--- /dev/null
+++ b/py/README.md
@@ -0,0 +1,64 @@
+# Python bindings for validator
+
+This directory contains a Rust/PyO3 project to generate Python bindings for the
+validator library.
+
+## Installation
+
+No wheels are published yet, so you have to build manually. Running something
+along the lines of `pip install .` should work. You should only need to have a
+[rust](https://www.rust-lang.org/tools/install) compiler installed.
+
+If you want to do an editable install, you must run
+`./prepare_build.py populate` first.
+
+## Building wheels and source distributions
+
+You can build wheels and source distributions using
+[maturin](https://github.com/PyO3/maturin), specifically using the `build` and
+`sdist` commands. However, before you can do this, you must run
+`./prepare_build.py populate`. This makes local copies of some files in the
+repository that live outside of this subdirectory, such as the protobuf
+description files. When you use `pip` or some other tool based on
+`pyproject.toml`, this will be done automatically via build system hooks, but
+unfortunately maturin doesn't itself provide hooks with which this can be
+automated.
+
+## Running tests
+
+You can test the module using `pytest` after you install it.
+
+## Command-line usage
+
+The module exposes a command-line program named `substrait-validator` for
+running the validator manually. You can also use the tool to convert between
+various serialization formats of the `substrait.Plan` message. Run
+`substrait-validator --help` for more information.
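+
+For example, a couple of typical invocations might look like this (the file
+names are illustrative):
+
+```
+# Validate a binary plan, writing diagnostics to stderr:
+substrait-validator plan.proto
+
+# Convert a JSON plan to its binary protobuf representation:
+substrait-validator plan.json --mode convert -O plan.proto
+```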
+
+## Library usage
+
+The library essentially provides a bunch of type conversion functions at
+module scope to convert between the various representations of a Substrait
+plan, including the result of the validator. The most important functions are
+arguably `check_plan_valid(plan, config=None)` and
+`check_plan_not_invalid(plan, config=None)`, which run validation on the given
+plan and, if the plan is not strictly or loosely valid respectively, throw a
+Python exception corresponding to the first diagnostic of the highest severity
+returned by the validator. That is, `check_plan_valid` will throw an exception
+if the plan could not be proven to be valid, while `check_plan_not_invalid`
+will only throw if it could be proven to be invalid.
+
+The `plan` argument can be a number of things:
+
+ - `bytes`: treated as a binary serialization of `substrait.Plan`.
+ - `str`: treated as a protobuf JSON serialization of `substrait.Plan`.
+ - `dict`: treated as the above using Python's data model (JSON objects map
+   to `dict`s, JSON arrays map to `list`s).
+ - `substrait_validator.substrait.Plan`: a previously deserialized plan.
+ - `substrait_validator.ResultHandle`: a previously validated plan.
+
+`config` can be `None`/unspecified, or can be set to a
+`substrait_validator.Config` object to configure the validator with.
+
+For more information, use Python's `help()` function.
diff --git a/py/build.rs b/py/build.rs
new file mode 100644
index 00000000..9036d873
--- /dev/null
+++ b/py/build.rs
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: Apache-2.0
+
+use std::collections::HashSet;
+use std::env;
+use std::ffi::OsStr;
+use std::ffi::OsString;
+use std::fs;
+use std::io::BufRead;
+use std::io::BufReader;
+use std::io::Write;
+use std::path::PathBuf;
+use std::process::Command;
+use walkdir::WalkDir;
+
+fn main() {
+    // Directory that the proto files are stored in. If the local_dependencies
+    // directory exists, we're building from an sdist package, in which case
+    // the proto files should have been copied to a local directory.
+    let input_paths = if std::path::Path::new("local_dependencies").exists() {
+        vec!["proto"]
+    } else {
+        vec!["../proto", "../substrait/proto"]
+    };
+
+    // Ensure the above paths are relative to the Cargo.toml directory.
+    let pwd = env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR");
+    let input_paths = input_paths.iter().map(|p| PathBuf::from(&pwd).join(p));
+
+    // Output directory for protoc. This is a temporary directory: it will be
+    // completely deleted and then reconstructed. Afterward, the build script
+    // will patch the files in here and then move them to python_out.
+    let intermediate_path = "protoc_out";
+
+    // Where the final Python files will be moved to.
+    let output_path = "substrait_validator";
+
+    // The Python module prefix to patch into use statements of the files
+    // generated by protobuf.
+    let python_prefix = "substrait_validator.";
+
+    // Canonicalize all paths to prevent ambiguity.
+    let input_paths = input_paths
+        .map(|p| dunce::canonicalize(p).unwrap())
+        .collect::<Vec<_>>();
+    let workdir = std::env::current_dir().unwrap();
+    let intermediate_path = workdir.join(intermediate_path);
+    let output_path = workdir.join(output_path);
+
+    // Gather all .proto files.
+    let proto_files = input_paths
+        .iter()
+        .flat_map(|p| {
+            WalkDir::new(&p)
+                .into_iter()
+                .filter_map(|e| e.ok())
+                .filter(|e| {
+                    e.path().extension() == Some(OsStr::new("proto"))
+                        && e.metadata().unwrap().is_file()
+                })
+                .map(|e| dunce::canonicalize(e.into_path()).unwrap())
+        })
+        .collect::<Vec<_>>();
+
+    // Inform cargo that changes to the .proto files require a rerun.
+    for path in &proto_files {
+        println!("cargo:rerun-if-changed={}", path.display());
+    }
+
+    // Clean and recreate output directory.
+    fs::remove_dir_all(&intermediate_path).ok();
+    fs::create_dir_all(&intermediate_path).expect("failed to create protoc output directory");
+
+    // Run protoc.
+    let mut cmd = Command::new(prost_build::protoc());
+    for input_path in input_paths.iter() {
+        let mut proto_path_arg = OsString::new();
+        proto_path_arg.push("--proto_path=");
+        proto_path_arg.push(&input_path);
+        cmd.arg(proto_path_arg);
+    }
+    let mut python_out_arg = OsString::new();
+    python_out_arg.push("--python_out=");
+    python_out_arg.push(&intermediate_path);
+    cmd.arg(python_out_arg);
+    cmd.args(proto_files.iter());
+    let output = cmd.output().expect("failed to run protoc");
+    if !output.status.success() {
+        eprintln!("cmd: {:?}", cmd.get_program());
+        for arg in cmd.get_args() {
+            eprintln!("arg: {:?}", arg);
+        }
+        panic!("{:?}", output);
+    }
+
+    // Gather all Python files generated by protoc.
+    let intermediate_files: Vec<_> = WalkDir::new(&intermediate_path)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| {
+            e.path().extension() == Some(OsStr::new("py")) && e.metadata().unwrap().is_file()
+        })
+        .map(|e| dunce::canonicalize(e.into_path()).unwrap())
+        .collect();
+
+    // Patch the files.
+    let mut output_dirs = HashSet::new();
+    for intermediate_file in intermediate_files {
+        // Determine the output filename.
+        let output_file = output_path.join(
+            intermediate_file
+                .strip_prefix(&intermediate_path)
+                .expect("intermediate file is not based in the expected directory"),
+        );
+
+        // Determine the output directory, ensure that it exists, and create
+        // an __init__.py for it if we haven't already.
+        let mut path = output_file.to_path_buf();
+        path.pop();
+        if output_dirs.insert(path.clone()) {
+            fs::create_dir_all(&path).expect("failed to create output directory");
+            path.push("__init__.py");
+            fs::File::create(path).expect("failed to create __init__.py");
+        }
+
+        // Copy and patch the file.
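+        // For example, a protoc-generated line such as
+        //   `from substrait.validator import ...`
+        // is rewritten below to
+        //   `from substrait_validator.substrait.validator import ...`
+        // so that the generated modules resolve within this package
+        // (illustrative; `from google ...` imports are deliberately left
+        // untouched).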
+ let intermediate = + fs::File::open(&intermediate_file).expect("failed to open intermediate file"); + let mut output = fs::File::create(&output_file).expect("failed to create output file"); + for line in BufReader::new(intermediate).lines() { + let line = line.expect("failed to read from intermediate file"); + let line = if line.starts_with("from ") && !line.starts_with("from google") { + format!("from {}{}", python_prefix, &line[5..]) + } else { + line + }; + writeln!(output, "{}", line).unwrap(); + } + } +} diff --git a/py/prepare_build.py b/py/prepare_build.py new file mode 100755 index 00000000..18b91b89 --- /dev/null +++ b/py/prepare_build.py @@ -0,0 +1,34 @@ +#!/usr/bin/python +# SPDX-License-Identifier: Apache-2.0 + +import os +import sys +import substrait_validator_build + + +def eprint(*args): + print(*args, file=sys.stderr) + + +if __name__ == "__main__": + if len(sys.argv) != 2: + eprint("Usage: {} [populate|clean]".format(sys.argv[0])) + eprint() + eprint( + "Populates or removes local copies of Substrait files needed for the build" + ) + eprint("that are stored outside of this subdirectory.") + sys.exit(1) + + if sys.argv[1] == "populate": + os.chdir(os.path.dirname(os.path.abspath(__file__))) + substrait_validator_build.populate() + sys.exit(0) + + if sys.argv[1] == "clean": + os.chdir(os.path.dirname(os.path.abspath(__file__))) + substrait_validator_build.clean() + sys.exit(0) + + eprint("Unknown command: {}".format(sys.argv[1])) + sys.exit(1) diff --git a/py/pyproject.toml b/py/pyproject.toml new file mode 100644 index 00000000..8481d2c7 --- /dev/null +++ b/py/pyproject.toml @@ -0,0 +1,36 @@ +[build-system] +requires = ["maturin>=0.12,<0.13"] +build-backend = "substrait_validator_build" +backend-path = ["."] + +[project] +name = "substrait-validator" +version = "0.0.1" +description = "Validator for Substrait query plans" +readme = "README.md" +license = {file = "LICENSE"} +keywords = ["substrait"] +requires-python = ">=3.6" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] +dependencies = [ + "protobuf > 3.19.3", + "click >= 8", + "pyyaml >= 6", + "jdot >= 0.5" +] + +[project.optional-dependencies] +test = [ + "pytest < 5.0.0", +] + +[project.urls] +homepage = "https://substrait.io/" +repository = "https://github.com/substrait-io/substrait-validator" + +[project.scripts] +substrait-validator = "substrait_validator:cli" diff --git a/py/src/lib.rs b/py/src/lib.rs new file mode 100644 index 00000000..20852b4a --- /dev/null +++ b/py/src/lib.rs @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: Apache-2.0 + +// This happens in PyO3 generated code, and there doesn't seem to be a more +// narrow scope that this can be disabled in (clippy seems a bit confused about +// the code causing the warning, in general). +#![allow(clippy::needless_option_as_deref)] + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use pyo3::types::{PyBytes, PyDict, PyTuple}; + +/// Represents a validator/parser configuration. 
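+///
+/// Illustrative Python-side usage, via the wrapper class defined in
+/// `substrait_validator/__init__.py` (the `plan` value is a placeholder):
+///
+///     cfg = substrait_validator.Config()
+///     cfg.ignore_unknown_fields()
+///     substrait_validator.check_plan_valid(plan, cfg)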
+#[pyclass]
+struct Config {
+    config: substrait_validator::Config,
+}
+
+#[pymethods]
+impl Config {
+    #[new]
+    pub fn new() -> Self {
+        Config {
+            config: substrait_validator::Config::new(),
+        }
+    }
+
+    /// Instructs the validator to ignore protobuf fields that it doesn't know
+    /// about yet (i.e., that have been added to the Substrait protobuf
+    /// descriptions, but haven't yet been implemented in the validator) if the
+    /// fields are set to their default value. If this option isn't set, or if
+    /// an unknown field is not set to its default value, a warning is emitted.
+    pub fn ignore_unknown_fields(&mut self) {
+        self.config.ignore_unknown_fields = true;
+    }
+
+    /// Explicitly allows a protobuf message type to be used in advanced
+    /// extensions, despite the fact that the validator can't validate it. If
+    /// an advanced extension is encountered that isn't explicitly allowed, a
+    /// warning is emitted. The pattern may include * and ? wildcards for
+    /// glob-like matching (see
+    /// https://docs.rs/glob/latest/glob/struct.Pattern.html for the complete
+    /// syntax).
+    pub fn allow_proto_any_url(&mut self, pattern: &str) -> PyResult<()> {
+        let pattern = match substrait_validator::Pattern::new(pattern) {
+            Ok(p) => p,
+            Err(e) => {
+                return Err(PyValueError::new_err(format!(
+                    "invalid pattern {pattern:?}: {e}"
+                )));
+            }
+        };
+        self.config.allow_proto_any_url(pattern);
+        Ok(())
+    }
+
+    /// Sets a minimum and/or maximum error level for the given class of
+    /// diagnostic messages. Any previous settings for this class are
+    /// overridden.
+    pub fn override_diagnostic_level(
+        &mut self,
+        class: u32,
+        minimum: &str,
+        maximum: &str,
+    ) -> PyResult<()> {
+        fn str_to_level(level: &str) -> PyResult<substrait_validator::Level> {
+            match level {
+                "info" => Ok(substrait_validator::Level::Info),
+                "warning" => Ok(substrait_validator::Level::Warning),
+                "error" => Ok(substrait_validator::Level::Error),
+                level => Err(PyValueError::new_err(format!(
+                    "invalid level {level:?}; must be \"info\", \"warning\", or \"error\""
+                ))),
+            }
+        }
+        let class = match substrait_validator::Classification::from_code(class) {
+            Some(c) => c,
+            None => {
+                return Err(PyValueError::new_err(format!(
+                    "unknown diagnostic class {class}"
+                )))
+            }
+        };
+        let minimum = str_to_level(minimum)?;
+        let maximum = str_to_level(maximum)?;
+        self.config
+            .override_diagnostic_level(class, minimum, maximum);
+        Ok(())
+    }
+
+    /// Overrides the resolution behavior for (YAML) URIs matching the given
+    /// pattern. The pattern may include * and ? wildcards for glob-like
+    /// matching (see https://docs.rs/glob/latest/glob/struct.Pattern.html
+    /// for the complete syntax). If resolve_as is None, the URI will not
+    /// be resolved; otherwise it should be a string representing the URI it
+    /// should be resolved as.
+    pub fn override_uri(&mut self, pattern: &str, resolve_as: Option<&str>) -> PyResult<()> {
+        let pattern = match substrait_validator::Pattern::new(pattern) {
+            Ok(p) => p,
+            Err(e) => {
+                return Err(PyValueError::new_err(format!(
+                    "invalid pattern {pattern:?}: {e}"
+                )));
+            }
+        };
+        self.config.override_uri(pattern, resolve_as);
+        Ok(())
+    }
+
+    /// Registers a URI resolution function with this configuration. If
+    /// the given function fails, any previously registered function will be
+    /// used as a fallback. The callback function must take a single string
+    /// argument and return a bytes object, or throw an exception on failure.
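+    ///
+    /// For example, the Python wrapper's add_urllib_resolver() effectively
+    /// registers:
+    ///
+    ///     cfg.add_uri_resolver(lambda uri: urllib.request.urlopen(uri).read())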
+    pub fn add_uri_resolver(&mut self, callback: PyObject) {
+        self.config
+            .add_uri_resolver(move |uri| -> Result<Vec<u8>, PyErr> {
+                pyo3::Python::with_gil(|py| {
+                    Ok(callback
+                        .call1(py, (uri,))?
+                        .as_ref(py)
+                        .downcast::<PyBytes>()?
+                        .as_bytes()
+                        .to_owned())
+                })
+            })
+    }
+}
+
+/// Represents a Substrait plan parse tree, as parsed by the validator.
+///
+/// To construct a parse tree (and in doing so, validate the Substrait plan),
+/// simply pass a bytes object containing the substrait.Plan message to the
+/// constructor. Note that this "never fails": any failures to parse the
+/// bytes object will be embedded as diagnostics in the ResultHandle object.
+/// This allows multiple error messages to be contained within the object. Use
+/// check(), check_valid(), or check_not_invalid() to check validity.
+#[pyclass]
+struct ResultHandle {
+    root: substrait_validator::ParseResult,
+}
+
+#[pymethods]
+impl ResultHandle {
+    #[new]
+    pub fn new(data: &[u8], config: Option<&Config>) -> Self {
+        Self {
+            root: if let Some(config) = config {
+                substrait_validator::parse(data, &config.config)
+            } else {
+                substrait_validator::parse(data, &substrait_validator::Config::default())
+            },
+        }
+    }
+
+    /// Checks the validity of the plan passed to this ParseResult during
+    /// construction. Returns -1 for invalid plans, 0 for possibly valid
+    /// plans (i.e. the validator was unable to prove validity either way),
+    /// or 1 for valid plans.
+    pub fn check(&self) -> i32 {
+        match self.root.check() {
+            substrait_validator::Validity::Valid => 1,
+            substrait_validator::Validity::MaybeValid => 0,
+            substrait_validator::Validity::Invalid => -1,
+        }
+    }
+
+    /// Throws a ValueError exception containing the first error or warning
+    /// encountered in the plan if the plan was not proven to be valid by the
+    /// validator.
+    pub fn check_valid(&self) -> PyResult<()> {
+        if let Some(diag) = self.root.get_diagnostic() {
+            if diag.adjusted_level >= substrait_validator::Level::Warning {
+                return Err(PyValueError::new_err(diag.to_string()));
+            }
+        }
+        Ok(())
+    }
+
+    /// Throws a ValueError exception containing the first error encountered
+    /// in the plan if the plan was proven to be invalid by the validator.
+    pub fn check_not_invalid(&self) -> PyResult<()> {
+        if let Some(diag) = self.root.get_diagnostic() {
+            if diag.adjusted_level >= substrait_validator::Level::Error {
+                return Err(PyValueError::new_err(diag.to_string()));
+            }
+        }
+        Ok(())
+    }
+
+    /// Exports all diagnostic messages contained in this parse result as a
+    /// multiline string.
+    pub fn export_diagnostics(&self) -> PyResult<String> {
+        let mut result: Vec<u8> = vec![];
+        self.root.export(
+            &mut result,
+            substrait_validator::export::Format::Diagnostics,
+        )?;
+        let result = String::from_utf8(result)?;
+        Ok(result)
+    }
+
+    /// Exports the parse tree as an HTML multiline string, intended for
+    /// debugging.
+    pub fn export_html(&self) -> PyResult<String> {
+        let mut result: Vec<u8> = vec![];
+        self.root
+            .export(&mut result, substrait_validator::export::Format::Html)?;
+        let result = String::from_utf8(result)?;
+        Ok(result)
+    }
+
+    /// Exports the entire parse tree as a substrait.validator.ParseResult
+    /// protobuf message, using binary serialization.
+    pub fn export_proto(&self, py: Python) -> PyResult<PyObject> {
+        let mut result = vec![];
+        self.root
+            .export(&mut result, substrait_validator::export::Format::Proto)?;
+        let result = PyBytes::new(py, &result).into();
+        Ok(result)
+    }
+}
+
+/// Rust-native module for the validator.
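+///
+/// The `substrait_validator` Python package wraps this module in its
+/// `__init__.py`, re-exporting ResultHandle and wrapping Config with a more
+/// Pythonic interface.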
+#[pymodule]
+fn substrait_validator(_py: Python, m: &PyModule) -> PyResult<()> {
+    /// Returns a dictionary mapping all diagnostic codes currently defined
+    /// to three-tuples consisting of:
+    /// - the name of the diagnostic as a str;
+    /// - its description as a str; and
+    /// - the diagnostic code of its parent as an integer, or None for code 0.
+    #[pyfn(m)]
+    #[pyo3(name = "get_diagnostic_codes")]
+    fn get_diagnostic_codes_py(py: Python) -> PyResult<PyObject> {
+        let dict = PyDict::new(py);
+        for class in substrait_validator::iter_diagnostics() {
+            dict.set_item(
+                class.code(),
+                PyTuple::new(
+                    py,
+                    [
+                        class.name().to_object(py),
+                        class.description().to_object(py),
+                        if class.code() == 0 {
+                            py.None()
+                        } else {
+                            substrait_validator::Classification::parent(class.code()).to_object(py)
+                        },
+                    ],
+                ),
+            )?;
+        }
+        Ok(dict.into())
+    }
+
+    m.add_class::<Config>()?;
+    m.add_class::<ResultHandle>()?;
+    Ok(())
+}
diff --git a/py/substrait_validator/__init__.py b/py/substrait_validator/__init__.py
new file mode 100644
index 00000000..6645fba6
--- /dev/null
+++ b/py/substrait_validator/__init__.py
@@ -0,0 +1,748 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import sys
+import json
+import jdot
+import yaml
+import click
+import urllib.request
+from io import BytesIO
+from typing import Iterable
+from google.protobuf import json_format
+from google.protobuf.message import DecodeError as ProtoDecodeError
+from .substrait_validator import ResultHandle, Config as _Config, get_diagnostic_codes
+from .substrait.plan_pb2 import Plan
+from .substrait.validator.validator_pb2 import ParseResult, Diagnostic, Path
+
+
+_JDOT_MACROS = """@macros
+.field .selection { .directReference .structField .field ?v .rootReference {} }
+.field0 .selection { .directReference .structField {} .rootReference {} }
+.nullable .nullability "NULLABILITY_NULLABLE"
+.required .nullability "NULLABILITY_REQUIRED"
+
+@output
+"""
+
+
+def _jdot_coder() -> jdot.JdotCoder:
+    coder = jdot.JdotCoder()
+    coder.decode(_JDOT_MACROS)
+    return coder
+
+
+def _jdot_dumps(data) -> str:
+    return _JDOT_MACROS + _jdot_coder().encode(
+        data, formatter=jdot.formatter.JdotFormatter()
+    )
+
+
+def _jdot_loads(data: str):
+    return _jdot_coder().decode(data)
+
+
+def _populate_config(cfg):
+    """We can't derive from _Config to add the add_urllib_resolver() function,
+    so we'll just have to monkey-patch it."""
+
+    def generate_method(cls, name, fn):
+        def x(self, *args, **kwargs):
+            return fn(self._config, *args, **kwargs)
+
+        x.__name__ = name
+        x.__doc__ = fn.__doc__
+        setattr(cls, name, x)
+
+    for name in dir(_Config):
+        if name.startswith("_"):
+            continue
+        f = getattr(_Config, name)
+        if not callable(f):
+            continue
+        generate_method(cfg, name, f)
+    cfg.__doc__ = _Config.__doc__
+    return cfg
+
+
+@_populate_config
+class Config:
+    def __init__(self):
+        self._config = _Config()
+
+    @staticmethod
+    def _unwrap(config):
+        if isinstance(config, Config):
+            return config._config
+        elif isinstance(config, _Config):
+            return config
+        elif config is None:
+            return None
+        else:
+            raise TypeError("unsupported type: {}".format(type(config)))
+
+    def add_urllib_resolver(self):
+        """Adds a URI resolver based on urllib."""
+
+        def urllib_resolver(uri):
+            return urllib.request.urlopen(uri).read()
+
+        self._config.add_uri_resolver(urllib_resolver)
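+
+
+# Illustrative example: configure URI resolution, then validate a plan
+# (`plan_bytes` stands in for a serialized substrait.Plan):
+#
+#   cfg = Config()
+#   cfg.add_urllib_resolver()
+#   check_plan_valid(plan_bytes, cfg)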
+
+
+def load_plan_from_proto(data: bytes) -> Plan:
+    """Load a Substrait plan from its protobuf serialization."""
+    if not isinstance(data, bytes):
+        raise TypeError("unsupported type: {}".format(type(data)))
+    plan = Plan()
+    plan.ParseFromString(data)
+    return plan
+
+
+def load_plan_from_json(data: str) -> Plan:
+    """Load a Substrait plan from its JSON string representation."""
+    if not isinstance(data, str):
+        raise TypeError("unsupported type: {}".format(type(data)))
+    return json_format.Parse(data, Plan())
+
+
+def load_plan_from_dict(data: dict) -> Plan:
+    """Load a Substrait plan from its Python object JSON representation."""
+    if not isinstance(data, dict):
+        raise TypeError("unsupported type: {}".format(type(data)))
+    return load_plan_from_json(json.dumps(data))
+
+
+def load_plan_from_yaml(data: str) -> Plan:
+    """Load a Substrait plan from YAML data mimicking the structure of
+    its JSON string representation."""
+    if not isinstance(data, str):
+        raise TypeError("unsupported type: {}".format(type(data)))
+    return load_plan_from_dict(yaml.safe_load(data))
+
+
+def load_plan_from_jdot(data: str) -> Plan:
+    """Load a Substrait plan from JDOT data mimicking the structure of
+    its JSON string representation."""
+    if not isinstance(data, str):
+        raise TypeError("unsupported type: {}".format(type(data)))
+    return load_plan_from_dict(_jdot_loads(data))
+
+
+def load_plan(data) -> Plan:
+    """Loads a plan from its binary protobuf serialization (bytes input),
+    a JSON string (string input), or a dictionary representation of such a
+    JSON string (dict input). If data is already a Plan, this function is a
+    no-op and simply returns its input."""
+    if isinstance(data, Plan):
+        return data
+    elif isinstance(data, bytes):
+        return load_plan_from_proto(data)
+    elif isinstance(data, str):
+        return load_plan_from_json(data)
+    elif isinstance(data, dict):
+        return load_plan_from_dict(data)
+    else:
+        raise TypeError("unsupported type: {}".format(type(data)))
+
+
+def parse_plan(plan, config=None) -> ParseResult:
+    """Parses the given plan with the validator. plan can be anything
+    supported by load_plan(), a Plan object, or a ResultHandle object. This is
+    just an alternate name for plan_to_parse_result()."""
+    return plan_to_parse_result(plan, config)
+
+
+def plan_to_proto(plan) -> bytes:
+    """Converts a plan to its binary protobuf serialization. plan can be
+    anything supported by load_plan()."""
+    return load_plan(plan).SerializeToString()
+
+
+def plan_to_json(plan) -> str:
+    """Converts a plan to its JSON serialization, returned as a string. plan
+    can be anything supported by load_plan()."""
+    return json_format.MessageToJson(load_plan(plan))
+
+
+def plan_to_dict(plan) -> dict:
+    """Converts a plan to its JSON serialization, returned as a dict. plan can
+    be anything supported by load_plan()."""
+    return json_format.MessageToDict(load_plan(plan))
+
+
+def plan_to_yaml(plan) -> str:
+    """Converts a plan to the YAML equivalent of its JSON serialization,
+    returned as a string. plan can be anything supported by load_plan()."""
+    return yaml.safe_dump(plan_to_dict(plan))
+
+
+def plan_to_jdot(plan) -> str:
+    """Converts a plan to the JDOT equivalent of its JSON serialization,
+    returned as a string. plan can be anything supported by load_plan()."""
+    return _jdot_dumps(plan_to_dict(plan))
+
+
+def plan_to_result_handle(plan, config=None) -> ResultHandle:
+    """Parses a Substrait plan using the validator, and returns its result
+    handle object. plan can be anything supported by load_plan(). If the
+    input is already a ResultHandle, it is returned as-is."""
+    if isinstance(plan, ResultHandle):
+        return plan
+    if isinstance(plan, bytes):
+        data = plan
+    else:
+        data = plan_to_proto(plan)
+    return ResultHandle(data, Config._unwrap(config))
+
+
+def plan_to_parse_result(plan, config=None) -> ParseResult:
+    """Parses the given plan with the validator, and returns its parse result.
+    plan can be anything supported by load_plan(), a Plan object, or a
+    ResultHandle object."""
+    result = ParseResult()
+    result.ParseFromString(plan_to_parse_result_proto(plan, config))
+    return result
+
+
+def plan_to_parse_result_proto(plan, config=None) -> bytes:
+    """Same as parse_plan(), but returns the binary serialization of the
+    parse result. This is faster if you don't plan to use the serialization
+    from Python."""
+    return plan_to_result_handle(plan, config).export_proto()
+
+
+def plan_to_diagnostics(plan, config=None) -> Iterable[Diagnostic]:
+    """Converts a plan to an iterable of Diagnostics. plan can be anything
+    supported by plan_to_result_handle()."""
+
+    def walk(node):
+        for data in node.data:
+            if data.HasField("child"):
+                for diagnostic in walk(data.child.node):
+                    yield diagnostic
+            elif data.HasField("diagnostic"):
+                yield data.diagnostic
+
+    return walk(plan_to_parse_result(plan, config).root)
+
+
+def plan_to_diagnostics_str(plan, config=None) -> str:
+    """Converts a plan to a multiline string representing the diagnostic
+    messages returned by the validator for that plan. plan can be anything
+    supported by plan_to_result_handle()."""
+    return plan_to_result_handle(plan, config).export_diagnostics()
+
+
+def plan_to_html(plan, config=None) -> str:
+    """Generates an HTML page for the given plan to serve as documentation
+    while debugging. plan can be anything supported by
+    plan_to_result_handle()."""
+    return plan_to_result_handle(plan, config).export_html()
+
+
+def check_plan(plan, config=None) -> int:
+    """Returns 1 if the given plan is valid, -1 if it is invalid, or 0 if the
+    validator cannot determine validity. plan can be anything supported by
+    load_plan(), a Plan object, or a ResultHandle object."""
+    return plan_to_result_handle(plan, config).check()
+
+
+def check_plan_valid(plan, config=None):
+    """Throws a ValueError exception containing the first error or warning
+    encountered in the plan if the validator cannot prove correctness of
+    the given plan. plan can be anything supported by load_plan(), a Plan
+    object, or a ResultHandle object."""
+    plan_to_result_handle(plan, config).check_valid()
+
+
+def check_plan_not_invalid(plan, config=None):
+    """Throws a ValueError exception containing the first error encountered in
+    the plan if the validator can prove that the given plan is invalid. plan
+    can be anything supported by load_plan(), a Plan object, or a ResultHandle
+    object."""
+    plan_to_result_handle(plan, config).check_not_invalid()
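+
+
+# Illustrative example of how the check functions compose (`plan_bytes` stands
+# in for a serialized substrait.Plan):
+#
+#   handle = plan_to_result_handle(plan_bytes)
+#   check_plan(handle)              # -1 invalid, 0 unproven, 1 valid
+#   check_plan_valid(handle)        # raises unless the plan is proven valid
+#   check_plan_not_invalid(handle)  # raises only if proven invalid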
+
+
+def path_to_string(path: Path) -> str:
+    """Converts a substrait.validator.Path message to a string."""
+    elements = [path.root]
+    for element in path.elements:
+        if element.HasField("field"):
+            elements.append(f".{element.field.field}")
+        elif element.HasField("repeated_field"):
+            elements.append(
+                f".{element.repeated_field.field}[{element.repeated_field.index}]"
+            )
+        elif element.HasField("oneof_field"):
+            elements.append(
+                f".{element.oneof_field.field}<{element.oneof_field.variant}>"
+            )
+        elif element.HasField("array_element"):
+            elements.append(f"[{element.array_element.index}]")
+        else:
+            raise ValueError("invalid path element")
+    return "".join(elements)
+
+
+@click.command()
+@click.argument("infile", required=False)
+@click.option(
+    "--in-type",
+    type=click.Choice(["ext", "proto", "json", "yaml", "jdot"], case_sensitive=False),
+    default="ext",
+    help=(
+        'Input file type. "ext" uses the extension of the input '
+        'file, defaulting to "proto" if there is none.'
+    ),
+)
+@click.option(
+    "--verbosity",
+    "-v",
+    type=click.Choice(
+        ["info", "warn", "error", "fatal", "quiet"], case_sensitive=False
+    ),
+    default="warn",
+    help="Specifies the verbosity for writing diagnostics to stderr.",
+)
+@click.option(
+    "--out-file",
+    "-O",
+    default=None,
+    help='Output file. "-" may be used to select stdout.',
+)
+@click.option(
+    "--out-type",
+    type=click.Choice(
+        ["ext", "diag", "html", "proto", "json", "yaml", "jdot"], case_sensitive=False
+    ),
+    default="ext",
+    help=(
+        'Output file type. "ext" uses the extension of the output '
+        'file, defaulting to "diag" if there is none.'
+    ),
+)
+@click.option(
+    "--mode",
+    "-m",
+    type=click.Choice(["convert", "ignore", "loose", "strict"], case_sensitive=False),
+    default="loose",
+    help=(
+        'Validation mode. "convert" disables all but protobuf\'s '
+        "internal validation, and can be used to convert between "
+        'different representations of substrait.Plan. "ignore" '
+        "runs validation, but ignores the result (i.e. the "
+        "program always returns 0 and emits an output file if "
+        'requested). "loose" fails only if the validator can '
+        'prove that the plan is invalid. "strict" fails if it '
+        "cannot prove that it is valid."
+    ),
+)
+@click.option(
+    "--ignore-unknown-fields",
+    is_flag=True,
+    help=(
+        "Do not generate warnings for unknown protobuf fields "
+        "that are set to their protobuf-defined default value."
+    ),
+)
+@click.option(
+    "--allow-proto-any",
+    multiple=True,
+    help=(
+        "Explicitly allow the given protobuf type URL(s) to be "
+        "used in protobuf Any messages. Supports glob syntax."
+    ),
+)
+@click.option(
+    "--diagnostic-level",
+    nargs=3,
+    multiple=True,
+    help=(
+        "Clamps the error level of diagnostics with diagnostic "
+        "code or class [0] to at least [1] and at most [2]. "
+        "For example, --diagnostic-level 1 warn error will "
+        "override the level of info diagnostics with code 1 "
+        "to warning, leaving the other levels unchanged."
+    ),
+)
+@click.option(
+    "--override-uri",
+    nargs=2,
+    multiple=True,
+    help=(
+        "Overrides URIs in the plan that match [0] with [1]. Set "
+        '[1] to "-" to disable resolution of matching URIs. '
+        "Supports glob syntax. For example, "
+        '"--override-uri http://* -" disables resolution via '
+        "http."
+    ),
+)
+@click.option(
+    "--use-urllib/--no-use-urllib",
+    default=True,
+    help=(
+        "Enable URI resolution via urllib. Enabled by default. "
+        "If disabled, only file:// URIs will resolve (after "
+        "application of any --override-uri options)."
+    ),
+)
+@click.option(
+    "--help-diagnostics",
+    is_flag=True,
+    help="Show a list of all known diagnostic codes and exit.",
+)
+def cli(  # noqa: C901
+    infile,
+    in_type,
+    out_file,
+    out_type,
+    mode,
+    verbosity,
+    ignore_unknown_fields,
+    allow_proto_any,
+    diagnostic_level,
+    override_uri,
+    use_urllib,
+    help_diagnostics,
+):
+    """Validate or convert the substrait.Plan represented by INFILE (or stdin
+    using "-").
+
+    The following formats are supported:
+
+    \b
+    - proto: binary serialization format of protobuf.
+    - json: JSON serialization format of protobuf.
+    - yaml: like JSON, but represented as YAML.
+    - jdot: like JSON, but represented as JDOT (still highly experimental,
+      see https://github.com/saulpw/jdot).
+    - diag*: list of validator diagnostic messages.
+    - html*: all information known about the plan in HTML format.
+
+    *output-only, and not supported in -mconvert mode.
+
+    When validation is enabled, the output message type will be
+    substrait.validator.ParseResult. If you just want to convert between
+    different representations of the substrait.Plan message, use -mconvert.
+    """
+
+    # Define various helper functions and constants.
+    INFO = Diagnostic.Level.LEVEL_INFO
+    WARN = Diagnostic.Level.LEVEL_WARNING
+    ERROR = Diagnostic.Level.LEVEL_ERROR
+    FATAL = ERROR + 1
+    QUIET = FATAL + 1
+
+    def level_str_to_int(level):
+        """Converts a string representation of an error level or verbosity to
+        its internal integer representation."""
+        return {
+            "info": INFO,
+            "warn": WARN,
+            "error": ERROR,
+            "fatal": FATAL,
+            "quiet": QUIET,
+        }[level]
+
+    def emit_diagnostic(level, msg, code=None, source=None, original_level=None):
+        """Emits a diagnostic message to stderr."""
+
+        # Only print the diagnostic if the configured verbosity is high enough.
+        if level < verbosity_level:
+            return
+
+        # Determine the original error level.
+        if original_level is None:
+            original_level = level
+
+        # Format the level.
+        formatted = [
+            {
+                FATAL: click.style("Fatal error", fg="red", bold=True),
+                ERROR: click.style("Error", fg="red", bold=True),
+                WARN: click.style("Warning", fg="yellow", bold=False),
+                INFO: click.style("Info", fg="green", bold=False),
+            }[level]
+        ]
+
+        # Format extra information written within parentheses.
+        parens = []
+        if original_level != level:
+            if original_level > level:
+                mod = "reduced from "
+            else:
+                mod = "promoted from "
+            mod += {
+                FATAL: "fatal",
+                ERROR: "error",
+                WARN: "warning",
+                INFO: "info",
+            }[original_level]
+            parens.append(mod)
+        if code is not None:
+            parens.append(f"code {code:04d}")
+        if parens:
+            formatted.append(" ({})".format(", ".join(parens)))
+        formatted.append(":\n")
+
+        # Append source information, if known.
+        if source is not None:
+            formatted.append(f" at {source}:\n")
+
+        # Append the actual message.
+        formatted.append(" ")
+        formatted.append("\n ".join(str(msg).split("\n")))
+        formatted.append("\n")
+
+        # Print the formatted diagnostic.
+        click.echo("".join(formatted), err=True)
+
+    def fatal(*args, **kwargs):
+        """Shorthand for emit_diagnostic(FATAL, ...)
followed by exiting with + code 1.""" + emit_diagnostic(FATAL, *args, **kwargs) + sys.exit(1) + + def deduce_format(fil, typ, remap): + """Deduces the file format for fil with type hint typ using the rules + in remap.""" + if typ == "ext": + if fil is None: + typ = remap["DEFAULT"] + else: + _, *ext = fil.rsplit(".", maxsplit=1) + if ext: + typ = ext[0].lower() + typ = remap.get(typ, remap["DEFAULT"]) + return typ + + def emit_output(data): + """Emits the given output data as specified on the command line.""" + # Encode text formats as unicode. + if not isinstance(data, bytes): + data = data.encode("utf-8") + + # Write to the output. + if out_file == "-": + try: + count = sys.stdout.buffer.write(data) + except IOError as e: + fatal(f"failed to write to stdout: {e}") + elif out_file is not None: + try: + with open(out_file, "wb") as f: + count = f.write(data) + except IOError as e: + fatal(f"failed to write output file: {e}") + else: + return + if count < len(data): + fatal("failed to write all output") + + def emit_proto(out_message): + """Emits the given protobuf message as specified on the command + line.""" + + # Convert to appropriate data format. + if out_type == "proto": + emit_output(out_message.SerializeToString()) + elif out_type == "json": + emit_output(json_format.MessageToJson(out_message)) + else: + out_dict = json_format.MessageToDict(out_message) + if out_type == "yaml": + emit_output(yaml.safe_dump(out_dict)) + elif out_type == "jdot": + emit_output(_jdot_dumps(out_dict)) + else: + fatal(f"cannot emit protobuf message in {out_type} format") + + # Print diagnostic code help if requested. + if help_diagnostics: + click.echo("The following diagnostic codes are defined:\n") + diags = {} + for code, (name, desc, parent) in sorted(get_diagnostic_codes().items()): + diag = (code, name, desc, []) + diags[code] = diag + if parent is not None: + diags[parent][3].append(diag) + + def print_diag(diag, first_prefix="", next_prefix=""): + code, name, desc, children = diag + click.echo(f"{first_prefix}{code:04d} ({name}): {desc}.") + for child in children[:-1]: + print_diag(child, f"{next_prefix} |- ", f"{next_prefix} | ") + if children: + print_diag(children[-1], f"{next_prefix} '- ", f"{next_prefix} ") + + print_diag(diags[0]) + sys.exit(0) + + # Parse verbosity level. + verbosity_level = level_str_to_int(verbosity) + + # Check input file. + in_file = infile + if in_file is None: + click.echo("Missing input file. Try --help for usage information.", err=True) + sys.exit(2) + + # Handle automatic format deduction. + in_type = deduce_format( + in_file, + in_type, + { + "DEFAULT": "proto", + "json": "json", + "yaml": "yaml", + "jdot": "jdot", + }, + ) + out_type = deduce_format( + out_file, + out_type, + { + "DEFAULT": "proto", + "json": "json", + "yaml": "yaml", + "jdot": "jdot", + "txt": "diag", + "html": "html", + "htm": "html", + }, + ) + + # Read input file. + if in_file == "-": + try: + in_data = sys.stdin.buffer.read() + except IOError as e: + fatal(f"failed to read from stdin: {e}") + else: + try: + with open(in_file, "rb") as f: + in_data = f.read() + except IOError as e: + fatal(f"failed to read input file: {e}") + + # Parse input format. + if in_type == "proto": + + # Convert the plan directly. + try: + in_plan = load_plan_from_proto(in_data) + except ProtoDecodeError as e: + fatal(e) + + else: + + # Remaining formats are UTF-8 encoded. 
+        try:
+            in_str = in_data.decode("utf8")
+        except UnicodeError as e:
+            fatal(f"failed to decode input file: {e}")
+
+        # Convert from different variations of the JSON object model.
+        if in_type == "json":
+            try:
+                in_dict = json.loads(in_str)
+            except json.decoder.JSONDecodeError as e:
+                fatal(f"failed to decode input file: {e}")
+        elif in_type == "yaml":
+            try:
+                in_dict = yaml.safe_load(in_str)
+            except yaml.YAMLError as e:
+                fatal(f"failed to decode input file: {e}")
+        elif in_type == "jdot":
+            try:
+                in_dict = _jdot_loads(in_str)
+            except jdot.decoder.DecodeException as e:
+                fatal(f"failed to decode input file: {e}")
+        else:
+            raise NotImplementedError(in_type)
+
+        # The outermost structure must be a dict for anything to work at all.
+        if not isinstance(in_dict, dict):
+            fatal("toplevel structure of decoded JSON-like input is not an object")
+
+        # Convert the dict representation of the JSON object model to the
+        # protobuf message wrapper.
+        try:
+            in_plan = load_plan_from_dict(in_dict)
+        except json_format.ParseError as e:
+            fatal(e)
+
+    # Handle convert-only mode.
+    if mode == "convert":
+        emit_proto(in_plan)
+        return 0
+
+    # Construct parser/validator configuration.
+    config = Config()
+    if ignore_unknown_fields:
+        config.ignore_unknown_fields()
+    for pattern in allow_proto_any:
+        try:
+            config.allow_proto_any_url(pattern)
+        except ValueError as e:
+            fatal(e)
+    for code, minimum, maximum in diagnostic_level:
+        try:
+            code = int(code, 10)
+            if code < 0:
+                raise ValueError()
+            minimum = minimum.lower()
+            if minimum == "warn":
+                minimum = "warning"
+            maximum = maximum.lower()
+            if maximum == "warn":
+                maximum = "warning"
+            config.override_diagnostic_level(code, minimum, maximum)
+        except ValueError as e:
+            fatal(e)
+    for pattern, resolve_as in override_uri:
+        if resolve_as == "-":
+            resolve_as = None
+        try:
+            config.override_uri(pattern, resolve_as)
+        except ValueError as e:
+            fatal(e)
+    if use_urllib:
+        config.add_urllib_resolver()
+
+    # Run the parser/validator.
+    result = plan_to_result_handle(in_plan, config)
+
+    # Emit diagnostics to stderr.
+    for diagnostic in plan_to_diagnostics(result):
+        emit_diagnostic(
+            msg=diagnostic.msg,
+            code=diagnostic.cause,
+            source=path_to_string(diagnostic.path),
+            level=diagnostic.adjusted_level,
+            original_level=diagnostic.original_level,
+        )
+
+    # Check validity.
+    validity = check_plan(result)
+    if mode == "loose":
+        if validity < 0:
+            fatal("plan is invalid")
+    elif mode == "strict":
+        if validity < 1:
+            fatal("failed to prove that plan is valid")
+    elif mode != "ignore":
+        raise ValueError("mode")
+
+    # Emit output file.
+ if out_type == "diag": + emit_output(plan_to_diagnostics_str(result)) + elif out_type == "html": + emit_output(plan_to_html(result)) + else: + emit_proto(plan_to_parse_result(result)) + + return 0 + + +if __name__ == "__main__": + cli() diff --git a/py/substrait_validator_build/__init__.py b/py/substrait_validator_build/__init__.py new file mode 100644 index 00000000..bae30bde --- /dev/null +++ b/py/substrait_validator_build/__init__.py @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: Apache-2.0 + +from maturin import * +import shutil +import os + + +_PATHS = [ + (os.path.join("..", "proto"), "proto"), + (os.path.join("..", "substrait", "proto"), "proto"), + (os.path.join("..", "substrait", "text"), "text"), + (os.path.join("..", "LICENSE"), "LICENSE"), + (None, "protoc_out"), + (None, "substrait_validator/substrait"), + (None, "substrait_validator/__pycache__"), +] + + +def clean(): + for _, path in _PATHS: + if os.path.isdir(path): + shutil.rmtree(path) + elif os.path.isfile(path): + os.unlink(path) + + +def _copytree(source, dest): + if os.path.isdir(source): + if not os.path.isdir(dest): + os.makedirs(dest) + files = os.listdir(source) + for f in files: + _copytree(os.path.join(source, f), os.path.join(dest, f)) + else: + shutil.copyfile(source, dest) + + +def populate(): + clean() + for source, dest in _PATHS: + if source is not None: + _copytree(source, dest) + + +def _prepare(): + # If the local_dependencies directory exists, pip is building the package + # from a source distribution. In that case, the build environment is + # already as it should be. + if not os.path.isdir("local_dependencies"): + populate() + + +_maturin_prepare_metadata_for_build_wheel = ( + prepare_metadata_for_build_wheel # noqa: F405 +) + + +def prepare_metadata_for_build_wheel(*args, **kwargs): + _prepare() + return _maturin_prepare_metadata_for_build_wheel(*args, **kwargs) + + +_maturin_build_wheel = build_wheel # noqa: F405 + + +def build_wheel(*args, **kwargs): + _prepare() + return _maturin_build_wheel(*args, **kwargs) + + +_maturin_build_sdist = build_sdist # noqa: F405 + + +def build_sdist(*args, **kwargs): + _prepare() + return _maturin_build_sdist(*args, **kwargs) diff --git a/py/tests/data.py b/py/tests/data.py new file mode 100644 index 00000000..e70e4684 --- /dev/null +++ b/py/tests/data.py @@ -0,0 +1,887 @@ +# SPDX-License-Identifier: Apache-2.0 + +BASIC_PLAN = """ +{ + "extensionUris": [], + "extensions": [], + "relations": [ + { + "rel": { + "project": { + "input": { + "read": { + "common": { + "direct": {} + }, + "projection": { + "select": { + "structItems": [ + { + "field": 0 + }, + { + "field": 1 + } + ] + }, + "maintainSingularStruct": false + }, + "namedTable": { + "names": [ + "person" + ] + } + } + }, + "expressions": [ + { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + } + } + }, + { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + ] + } + } + } + ], + "expectedTypeUrls": [] +} +""" + +COMPLEX_PLAN = """ +{ + "extensionUris": [{ + "extensionUriAnchor": 1, + "uri": "/functions_boolean.yaml" + }, { + "extensionUriAnchor": 4, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 3, + "uri": "/functions_datetime.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_comparison.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "and:bool" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + 
"name": "equal:any1_any1" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 3, + "name": "lt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 4, + "name": "gt:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 5, + "name": "multiply:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 6, + "name": "subtract:opt_decimal_decimal" + } + }, { + "extensionFunction": { + "extensionUriReference": 4, + "functionAnchor": 7, + "name": "sum:opt_decimal" + } + }], + "relations": [{ + "root": { + "input": { + "fetch": { + "common": { + "direct": { + } + }, + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [4, 5, 6, 7] + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [33, 34, 35, 36] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "join": { + "common": { + "direct": { + } + }, + "left": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "varchar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 40, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 117, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["CUSTOMER"] + } + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 15, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "i32": { + "typeVariationReference": 0, + 
"nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 79, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["ORDERS"] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "right": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "expression": { + "literal": { + "boolean": true, + "nullable": false + } + }, + "type": "JOIN_TYPE_INNER" + } + }, + "condition": { + "scalarFunction": { + "functionReference": 1, + "args": [{ + "scalarFunction": { + "functionReference": 2, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + }, { + "cast": { + "type": { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "input": { + "literal": { + "fixedChar": "HOUSEHOLD", + "nullable": false + } + } + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + 
"selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + } + } + }, { + "scalarFunction": { + "functionReference": 3, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + }, { + "literal": { + "date": 9214, + "nullable": false + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }, { + "scalarFunction": { + "functionReference": 4, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 27 + } + }, + "rootReference": { + } + } + }, { + "literal": { + "date": 9214, + "nullable": false + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 17 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 12 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 15 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 5, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 22 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 6, + "args": [{ + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false + } + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 23 + } + }, + "rootReference": { + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + }, { + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + 
}], + "measures": [{ + "measure": { + "functionReference": 7, + "args": [{ + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + } + } + }] + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_DESC_NULLS_FIRST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "offset": "0", + "count": "10" + } + }, + "names": ["L_ORDERKEY", "REVENUE", "O_ORDERDATE", "O_SHIPPRIORITY"] + } + }], + "expectedTypeUrls": [] +} +""" + +BASIC_YAML = """--- +types: + - name: point + structure: + latitude: i32 + longitude: i32 + - name: line + structure: + start: point + end: point +""" diff --git a/py/tests/test_api.py b/py/tests/test_api.py new file mode 100644 index 00000000..e987fd07 --- /dev/null +++ b/py/tests/test_api.py @@ -0,0 +1,156 @@ +# SPDX-License-Identifier: Apache-2.0 + +import substrait_validator as sv +import pytest +from data import BASIC_PLAN, BASIC_YAML + + +def test_proto_roundtrip(): + """Round-trip test a basic Plan using the protobuf wrapper functions.""" + original_plan = sv.load_plan(BASIC_PLAN) + assert type(original_plan) is sv.Plan + + # Round-trip via binary representation. + data = sv.plan_to_proto(original_plan) + assert type(data) is bytes + round_tripped_plan = sv.load_plan(data) + assert round_tripped_plan == original_plan + + # Round-trip via JSON string. + data = sv.plan_to_json(original_plan) + assert type(data) is str + round_tripped_plan = sv.load_plan(data) + assert round_tripped_plan == original_plan + + # Round-trip via JSON dict. + data = sv.plan_to_dict(original_plan) + assert type(data) is dict + round_tripped_plan = sv.load_plan(data) + assert round_tripped_plan == original_plan + + # Round-trip via YAML. + data = sv.plan_to_yaml(original_plan) + assert type(data) is str + round_tripped_plan = sv.load_plan_from_yaml(data) + assert round_tripped_plan == original_plan + + # Round-trip via JDOT. + data = sv.plan_to_jdot(original_plan) + assert type(data) is str + round_tripped_plan = sv.load_plan_from_jdot(data) + assert round_tripped_plan == original_plan + + # Check identity. 
+ round_tripped_plan = sv.load_plan(original_plan) + assert round_tripped_plan == original_plan + + +def test_parsing(): + """Test the parsing function.""" + result = sv.plan_to_parse_result(BASIC_PLAN) + assert type(result) == sv.ParseResult + + root = sv.parse_plan(BASIC_PLAN) + assert type(root) == sv.ParseResult + + root = sv.plan_to_parse_result(BASIC_PLAN) + assert type(root) == sv.ParseResult + + +def test_export_html(): + """Test the HTML export function.""" + html = sv.plan_to_html(BASIC_PLAN) + assert type(html) == str + lines = list(filter(bool, html.split("\n"))) + assert lines[0] == "" + assert lines[-1] == "" + + +def test_export_diags(): + """Test the diagnostics export functions.""" + diags = sv.plan_to_diagnostics_str(BASIC_PLAN) + assert type(diags) == str + + diags = list(sv.plan_to_diagnostics(BASIC_PLAN)) + for diag in diags: + assert type(diag) == sv.Diagnostic + + +def test_valid_invalid(): + """Test the plan validity functions.""" + # Override all diagnostics to info, so the plan is considered valid. + config = sv.Config() + config.override_diagnostic_level(0, "info", "info") + plan = sv.plan_to_result_handle(BASIC_PLAN, config) + assert sv.check_plan(plan) == 1 + sv.check_plan_valid(plan) + sv.check_plan_not_invalid(plan) + + # Override all diagnostics to warning, so the validity is considered to be + # unknown. + config = sv.Config() + config.override_diagnostic_level(0, "warning", "warning") + plan = sv.plan_to_result_handle(BASIC_PLAN, config) + assert sv.check_plan(plan) == 0 + with pytest.raises(ValueError): + sv.check_plan_valid(plan) + sv.check_plan_not_invalid(plan) + + # Override all diagnostics to error, so the plan is considered to be + # invalid. + config = sv.Config() + config.override_diagnostic_level(0, "error", "error") + plan = sv.plan_to_result_handle(BASIC_PLAN, config) + assert sv.check_plan(plan) == -1 + with pytest.raises(ValueError): + sv.check_plan_valid(plan) + with pytest.raises(ValueError): + sv.check_plan_not_invalid(plan) + + +def test_resolver_callback(): + """Tests whether the YAML URI resolver callback works.""" + + def resolver(s): + if s == "test:hello": + return BASIC_YAML.encode("utf-8") + raise ValueError("unknown URI") + + config = sv.Config() + + # Disable "not yet implemented" warnings. + config.override_diagnostic_level(1, "info", "info") + + # Disable missing root relation error, so we don't have to supply one. + config.override_diagnostic_level(5001, "info", "info") + + # Add the resolver. + config.add_uri_resolver(resolver) + + sv.check_plan_valid( + { + "extensionUris": [ + { + "extension_uri_anchor": 1, + "uri": "test:hello", + } + ] + }, + config, + ) + + with pytest.raises( + ValueError, + match=r"failed to resolve YAML: ValueError: unknown URI \(code 2002\)", + ): + sv.check_plan_valid( + { + "extensionUris": [ + { + "extension_uri_anchor": 1, + "uri": "test:bye", + } + ] + }, + config, + ) diff --git a/py/tests/test_cli.py b/py/tests/test_cli.py new file mode 100644 index 00000000..15398a13 --- /dev/null +++ b/py/tests/test_cli.py @@ -0,0 +1,271 @@ +# SPDX-License-Identifier: Apache-2.0 + +from click.testing import CliRunner +from substrait_validator import cli +from data import BASIC_PLAN, COMPLEX_PLAN +import tempfile +import json +import pprint +from os.path import join as pjoin +from os.path import isfile +import platform + + +def run(*args): + return CliRunner().invoke(cli, args) + + +def test_no_args(): + result = run() + assert result.exit_code == 2 + assert "Missing input file." 
in result.output + + +def test_mconvert_auto(): + """Test -mconvert with automatic format deduction from file extensions.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "plan.json"), "w") as f: + f.write(BASIC_PLAN) + + def convert(src, dest): + assert ( + run(pjoin(tmp, src), "-O", pjoin(tmp, dest), "-mconvert").exit_code == 0 + ) + + convert("plan.json", "plan.proto") + + with open(pjoin(tmp, "plan.proto"), "rb") as f: + a = f.read() + + convert("plan.proto", "plan.yaml") + convert("plan.yaml", "plan.jdot") + convert("plan.jdot", "plan.json") + convert("plan.json", "plan.bin") + + with open(pjoin(tmp, "plan.bin"), "rb") as f: + b = f.read() + + assert a == b + + +def test_mconvert_manual(): + """Test -mconvert with manually specified input and output formats.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "data"), "w") as f: + f.write(BASIC_PLAN) + + def convert(in_type, out_type): + assert ( + run( + pjoin(tmp, "data"), + "-O", + pjoin(tmp, "data"), + "-mconvert", + "--in-type", + in_type, + "--out-type", + out_type, + ).exit_code + == 0 + ) + + convert("json", "proto") + + with open(pjoin(tmp, "data"), "rb") as f: + a = f.read() + + convert("proto", "yaml") + convert("yaml", "jdot") + convert("jdot", "json") + convert("json", "proto") + + with open(pjoin(tmp, "data"), "rb") as f: + b = f.read() + + assert a == b + + +def test_mconvert_complex(): + """Test -mconvert with a complex plan.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "data"), "w") as f: + f.write(COMPLEX_PLAN) + + def convert(in_type, out_type): + assert ( + run( + pjoin(tmp, "data"), + "-O", + pjoin(tmp, "data"), + "-mconvert", + "--in-type", + in_type, + "--out-type", + out_type, + ).exit_code + == 0 + ) + + convert("json", "proto") + + with open(pjoin(tmp, "data"), "rb") as f: + a = f.read() + + convert("proto", "yaml") + convert("yaml", "jdot") + convert("jdot", "json") + convert("json", "proto") + + with open(pjoin(tmp, "data"), "rb") as f: + b = f.read() + + assert a == b + + +def test_valid_invalid(): + """Test exit code based on validity for various modes using diagnostic + level overrides to force an outcome.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "plan.json"), "w") as f: + f.write(BASIC_PLAN) + + # Test all corner cases. + def x(mode, level): + return run( + pjoin(tmp, "plan.json"), + "-m", + mode, + "--diagnostic-level", + "0", + level, + level, + ).exit_code + + assert x("ignore", "error") == 0 + assert x("loose", "error") == 1 + assert x("loose", "warning") == 0 + assert x("strict", "warning") == 1 + assert x("strict", "info") == 0 + + # Default should be -mloose. + def x(level): + return run( + pjoin(tmp, "plan.json"), "--diagnostic-level", "0", level, level + ).exit_code + + assert x("info") == 0 + assert x("warning") == 0 + assert x("error") == 1 + + +def test_verbosity(): + """Test verbosity using diagnostic level overrides.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "plan.json"), "w") as f: + f.write(BASIC_PLAN) + + # Test all corner cases.
+ def x(verbosity, level): + return run( + pjoin(tmp, "plan.json"), + "-v", + verbosity, + "--diagnostic-level", + "0", + level, + level, + ).output.split(maxsplit=1)[:1] + + assert x("quiet", "error") == [] + assert x("fatal", "error") == ["Fatal"] + assert x("error", "error") == ["Error"] + assert x("error", "warn") == [] + assert x("warn", "warn") == ["Warning"] + assert x("warn", "info") == [] + assert x("info", "info") == ["Info"] + + +def test_export(): + """Test export logic.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "plan.json"), "w") as f: + f.write(BASIC_PLAN) + + def x(output, level): + return run( + pjoin(tmp, "plan.json"), + "-O", + pjoin(tmp, output), + "--diagnostic-level", + "0", + level, + level, + ).exit_code + + def y(output): + assert x(output, "error") == 1 + assert not isfile(pjoin(tmp, output)) + assert x(output, "info") == 0 + with open(pjoin(tmp, output), "rb") as f: + return f.read() + + assert y("output.proto")[0] == 10 + assert y("output.json").startswith(b'{\n "root":') + assert y("output.yaml").startswith(b"root:") + assert y("output.jdot").startswith(b"@macros") + assert b"" in y("output.html") + assert y("output.txt").startswith(b"Info") + + +def test_uri_resolution(): + """Test URI resolution logic.""" + with tempfile.TemporaryDirectory() as tmp: + with open(pjoin(tmp, "plan.json"), "w") as f: + f.write( + json.dumps( + { + "extensionUris": [ + { + "extension_uri_anchor": 1, + "uri": "https://raw.githubusercontent.com/substrait-io/substrait/82078995c19faa9d4e53a90cd66800c26d88f970/extensions/extension_types.yaml", + } + ] + } + ) + ) + + # Obtain a valid file:// URL for the above JSON file as well. + if platform.system() == "Windows": + local_url = "file:///" + pjoin(tmp, "plan.json").replace("\\", "/") + else: + local_url = "file://" + pjoin(tmp, "plan.json") + + def x(*args): + return run( + pjoin(tmp, "plan.json"), + "-verror", # verbosity error + "--diagnostic-level", + "2002", + "error", + "error", # YAML resolution failure -> error + "--diagnostic-level", + "0", + "info", + "info", # all other diagnostics -> info + *args + ).exit_code + + # Actual remote lookup. + assert x() == 0 + + # Disable remote lookups, so we expect a failure (not file://). + assert x("--no-use-urllib") == 1 + + # Try file:// protocol instead. This one is handled by the Rust + # fallback resolution logic. Note that plan.json is obviously not + # valid YAML, but all diagnostics not related to URI resolution are + # overridden to info, so we don't have to care. + assert x("--no-use-urllib", "--override-uri", "*", local_url) == 0 + + # urllib should also support file://. + assert x("--use-urllib", "--override-uri", "*", local_url) == 0 diff --git a/rs/Cargo.toml b/rs/Cargo.toml new file mode 100644 index 00000000..319cef84 --- /dev/null +++ b/rs/Cargo.toml @@ -0,0 +1,86 @@ +[package] +name = "substrait-validator" +description = "Substrait validator" +homepage = "https://substrait.io/" +repository = "https://github.com/substrait-io/substrait" +readme = "README.md" +version = "0.0.1" +edition = "2021" +license = "Apache-2.0" +include = ["src", "build.rs", "README.md"] + +[dependencies] + +# Prost is used to deal with protobuf serialization and deserialization. +prost = "0.9" +prost-types = "0.9" + +# Prost doesn't generate any introspection stuff, so we hack that stuff in with +# our own procedural macros. 
+substrait-validator-derive = { path = "../derive", version = "0.0.1" } + +# Google/protobuf has a funny idea about case conventions (it converts them all +# over the place) and prost remaps to Rust's conventions to boot. So, to +# recover the original names as much as possible, we need some case conversion +# of our own. +heck = "0.4" + +# Used for deserializing the YAML extension files. Note that we're +# not using serde_yaml here because serde_yaml is just a wrapper around +# yaml-rust, and we don't use any of serde's derive logic. +yaml-rust = "0.4" + +# The schema for the extension files uses jsonschema syntax. The Rust crate for +# schema validation with this format uses serde_json types as input for +# both the schema and the input, so we need to depend on that as well, even +# though we don't actually do any JSON serialization and deserialization. +jsonschema = { version = "=0.15.0", default-features = false } +serde_json = "1" + +# Used for checking identifier syntax (could be removed if regexes don't end up +# being useful elsewhere). +regex = "1.5" + +# Used for checking URI syntax. +uriparse = "0.6" + +# Used for only compiling regexes and the extension file schema once. +once_cell = "1.9" + +# Various small helper crates for the diagnostic/error enums. +thiserror = "1.0" +strum = "0.23" +strum_macros = "0.23" +num-traits = "0.2" +num-derive = "0.3" + +# For intersperse(). +itertools = "0.8" + +# Glob patterns are used in the configuration structure and to +# syntax-check URI glob patterns in the Substrait plans. +glob = "0.3" + +# Used to resolve YAML URIs. If the curl feature is disabled, only file:// +# URLs will work without adding a custom resolver. +url = "2.2" +curl = { version = "0.4", optional = true } + +# Used for interpretation and conversion of various date/time-related literals +# in plans. +chrono = "0.4" + +# Used for pretty-printing floating point literal values. +float-pretty-print = "0.1" + +# Used by the HTML exporter. +base64 = "0.13" +percent-encoding = "2.1" + +[build-dependencies] + +# Used for generating Rust structs from the protobuf definitions. +prost-build = "0.9" + +# Used to automatically find all protobuf files. +walkdir = "2" diff --git a/rs/README.md b/rs/README.md new file mode 100644 index 00000000..1ea785f6 --- /dev/null +++ b/rs/README.md @@ -0,0 +1,29 @@ +Substrait query plan validator +============================== + +This crate implements a validator for [Substrait](https://substrait.io/) query +plans. + +``` +[dependencies] +substrait-validator = "0.0.1" +``` + +YAML file resolution +-------------------- + +One of the complexities of validating Substrait plans is resolving the YAML +extension files. By default, the crate only supports `file://...` URLs, but +often, the YAML files will be stored remotely. To make handling this easier, +you can enable [curl](https://crates.io/crates/curl) as an optional +dependency: + +``` +[dependencies] +substrait-validator = { version = "0.0.1", features = ["curl"] } +``` + +This adds the `substrait_validator::Config::add_curl_yaml_uri_resolver()` +method, which will use `libcurl` to resolve the files, thus supporting all the +common protocols (http, https, ftp, etc.). The downside is that the curl crate +depends on system libraries.
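+
+For illustration, wiring this up might look like the following sketch. Only
+`add_curl_yaml_uri_resolver()` is taken from the description above; the
+`Config` constructor and the commented-out `parse` entry point are
+assumptions, not verbatim API:
+
+```
+let mut config = substrait_validator::Config::new();
+config.add_curl_yaml_uri_resolver();
+// e.g.: let result = substrait_validator::parse(&plan_bytes, &config);
+```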
diff --git a/rs/build.rs b/rs/build.rs new file mode 100644 index 00000000..ee614584 --- /dev/null +++ b/rs/build.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: Apache-2.0 + +use std::env; +use std::ffi::OsStr; +use std::fs; +use std::io::Error; +use std::io::ErrorKind; +use std::io::Result; +use std::path::Path; +use std::path::PathBuf; + +/// Copies the file at src_tree/path to dest_tree/path if it's newer. +/// Automatically creates parent directories in dest as needed. +fn synchronize(src_tree: &Path, dest_tree: &Path, path: &Path) -> Result<()> { + // Construct paths. + let src = src_tree.join(path); + let dest = dest_tree.join(path); + + // Inform cargo that we should re-run if src changes. + println!("cargo:rerun-if-changed={}", src.display()); + + // Ensure that the source exists. + if !src.exists() { + return Err(Error::new(ErrorKind::Other, "source file not found")); + } + + // Check if destination already exists. + if dest.exists() { + // Check if it's at least as new as the source; in that case + // we don't have to copy it again. + if dest.metadata()?.modified()? >= src.metadata()?.modified()? { + return Ok(()); + } + } else { + // Check if the destination directory exists, and if not, create it. + if let Some(parent) = dest.parent() { + if !parent.is_dir() { + fs::create_dir_all(parent)?; + } + } + } + + // Copy the file. + std::fs::copy(&src, &dest)?; + + Ok(()) +} + +/// Returns all protobuf files in the given directory. +fn find_proto_files(proto_path: &Path) -> Vec<PathBuf> { + walkdir::WalkDir::new(proto_path) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| { + e.path().extension() == Some(OsStr::new("proto")) && e.metadata().unwrap().is_file() + }) + .map(|e| e.into_path()) + .collect() +} + +fn main() -> Result<()> { + // Determine the directory of Cargo.toml for this crate. + let manifest_dir = + PathBuf::from(&env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set")); + let resource_dir = manifest_dir.join("src/resources"); + + // Determine whether we're building from the git repository or from a + // crate file. If the former, we first synchronize our src/resources + // directory with the rest of the repository. + if manifest_dir.join("in-git-repo").exists() { + let validator_git_dir = manifest_dir.join(".."); + let substrait_git_dir = validator_git_dir.join("substrait"); + + // Synchronize the YAML extension file schema. + synchronize( + &substrait_git_dir, + &resource_dir, + &PathBuf::from("text/simple_extensions_schema.yaml"), + )?; + + // Synchronize the protobuf files from the main repository. + for proto_file in find_proto_files(&substrait_git_dir.join("proto")) { + synchronize( + &substrait_git_dir, + &resource_dir, + proto_file + .strip_prefix(&substrait_git_dir) + .expect("failed to strip prefix"), + )?; + } + + // Synchronize the validator-specific protobuf files. + for proto_file in find_proto_files(&validator_git_dir.join("proto")) { + synchronize( + &validator_git_dir, + &resource_dir, + proto_file + .strip_prefix(&validator_git_dir) + .expect("failed to strip prefix"), + )?; + } + } + + // Find all protobuf files in our resource directory. We just synchronized + // these files if we're building from git. + let proto_path = PathBuf::from(&resource_dir).join("proto"); + let proto_files: Vec<_> = find_proto_files(&proto_path); + + // Compile the protobuf files using prost.
+ let mut config = prost_build::Config::new(); + config.type_attribute(".", "#[derive(::substrait_validator_derive::ProtoMeta)]"); + config.compile_protos(&proto_files, &[&proto_path.display().to_string()])?; + + // Inform cargo that changes to the .proto files require a rerun. + for path in &proto_files { + println!("cargo:rerun-if-changed={}", path.display()); + } + + Ok(()) +} diff --git a/rs/in-git-repo b/rs/in-git-repo new file mode 100644 index 00000000..283ec634 --- /dev/null +++ b/rs/in-git-repo @@ -0,0 +1,4 @@ +This file serves as a marker for build.rs that it's building from within the +git repository, which will make it synchronize the protobuf and schema files +with the repo locations. It is not included in the release package, so a build +from crates.io will not try to look for files in parent directories. diff --git a/rs/src/.gitignore b/rs/src/.gitignore new file mode 100644 index 00000000..cb6eb2cc --- /dev/null +++ b/rs/src/.gitignore @@ -0,0 +1 @@ +/resources/ diff --git a/rs/src/export/diagnostics.rs b/rs/src/export/diagnostics.rs new file mode 100644 index 00000000..074e16df --- /dev/null +++ b/rs/src/export/diagnostics.rs @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! This module provides a basic export format that's just a listing of the +//! diagnostic messages contained in the tree. + +use crate::output::parse_result; + +/// Export the diagnostic messages of the tree as a multiline string. +pub fn export( + out: &mut T, + _root_name: &'static str, + result: &parse_result::ParseResult, +) -> std::io::Result<()> { + for diag in result.root.iter_diagnostics() { + writeln!(out, "{diag}")?; + } + Ok(()) +} diff --git a/rs/src/export/html/fa-solid-900.woff2 b/rs/src/export/html/fa-solid-900.woff2 new file mode 100644 index 00000000..360ba115 Binary files /dev/null and b/rs/src/export/html/fa-solid-900.woff2 differ diff --git a/rs/src/export/html/fa-solid-900.woff2.LICENSE.txt b/rs/src/export/html/fa-solid-900.woff2.LICENSE.txt new file mode 100644 index 00000000..87c82e3e --- /dev/null +++ b/rs/src/export/html/fa-solid-900.woff2.LICENSE.txt @@ -0,0 +1,92 @@ +Copyright (c) 2022 Fonticons, Inc. (https://fontawesome.com) +with Reserved Font Name: "Font Awesome". + +This Font Software is licensed under the SIL Open Font License, Version 1.1. +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + +SIL OPEN FONT LICENSE +Version 1.1 - 26 February 2007 + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. 
+ +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting — in part or in whole — any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/rs/src/export/html/mod.rs b/rs/src/export/html/mod.rs new file mode 100644 index 00000000..7d5e4ccd --- /dev/null +++ b/rs/src/export/html/mod.rs @@ -0,0 +1,682 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! This module provides a human-readable export format based on HTML. 
+ +use crate::output::comment; +use crate::output::data_type; +use crate::output::data_type::ParameterInfo; +use crate::output::diagnostic; +use crate::output::parse_result; +use crate::output::path; +use crate::output::tree; +use std::sync::Arc; + +const HEADER1: &str = concat!( + r#" + + + + + + + +"# + ); + +const FOOTER: &str = r#" + + + +"#; + +/// All the error levels for nodes that we have different formatting for in +/// the context of HTML output. +#[derive(PartialOrd, Ord, PartialEq, Eq)] +enum Level { + /// Subtree is valid. + Ok, + + /// There are descendent nodes with warnings. + ChildWarning, + + /// The current node has warnings. + Warning, + + /// There are descendent nodes with errors. + ChildError, + + /// The current node has errors. + Error, +} + +impl From<diagnostic::Level> for Level { + fn from(level: diagnostic::Level) -> Self { + match level { + diagnostic::Level::Info => Level::Ok, + diagnostic::Level::Warning => Level::Warning, + diagnostic::Level::Error => Level::Error, + } + } +} + +impl Level { + pub fn class(&self) -> &'static str { + match self { + Level::Ok => "ok", + Level::ChildWarning => "warn_child", + Level::Warning => "warn_here", + Level::ChildError => "error_child", + Level::Error => "error_here", + } + } +} + +/// Escapes HTML text or parameter values using character entities. +fn html_escape<S: AsRef<str>>(text: S) -> String { + let text = text.as_ref(); + let mut result = String::with_capacity(text.len()); + for c in text.chars() { + match c { + '&' => result += "&amp;", + '<' => result += "&lt;", + '>' => result += "&gt;", + '"' => result += "&quot;", + '\'' => result += "&#39;", + c => result.push(c), + } + } + result +} + +/// Encodes part of a URL using percent escape sequences. +fn url_encode<S: AsRef<str>>(text: S) -> String { + use std::fmt::Write; + let text = text.as_ref(); + let mut result = String::with_capacity(text.len()); + for c in text.chars() { + if c.is_alphanumeric() || "-._~!$&'()*+,;=:@".contains(c) { + result.push(c); + } else { + let mut buf = [0; 4]; + for b in c.encode_utf8(&mut buf).as_bytes() { + write!(result, "%{:02x}", *b).unwrap(); + } + } + } + result +} + +/// Encodes a node path using () instead of [] and {}. Such paths should +/// still be unambiguous, and should be more readable than their +/// percent-encoded variants (only round parentheses are unreserved in URLs). +fn path_encode<S: AsRef<str>>(text: S) -> String { + text.as_ref() + .chars() + .map(|c| match c { + '[' => '(', + ']' => ')', + '<' => '(', + '>' => ')', + c => c, + }) + .collect() +} + +/// Formats a path to a node or diagnostic. +fn format_path(path: &path::PathBuf, index: Option<usize>) -> String { + if let Some(index) = index { + format!("{path}:{index}") + } else { + path.to_string() + } +} + +/// Formats the parameters of an <a> tag to a node or diagnostic. +fn format_reference_parameters(path: &path::PathBuf, index: Option<usize>) -> String { + let path = format_path(path, index); + format!( + "href=\"#{}\" title=\"{}\"", + html_escape(url_encode(path_encode(&path))), + html_escape(&path) + ) +} + +/// Formats a link to a node (index = None) +/// or diagnostic (index = Some(index of NodeData entry)). +fn format_reference<S: std::fmt::Display>( + text: S, + path: &path::PathBuf, + index: Option<usize>, +) -> String { + format!("<a {}>{text}</a>", format_reference_parameters(path, index)) +} + +/// Formats an anchor/permalink tag for a node (index = None) +/// or diagnostic (index = Some(index of NodeData entry)).
+fn format_anchor(path: &path::PathBuf, index: Option<usize>) -> String { + format!( + "<a class=\"anchor\" {}></a>", + format_reference_parameters(path, index) + ) +} + +/// Formats the id parameter for a div/details tag for a node (index = None) +/// or diagnostic (index = Some(index of NodeData entry)). +fn format_id(path: &path::PathBuf, index: Option<usize>) -> String { + format!( + "id=\"{}\"", + html_escape(url_encode(path_encode(format_path(path, index)))) + ) +} + +/// Creates a span with the given class name. The text is HTML-escaped. +fn format_span<S: ToString>(class: &'static str, text: S) -> String { + format!( + "<span class=\"{class}\">{}</span>", + html_escape(text.to_string()) + ) +} + +/// Creates a span with the given class name. +fn format_span_html<S: std::fmt::Display>(class: &'static str, html: S) -> String { + format!("<span class=\"{class}\">{}</span>", html) +} + +/// Formats a diagnostic message box. path should be the node that the +/// diagnostic is defined in, and index should be its index within Node::data. +/// with_id specifies whether the HTML id parameter should be included. +fn format_diagnostic( + diag: &diagnostic::Diagnostic, + path: &path::PathBuf, + index: usize, + with_id: bool, + with_path: bool, +) -> String { + let cause = format_span( + "cause", + if with_path { + diag.to_string() + } else { + format!("{:#}", diag) + }, + ); + let cause = if &diag.path == path { + cause + } else { + format_reference(cause, &diag.path, None) + }; + let id = if with_id { + let mut id = format_id(path, Some(index)); + id.push(' '); + id + } else { + String::new() + }; + let anchor = format_anchor(path, Some(index)); + + let class = match diag.adjusted_level { + diagnostic::Level::Info => "diag_info", + diagnostic::Level::Warning => "diag_warn", + diagnostic::Level::Error => "diag_error", + }; + + format!("<div {id}class=\"card {class}\">\n{cause}\n{anchor}\n</div>") +} + +/// Format a flattened list of diagnostic cards. +fn format_diagnostics(path: &path::Path, node: &tree::Node) -> (Vec<String>, diagnostic::Level) { + let mut html = vec![]; + let mut level = diagnostic::Level::Info; + for (index, data) in node.data.iter().enumerate() { + match data { + tree::NodeData::Child(child) => { + let (sub_html, sub_level) = + format_diagnostics(&path.with(child.path_element.clone()), &child.node); + html.extend(sub_html); + level = std::cmp::max(level, sub_level); + } + tree::NodeData::Diagnostic(diag) => { + html.push(format_diagnostic( + diag, + &path.to_path_buf(), + index, + false, + true, + )); + level = std::cmp::max(level, diag.adjusted_level); + } + _ => {} + } + } + (html, level) +} + +/// Formats a comment span. +fn format_comment_span(span: &comment::Span) -> String { + match &span.link { + None => html_escape(&span.text), + Some(comment::Link::Path(path)) => format_reference(html_escape(&span.text), path, None), + Some(comment::Link::Url(url)) => format!( + "<a href=\"{}\">{}</a>", + html_escape(url), + html_escape(&span.text) + ), + } +} + +/// Formats a comment using HTML markup. +fn format_comment(comment: &comment::Comment) -> String { + let mut result = String::new(); + let mut p_open = false; + for element in comment.elements().iter() { + match element { + comment::Element::Span(span) => { + if !p_open { + result += "<p>"; + p_open = true; + } + result += &format_comment_span(span); + } + comment::Element::NewLine => { + if p_open { + result += "</p>"; + p_open = false; + } + } + comment::Element::ListOpen => { + if p_open { + result += "</p>"; + p_open = false; + } + result += "<ul><li>"; + } + comment::Element::ListNext => { + if p_open { + result += "</p>"; + p_open = false; + } + result += "</li><li>"; + } + comment::Element::ListClose => { + if p_open { + result += "</p>"; + p_open = false; + } + result += "</li></ul>"; + } + } + } + if p_open { + result += "</p>"; + } + result +} + +/// Formats a brief comment using HTML markup. +fn format_brief(brief: &comment::Brief) -> String { + let mut result = String::new(); + for span in brief.spans().iter() { + result += &format_comment_span(span); + } + result +} + +// Format the relation trees. +fn format_relation_tree( + path: &path::Path, + node: &tree::Node, + index: &mut usize, + is_root: bool, + in_expression: bool, +) -> Vec<String> { + let mut html = vec![]; + + let text = node + .brief + .as_ref() + .map(format_brief) + .unwrap_or_else(|| String::from("unknown")); + let is_relation = matches!(node.class, tree::Class::Relation); + let is_expression = matches!(node.class, tree::Class::Expression); + + if is_relation { + if is_root { + html.push("
".to_string()); + html.push(format!( + "Query/relation graph #{}", + *index + )); + html.push("
  • Sink
      ".to_string()); + }; + html.push(format!( + "
    • {text} ({})", + if in_expression { + "subquery" + } else { + "data_source" + }, + format_reference("link", &path.to_path_buf(), None) + )); + } + + let mut has_children = false; + for data in node.data.iter() { + if let tree::NodeData::Child(child) = data { + let sub_html = format_relation_tree( + &path.with(child.path_element.clone()), + &child.node, + index, + is_root && !is_relation, + (in_expression && !is_relation) || is_expression, + ); + if !sub_html.is_empty() { + if is_relation && !has_children { + html.push("
        ".to_string()); + } + has_children = true; + html.extend(sub_html); + } + } + } + + if is_relation { + if has_children { + html.push("
      ".to_string()); + } + html.push("
    • ".to_string()); + if is_root { + html.push("
".to_string()); + html.push("
".to_string()); + *index += 1; + } + } + + html +} + +// Format a data type parameter card. +fn format_data_type_card(content: &str) -> String { + format!( + "
<div class=\"card data_type\">\n{}\n</div>", + html_escape(content), + ) +} + +// Format a data type. +fn format_data_type(prefix: &str, data_type: &Arc<data_type::DataType>) -> Vec<String> { + let mut html = vec![]; + + if data_type.parameters().is_empty() { + html.push(format_data_type_card(&format!("{prefix}: {:#}", data_type))); + } else { + html.push("<details class=\"data_type\">\n".to_string()); + html.push(format!("<summary>{prefix}: {}</summary>", html_escape(data_type.to_string()))); + html.push("".to_string()); + for (index, parameter) in data_type.parameters().iter().enumerate() { + let name = data_type + .class() + .parameter_name(index) + .unwrap_or_else(|| "?".to_string()); + match parameter { + data_type::Parameter::Type(t) => { + html.extend(format_data_type(&format!(".{name}"), t)) + } + data_type::Parameter::NamedType(n, t) => { + html.extend(format_data_type(&format!(".{n}"), t)) + } + data_type::Parameter::Unsigned(i) => { + html.push(format_data_type_card(&format!(".{name}: {i}"))) + } + } + } + html.push("</details>".to_string()); + } + + html +} + +// Format the node tree. +fn format_node_tree( + path: &path::Path, + unknown_subtree: bool, + node: &tree::Node, +) -> (Vec<String>, Level) { + // Get the HTML ID for this card. + let pathbuf = path.to_path_buf(); + let id = format_id(&pathbuf, None); + + // Format the card header. + let brief = if let Some(brief) = &node.brief { + format_span_html("brief", format_brief(brief)) + } else { + String::from("") + }; + let value = match &node.node_type { + tree::NodeType::ProtoMessage(proto_type) => { + format!("{brief} {}", format_span("type", proto_type)) + } + tree::NodeType::ProtoPrimitive(proto_type, data) => { + format!( + "= {} {brief} {}", + format_span("value", data), + format_span("type", proto_type) + ) + } + tree::NodeType::ProtoMissingOneOf => "?".to_string(), + tree::NodeType::NodeReference(num, target) => format_reference( + format!( + "= {} {brief} {}", + format_span("value", num), + format_span("type", "uint32, reference") + ), + &target.path, + None, + ), + tree::NodeType::YamlReference(yaml) => { + format!( + "= {} {brief} {}", + format_span("value", &yaml.uri), + format_span("type", "string, resolved to YAML") + ) + } + tree::NodeType::YamlMap => format!("{brief} {}", format_span("type", "YAML map")), + tree::NodeType::YamlArray => format!("{brief} {}", format_span("type", "YAML array")), + tree::NodeType::YamlPrimitive(data) => format!("= {}{brief}", format_span("value", data)), + }; + let header = format!( + "{} {value} {}", + format_span("field", path.end_to_string()), + format_anchor(&pathbuf, None) + ); + + // If the node doesn't have any additional data associated with it, output + // a normal <div> rather than a <details> card. + if node.data.is_empty() && node.summary.is_none() { + let class = if unknown_subtree { "unknown" } else { "ok" }; + return ( + vec![format!("<div class=\"card {class}\">{header}</div>")], + Level::Ok, + ); + } + + // Gather child nodes here. The first entry of the html Vec is reserved for + // the open tags, which we don't have all the information for just yet. + let mut html = vec![String::new()]; + let mut level = Level::Ok; + + // Add the summary. + if let Some(ref summary) = node.summary { + html.push(format_comment(summary)); + } + + // Iterate over node data here, recursively entering children. + for (index, data) in node.data.iter().enumerate() { + match data { + tree::NodeData::Child(child) => { + let (sub_html, sub_level) = format_node_tree( + &path.with(child.path_element.clone()), + !child.recognized, + &child.node, + ); + html.extend(sub_html); + level = std::cmp::max(level, sub_level); + } + tree::NodeData::Diagnostic(diag) => { + html.push(format_diagnostic( + diag, + &pathbuf, + index, + true, + diag.path != pathbuf, + )); + level = std::cmp::max(level, diag.adjusted_level.into()); + } + tree::NodeData::DataType(data_type) => { + html.extend(format_data_type( + if matches!(node.class, tree::Class::Relation) { + "Schema" + } else { + "Data type" + }, + data_type, + )); + } + tree::NodeData::Comment(comment) => { + html.push("<div class=\"comment\">\n".to_string()); + html.push(format_comment(comment)); + html.push("\n</div>".to_string()); + } + } + } + + // Add the surrounding <details> tags now that we have the error level + // information we needed. + let class = if unknown_subtree { + "unknown" + } else { + level.class() + }; + html[0] = format!("<details {id}class=\"{class}\">\n<summary>\n{header}\n</summary>"); + html.push("</details>".to_string()); + + // Determine the minimum error level for the parent. + let level = match level { + Level::Error => Level::ChildError, + Level::Warning => Level::ChildWarning, + x => x, + }; + + (html, level) +} + +/// Export the tree in HTML format, with as many details as possible, and as +/// human-readable as possible. Purely intended for debugging. +pub fn export<T: std::io::Write>( + out: &mut T, + root_name: &'static str, + result: &parse_result::ParseResult, +) -> std::io::Result<()> { + let path = path::Path::Root(root_name); + + // Generate and write header. + let font_awesome_b64 = base64::encode(FONT_AWESOME); + write!(out, "{HEADER1}{}{HEADER2}", font_awesome_b64)?; + + // Emit the node graph. + writeln!(out, "
")?; + writeln!(out, "Relation graphs")?; + writeln!( + out, + "
Note: data flows upwards in these graphs.
" + )?; + let mut index = 0; + for s in format_relation_tree(&path, &result.root, &mut index, true, false) { + writeln!(out, "{s}")?; + } + writeln!(out, "
")?; + + // Emit diagnostics summary. + let (diag_html, level) = format_diagnostics(&path, &result.root); + let validity_class = match level { + diagnostic::Level::Info => "valid", + diagnostic::Level::Warning => "maybe_valid", + diagnostic::Level::Error => "invalid", + }; + let validity_summary = match level { + diagnostic::Level::Info => "This plan is VALID", + diagnostic::Level::Warning => "The validator was unable to determine validity", + diagnostic::Level::Error => "This plan is INVALID", + }; + writeln!( + out, + "
", + Level::from(level).class() + )?; + writeln!( + out, + "{validity_summary}" + )?; + if diag_html.is_empty() { + writeln!( + out, + "
No diagnostics were reported.
" + )?; + } else { + for s in diag_html { + writeln!(out, "{s}")?; + } + } + writeln!(out, "
")?; + + // Emit protobuf-level raw node tree. + for s in format_node_tree(&path, false, &result.root).0 { + writeln!(out, "{s}")?; + } + + write!(out, "{FOOTER}") +} diff --git a/rs/src/export/html/style.css b/rs/src/export/html/style.css new file mode 100644 index 00000000..96dc1747 --- /dev/null +++ b/rs/src/export/html/style.css @@ -0,0 +1,333 @@ +body { + font-family: sans-serif; +} + +details, +div.card { + border: 1px solid; + border-color: rgba(0, 0, 0, .3); + color: rgba(0, 0, 0, .8); + border-radius: 4px; + margin-top: .2em; +} + +details:hover, +div.card:hover { + border-color: #000; + color: rgba(0, 0, 0, .9); +} + +details:target, +div.card:target { + box-shadow: 0 0 .3em .2em rgba(0, 0, 0, 0.3); + border-color: #000; + color: rgba(0, 0, 0, .9); +} + +details { + padding: .2em .5em 0; +} + +summary { + margin: -.2em -.5em 0; + padding: .2em .5em; +} + +details[open] { + padding: .2em .5em; +} + +details[open] > summary { + border-bottom: 1px solid rgba(0, 0, 0, .3); + margin-bottom: .2em; +} + +div.card { + padding: .2em .5em; +} + +details.ok { + background-color: #dfd; +} + +details.warn_child { + background-color: #fed; +} + +details.warn_here { + background-color: #fdb; +} + +details.error_child { + background-color: #fdd; +} + +details.error_here { + background-color: #fbb; +} + +details.unknown, +div.unknown { + background-color: #ddd; +} + +details.data_type { + background-color: #def; +} + +details.data_type > summary::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #048; + content: "\f0db"; + padding-right: .2em; +} + +div.data_type { + background-color: #bdf; +} + +div.data_type::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #048; + content: "\f0db"; + padding-right: .2em; +} + +div.comment { + background-color: #bfd; +} + +div.comment::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #084; + content: "\f249"; + padding-right: .2em; + float: left; +} + +div.comment > p { + margin: 0 0 0.2em; +} + +details > p { + margin: 0 0 0.2em; +} + +details.relation_tree { + background-color: #bdf; +} + +details.relation_tree > summary::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #048; + content: "\f0e8"; + padding-right: .2em; +} + +div.diag_info { + background-color: #9f9; + color: #333; +} + +div.diag_info::before, +summary.valid::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #080; + content: "\f058"; +} + +span.valid { + color: #080; + font-weight: bold; +} + +div.diag_warn { + background-color: #fc9; + color: #333; + font-weight: bold; +} + +div.diag_warn::before, +summary.maybe_valid::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #840; + content: "\f059"; +} + +div.diag_error { + background-color: #f99; + color: #000; + font-weight: bold; +} + +div.diag_error::before, +summary.invalid::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #800; + content: "\f00d"; +} + +span.invalid { + color: #c00; + font-weight: bold; +} + +a.anchor { + opacity: 0.4; + text-decoration: none; + float: right; +} + +a.anchor:hover { + opacity: 1.0; +} + +a.anchor::before { + font: normal 900 1em "Font Awesome 6 Free"; + color: #000; + content: "\f0c1"; +} + +details:target, +div.card:target { + animation: highlight 1000ms ease-out; +} + +@keyframes highlight { + 0% { box-shadow: 0 0 2em 1em rgba(0, 0, 0, 0.3); } + 50% { box-shadow: 0 0 2em 1em rgba(0, 0, 0, 0.3); } + 100% { } +} + +span.field { + font-weight: bold; + color: #333; +} + +span.value { + font-weight: bold; + color: 
#000; +} + +span.brief { + font-style: italic; + color: #000; +} + +span.type { + font-style: italic; + font-size: 80%; + color: #555; +} + +span.cause { + font-weight: normal; +} + +div.note { + font-style: italic; + color: #555; +} + +.tree, +.tree ul, +.tree li { + list-style: none; + margin: 0; + padding: 0; + position: relative; +} + +.tree { + margin: 0 auto 1em; + text-align: center; +} + +.tree, +.tree ul { + display: table; +} + +.tree ul { + width: 100%; +} + +.tree li { + display: table-cell; + padding: 1.5em 0 0; + vertical-align: top; +} + +/* _________ */ +.tree li:before { + outline: solid 1px #666; + content: ""; + left: 0; + position: absolute; + right: 0; + top: 0; +} + +.tree li:first-child:before { + left: 50%; +} + +.tree li:last-child:before { + right: 50%; +} + +.tree span { + border: solid 0.1em #666; + border-radius: 0.2em; + display: inline-block; + margin: 0 0.2em 0.5em; + padding: 0.2em 0.5em; + position: relative; +} + +/* | */ +.tree ul:before { + outline: solid 1px #555; + content: ""; + height: 0.5em; + left: 50%; + position: absolute; +} + +.tree span:before { + margin-left: -1px; + padding-left: 0.2em; + font-size: 100%; + content: ""; + height: 1.5em; + left: 50%; + position: absolute; +} + +.tree span.data_source:before { + border-left: solid 2px #555; +} + +.tree span.subquery:before { + border-left: dotted 2px #555; +} + +.tree ul:before { + top: -0.5em; +} + +.tree span:before { + top: -1.55em; +} + +/* The root node doesn't connect upwards */ +.tree > li { + margin-top: 0; +} + +.tree > li:before, +.tree > li:after, +.tree > li > span:before { + outline: none !important; + border: none !important; +} diff --git a/rs/src/export/mod.rs b/rs/src/export/mod.rs new file mode 100644 index 00000000..2a865667 --- /dev/null +++ b/rs/src/export/mod.rs @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module dealing with serializing a [ParseResult](parse_result::ParseResult) +//! to a byte stream in various formats. + +mod diagnostics; +mod html; +mod proto; + +use crate::output::parse_result; + +/// Supported output formats for exporting. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Format { + /// Emit a newline-separated, flattened list of diagnostics. + Diagnostics, + + /// Emit a HTML page with detailed information about the parsed plan. + Html, + + /// Emit all parse information as a substrait.validator.Node protobuf + /// message, using binary serialization. + Proto, +} + +/// Exports the given doctree with the given format to the given output. +pub fn export( + out: &mut T, + format: Format, + root_name: &'static str, + result: &parse_result::ParseResult, +) -> std::io::Result<()> { + match format { + Format::Diagnostics => diagnostics::export(out, root_name, result), + Format::Html => html::export(out, root_name, result), + Format::Proto => proto::export(out, root_name, result), + } +} diff --git a/rs/src/export/proto.rs b/rs/src/export/proto.rs new file mode 100644 index 00000000..4343abe0 --- /dev/null +++ b/rs/src/export/proto.rs @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! This module provides an export format based on protobuf, to represent the +//! output tree as accurately as possible. +//! +//! This is primarily intended to be used to cross programming language +//! boundaries for the validator output, whenever the simplified formats are +//! not comprehensive enough. The Python bindings specifically make extensive +//! use of this. 
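+//!
+//! As a sketch of the consuming side (illustrative: the buffer handling is an
+//! assumption, while `export` and the `validator::ParseResult` message are
+//! the ones used by this module), the emitted bytes can be decoded again with
+//! prost:
+//!
+//! ```text
+//! use prost::Message;
+//! let mut buf: Vec<u8> = Vec::new();
+//! export(&mut buf, "plan", &parse_result)?;
+//! let tree = validator::ParseResult::decode(buf.as_slice())?;
+//! ```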
+ +use crate::input::proto::substrait::validator; +use crate::output::comment; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::output::extension; +use crate::output::parse_result; +use crate::output::path; +use crate::output::primitive_data; +use crate::output::tree; +use prost::Message; + +impl From<&parse_result::ParseResult> for validator::ParseResult { + fn from(result: &parse_result::ParseResult) -> Self { + Self { + root: Some((&result.root).into()), + } + } +} + +impl From<&tree::Node> for validator::Node { + fn from(node: &tree::Node) -> Self { + Self { + node_type: Some((&node.node_type).into()), + class: (&node.class).into(), + brief: node.brief.as_ref().map(|x| x.into()), + summary: node.summary.as_ref().map(|x| x.into()), + data_type: node.data_type.as_ref().map(|x| x.as_ref().into()), + data: node.data.iter().map(|x| x.into()).collect(), + } + } +} + +impl From<&tree::Class> for i32 { + fn from(class: &tree::Class) -> Self { + match class { + tree::Class::Misc => validator::node::Class::Unspecified, + tree::Class::Type => validator::node::Class::Type, + tree::Class::Expression => validator::node::Class::Expression, + tree::Class::Relation => validator::node::Class::Relation, + } + .into() + } +} + +impl From<&tree::NodeData> for validator::node::Data { + fn from(node: &tree::NodeData) -> Self { + Self { + kind: Some(match node { + tree::NodeData::Child(child) => validator::node::data::Kind::Child(child.into()), + tree::NodeData::Diagnostic(diagnostic) => { + validator::node::data::Kind::Diagnostic(diagnostic.into()) + } + tree::NodeData::DataType(data_type) => { + validator::node::data::Kind::DataType(data_type.as_ref().into()) + } + tree::NodeData::Comment(comment) => { + validator::node::data::Kind::Comment(comment.into()) + } + }), + } + } +} + +impl From<&tree::Child> for validator::node::Child { + fn from(node: &tree::Child) -> Self { + Self { + path: Some((&node.path_element).into()), + node: Some(node.node.as_ref().into()), + recognized: node.recognized, + } + } +} + +impl From<&diagnostic::Diagnostic> for validator::Diagnostic { + fn from(node: &diagnostic::Diagnostic) -> Self { + Self { + original_level: (&node.original_level).into(), + adjusted_level: (&node.adjusted_level).into(), + cause: node.cause.classification.into(), + msg: node.cause.to_string(), + path: Some((&node.path).into()), + } + } +} + +impl From<&diagnostic::Level> for i32 { + fn from(node: &diagnostic::Level) -> Self { + match node { + diagnostic::Level::Error => validator::diagnostic::Level::Error, + diagnostic::Level::Warning => validator::diagnostic::Level::Warning, + diagnostic::Level::Info => validator::diagnostic::Level::Info, + } + .into() + } +} + +impl From<&comment::Comment> for validator::Comment { + fn from(node: &comment::Comment) -> Self { + Self { + elements: node.elements().iter().map(|x| x.into()).collect(), + } + } +} + +impl From<&comment::Brief> for validator::Comment { + fn from(node: &comment::Brief) -> Self { + Self { + elements: node + .spans() + .iter() + .map(|x| validator::comment::Element { + kind: Some(validator::comment::element::Kind::Span(x.into())), + }) + .collect(), + } + } +} + +impl From<&comment::Element> for validator::comment::Element { + fn from(node: &comment::Element) -> Self { + validator::comment::Element { + kind: Some(match node { + comment::Element::Span(span) => { + validator::comment::element::Kind::Span(span.into()) + } + comment::Element::NewLine => { + validator::comment::element::Kind::NewLine(validator::Empty {}) + } + 
comment::Element::ListOpen => { + validator::comment::element::Kind::ListOpen(validator::Empty {}) + } + comment::Element::ListNext => { + validator::comment::element::Kind::ListNext(validator::Empty {}) + } + comment::Element::ListClose => { + validator::comment::element::Kind::ListClose(validator::Empty {}) + } + }), + } + } +} + +impl From<&comment::Span> for validator::comment::Span { + fn from(node: &comment::Span) -> Self { + Self { + text: node.text.to_string(), + link: node.link.as_ref().map(|x| x.into()), + } + } +} + +impl From<&comment::Link> for validator::comment::span::Link { + fn from(node: &comment::Link) -> Self { + match node { + comment::Link::Path(path) => validator::comment::span::Link::Path(path.into()), + comment::Link::Url(url) => validator::comment::span::Link::Url(url.into()), + } + } +} + +impl From<&tree::NodeType> for validator::node::NodeType { + fn from(node: &tree::NodeType) -> Self { + match node { + tree::NodeType::ProtoMessage(proto_type) => { + validator::node::NodeType::ProtoMessage(validator::node::ProtoMessage { + path: proto_type.to_string(), + }) + } + tree::NodeType::ProtoPrimitive(proto_type, data) => { + validator::node::NodeType::ProtoPrimitive(validator::node::ProtoPrimitive { + path: proto_type.to_string(), + data: Some(data.into()), + }) + } + tree::NodeType::ProtoMissingOneOf => { + validator::node::NodeType::ProtoMissingOneof(validator::Empty::default()) + } + tree::NodeType::NodeReference(anchor, node) => { + validator::node::NodeType::NodeReference(validator::node::NodeReference { + value: *anchor, + path: Some((&node.path).into()), + }) + } + tree::NodeType::YamlReference(info) => { + validator::node::NodeType::YamlReference(validator::node::YamlReference { + uri: info.uri.name().unwrap_or_default().to_string(), + }) + } + tree::NodeType::YamlMap => { + validator::node::NodeType::YamlMap(validator::Empty::default()) + } + tree::NodeType::YamlArray => { + validator::node::NodeType::YamlArray(validator::Empty::default()) + } + tree::NodeType::YamlPrimitive(data) => { + validator::node::NodeType::YamlPrimitive(data.into()) + } + } + } +} + +impl From<&primitive_data::PrimitiveData> for validator::node::PrimitiveData { + fn from(node: &primitive_data::PrimitiveData) -> Self { + Self { + data: match node { + primitive_data::PrimitiveData::Null => None, + primitive_data::PrimitiveData::Bool(x) => { + Some(validator::node::primitive_data::Data::Boolean(*x)) + } + primitive_data::PrimitiveData::Unsigned(x) => { + Some(validator::node::primitive_data::Data::Unsigned(*x)) + } + primitive_data::PrimitiveData::Signed(x) => { + Some(validator::node::primitive_data::Data::Signed(*x)) + } + primitive_data::PrimitiveData::Float(x) => { + Some(validator::node::primitive_data::Data::Real(*x)) + } + primitive_data::PrimitiveData::String(x) => Some( + validator::node::primitive_data::Data::Unicode(x.to_string()), + ), + primitive_data::PrimitiveData::Bytes(x) => { + Some(validator::node::primitive_data::Data::Binary(x.clone())) + } + primitive_data::PrimitiveData::Enum(x) => Some( + validator::node::primitive_data::Data::Variant(x.to_string()), + ), + primitive_data::PrimitiveData::Any(x) => { + Some(validator::node::primitive_data::Data::Any(x.clone())) + } + }, + } + } +} + +impl From<&path::PathBuf> for validator::Path { + fn from(node: &path::PathBuf) -> Self { + Self { + root: node.root.to_string(), + elements: node.elements.iter().map(|x| x.into()).collect(), + } + } +} + +impl From<&path::PathElement> for validator::path::Element { + fn from(node: 
&path::PathElement) -> Self { + Self { + kind: Some(match node { + path::PathElement::Field(field) => { + validator::path::element::Kind::Field(validator::path::Field { + field: field.to_string(), + }) + } + path::PathElement::Repeated(field, index) => { + validator::path::element::Kind::RepeatedField(validator::path::RepeatedField { + field: field.to_string(), + index: (*index).try_into().unwrap(), + }) + } + path::PathElement::Variant(field, variant) => { + validator::path::element::Kind::OneofField(validator::path::OneOfField { + field: field.to_string(), + variant: variant.to_string(), + }) + } + path::PathElement::Index(index) => { + validator::path::element::Kind::ArrayElement(validator::path::ArrayElement { + index: (*index).try_into().unwrap(), + }) + } + }), + } + } +} + +impl From<&data_type::DataType> for validator::DataType { + fn from(node: &data_type::DataType) -> Self { + Self { + class: Some(node.class().into()), + nullable: node.nullable(), + variation: node.variation().as_ref().map(|x| x.as_ref().into()), + parameters: node.parameters().iter().map(|x| x.into()).collect(), + } + } +} + +impl From<&data_type::Class> for validator::data_type::Class { + fn from(node: &data_type::Class) -> Self { + validator::data_type::Class { + kind: Some(match node { + data_type::Class::Simple(simple) => { + validator::data_type::class::Kind::Simple(simple.into()) + } + data_type::Class::Compound(compound) => { + validator::data_type::class::Kind::Compound(compound.into()) + } + data_type::Class::UserDefined(user_defined) => { + validator::data_type::class::Kind::UserDefinedType(user_defined.as_ref().into()) + } + data_type::Class::Unresolved => { + validator::data_type::class::Kind::UnresolvedType(validator::Empty {}) + } + }), + } + } +} + +impl From<&data_type::Simple> for i32 { + fn from(node: &data_type::Simple) -> Self { + match node { + data_type::Simple::Boolean => validator::data_type::Simple::Boolean, + data_type::Simple::I8 => validator::data_type::Simple::I8, + data_type::Simple::I16 => validator::data_type::Simple::I16, + data_type::Simple::I32 => validator::data_type::Simple::I32, + data_type::Simple::I64 => validator::data_type::Simple::I64, + data_type::Simple::Fp32 => validator::data_type::Simple::Fp32, + data_type::Simple::Fp64 => validator::data_type::Simple::Fp64, + data_type::Simple::String => validator::data_type::Simple::String, + data_type::Simple::Binary => validator::data_type::Simple::Binary, + data_type::Simple::Timestamp => validator::data_type::Simple::Timestamp, + data_type::Simple::TimestampTz => validator::data_type::Simple::TimestampTz, + data_type::Simple::Date => validator::data_type::Simple::Date, + data_type::Simple::Time => validator::data_type::Simple::Time, + data_type::Simple::IntervalYear => validator::data_type::Simple::IntervalYear, + data_type::Simple::IntervalDay => validator::data_type::Simple::IntervalDay, + data_type::Simple::Uuid => validator::data_type::Simple::Uuid, + } + .into() + } +} + +impl From<&data_type::Compound> for i32 { + fn from(node: &data_type::Compound) -> Self { + match node { + data_type::Compound::FixedChar => validator::data_type::Compound::FixedChar, + data_type::Compound::VarChar => validator::data_type::Compound::VarChar, + data_type::Compound::FixedBinary => validator::data_type::Compound::FixedBinary, + data_type::Compound::Decimal => validator::data_type::Compound::Decimal, + data_type::Compound::Struct => validator::data_type::Compound::Struct, + data_type::Compound::NamedStruct => 
validator::data_type::Compound::NamedStruct,
+            data_type::Compound::List => validator::data_type::Compound::List,
+            data_type::Compound::Map => validator::data_type::Compound::Map,
+        }
+        .into()
+    }
+}
+
+impl From<&extension::Reference<extension::DataType>> for validator::data_type::UserDefinedType {
+    fn from(node: &extension::Reference<extension::DataType>) -> Self {
+        Self {
+            uri: node.uri.name().unwrap_or_default().to_string(),
+            name: node.name.name().unwrap_or_default().to_string(),
+            definition: node.definition.as_ref().map(|x| x.as_ref().into()),
+        }
+    }
+}
+
+impl From<&extension::DataType> for validator::data_type::user_defined_type::Definition {
+    fn from(node: &extension::DataType) -> Self {
+        Self {
+            structure: node
+                .structure
+                .iter()
+                .map(
+                    |(name, simple)| validator::data_type::user_defined_type::Element {
+                        name: name.to_string(),
+                        kind: simple.into(),
+                    },
+                )
+                .collect(),
+        }
+    }
+}
+
+impl From<&extension::Reference<extension::TypeVariation>> for validator::data_type::Variation {
+    fn from(node: &extension::Reference<extension::TypeVariation>) -> Self {
+        if let Some(ref definition) = node.definition {
+            validator::data_type::Variation::UserDefinedVariation(
+                validator::data_type::UserDefinedVariation {
+                    uri: node.uri.name().unwrap_or_default().to_string(),
+                    name: node.name.name().unwrap_or_default().to_string(),
+                    definition: Some(Box::new(definition.as_ref().into())),
+                },
+            )
+        } else {
+            validator::data_type::Variation::UnresolvedVariation(validator::Empty {})
+        }
+    }
+}
+
+impl From<&extension::TypeVariation> for validator::data_type::user_defined_variation::Definition {
+    fn from(node: &extension::TypeVariation) -> Self {
+        Self {
+            base_type: None,
+            function_behavior: (&node.function_behavior).into(),
+        }
+    }
+}
+
+impl From<&extension::FunctionBehavior> for i32 {
+    fn from(node: &extension::FunctionBehavior) -> Self {
+        match node {
+            extension::FunctionBehavior::Inherits => {
+                validator::data_type::user_defined_variation::FunctionBehavior::Inherits
+            }
+            extension::FunctionBehavior::Separate => {
+                validator::data_type::user_defined_variation::FunctionBehavior::Separate
+            }
+        }
+        .into()
+    }
+}
+
+impl From<&data_type::Parameter> for validator::data_type::Parameter {
+    fn from(node: &data_type::Parameter) -> Self {
+        Self {
+            kind: Some(match node {
+                data_type::Parameter::Type(data_type) => {
+                    validator::data_type::parameter::Kind::DataType(data_type.as_ref().into())
+                }
+                data_type::Parameter::NamedType(name, data_type) => {
+                    validator::data_type::parameter::Kind::NamedType(validator::data_type::Named {
+                        name: name.to_string(),
+                        data_type: Some(data_type.as_ref().into()),
+                    })
+                }
+                data_type::Parameter::Unsigned(unsigned) => {
+                    validator::data_type::parameter::Kind::Unsigned(*unsigned)
+                }
+            }),
+        }
+    }
+}
+
+/// Export the complete parse tree in protobuf substrait.validator.Node format.
+pub fn export<T: std::io::Write>(
+    out: &mut T,
+    _root_name: &'static str,
+    result: &parse_result::ParseResult,
+) -> std::io::Result<()> {
+    let root = validator::ParseResult::from(result);
+    let buf = root.encode_to_vec();
+    if out.write(&buf)? < buf.len() {
+        Err(std::io::Error::new(
+            std::io::ErrorKind::Other,
+            "failed to write all bytes",
+        ))
+    } else {
+        Ok(())
+    }
+}
diff --git a/rs/src/input/config.rs b/rs/src/input/config.rs
new file mode 100644
index 00000000..0f6dd99f
--- /dev/null
+++ b/rs/src/input/config.rs
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! This module provides the configuration structure for the validator.
+//!
+//! This structure, [`Config`], is to be constructed by the application using
+//! the validator to configure it. Alternatively, the default configuration
+//! can be constructed by using the [`std::default::Default`] trait.
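+//!
+//! As an illustrative sketch only (combining methods defined below):
+//!
+//! ```ignore
+//! let mut config = substrait_validator::Config::new();
+//! config.ignore_unknown_fields();
+//! config.override_diagnostic_level(
+//!     substrait_validator::Classification::Unclassified,
+//!     substrait_validator::Level::Info,
+//!     substrait_validator::Level::Warning,
+//! );
+//! ```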
+
+use crate::output::diagnostic;
+pub use glob;
+use std::collections::HashMap;
+
+/// Trait object representing some immutable binary data.
+pub type BinaryData = Box<dyn AsRef<[u8]>>;
+
+/// Trait object representing some error data.
+pub type ErrorData = Box<dyn std::error::Error>;
+
+/// Callback function type for resolving/downloading URIs.
+pub type UriResolver =
+    Box<dyn Fn(&str) -> std::result::Result<BinaryData, ErrorData> + Send>;
+
+/// Attempts to resolve and fetch the data for the given URI using libcurl,
+/// allowing the validator to handle remote YAML extension URLs with most
+/// protocols.
+#[cfg(feature = "curl")]
+fn resolve_with_curl(uri: &str) -> Result<Vec<u8>, curl::Error> {
+    let mut binary_data: Vec<u8> = vec![];
+    let mut curl_handle = curl::easy::Easy::new();
+    curl_handle.url(uri)?;
+    {
+        let mut transfer = curl_handle.transfer();
+        transfer.write_function(|buf| {
+            binary_data.extend_from_slice(buf);
+            Ok(buf.len())
+        })?;
+        transfer.perform()?;
+    }
+    Ok(binary_data)
+}
+
+/// Configuration structure.
+#[derive(Default)]
+pub struct Config {
+    /// When set, do not generate warnings for unknown protobuf fields that are
+    /// set to their protobuf-defined default value.
+    pub ignore_unknown_fields: bool,
+
+    /// Protobuf message URLs that are explicitly allowed for use in "any"
+    /// messages, i.e. that the caller warrants the existence of in the
+    /// consumer that the plan is validated for.
+    pub allowed_proto_any_urls: Vec<glob::Pattern>,
+
+    /// Allows the level of diagnostic messages to be overridden based on their
+    /// classification/code. The logic for this is as follows:
+    ///
+    /// - if an entry exists for the classification of the incoming diagnostic,
+    ///   override its error level to at most the second argument, and then to
+    ///   at least the first argument. Otherwise,
+    /// - if an entry exists for the group of said classification, use its
+    ///   level limits instead. Otherwise,
+    /// - if an entry exists for Unclassified (code 0), use its level limits
+    ///   instead. Otherwise, do not adjust the level.
+    ///
+    /// Note that setting an entry to (Info, Error) leaves the diagnostic
+    /// level unchanged.
+    pub diagnostic_level_overrides:
+        HashMap<diagnostic::Classification, (diagnostic::Level, diagnostic::Level)>,
+
+    /// Allows URIs from the plan to be remapped (Some(mapping)) or ignored
+    /// (None). All resolution can effectively be disabled by just adding a
+    /// rule that maps * to None. Furthermore, in the absence of a custom
+    /// uri_resolver function, this can be used to remap URIs to
+    /// pre-downloaded files.
+    pub uri_overrides: Vec<(glob::Pattern, Option<String>)>,
+
+    /// Optional callback function for resolving URIs. If specified, all
+    /// URIs (after processing uri_overrides) are resolved using this
+    /// function. The function takes the URI as its argument, and should either
+    /// return the download contents as a [`BinaryData`] object or return an
+    /// [`ErrorData`] error. If no downloader is specified, only file:// URLs
+    /// with an absolute path are supported.
+    pub uri_resolver: Option<UriResolver>,
+}
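+
+// Illustrative walk-through of the level-override lookup described above
+// (the classification named here is real, the table itself is hypothetical):
+// given {TypeMismatchedParameters: (Warning, Warning)}, an Error diagnostic
+// of that class is demoted to Warning and an Info diagnostic is promoted to
+// Warning, while a class without an entry falls back to its group's entry,
+// then to Unclassified (code 0), and is otherwise left unchanged.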
+
+impl Config {
+    /// Creates a default configuration.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Instructs the validator to ignore protobuf fields that it doesn't know
+    /// about yet (i.e., that have been added to the Substrait protobuf
+    /// descriptions, but haven't yet been implemented in the validator) if the
+    /// fields are set to their default value. If this option isn't set, or if
+    /// an unknown field is not set to its default value, a warning is emitted.
+    pub fn ignore_unknown_fields(&mut self) {
+        self.ignore_unknown_fields = true;
+    }
+
+    /// Explicitly allows a protobuf message type to be used in advanced
+    /// extensions, despite the fact that the validator can't validate it. If
+    /// an advanced extension is encountered that isn't explicitly allowed, a
+    /// warning is emitted.
+    pub fn allow_proto_any_url(&mut self, pattern: glob::Pattern) {
+        self.allowed_proto_any_urls.push(pattern);
+    }
+
+    /// Sets a minimum and/or maximum error level for the given class of
+    /// diagnostic messages. Any previous settings for this class are
+    /// overridden.
+    pub fn override_diagnostic_level(
+        &mut self,
+        class: diagnostic::Classification,
+        minimum: diagnostic::Level,
+        maximum: diagnostic::Level,
+    ) {
+        self.diagnostic_level_overrides
+            .insert(class, (minimum, maximum));
+    }
+
+    /// Overrides the resolution behavior for (YAML) URIs matching the given
+    /// pattern. If resolve_as is None, the URI will not be resolved;
+    /// if it is Some(s), it will be resolved as if the URI in the plan had
+    /// been s.
+    pub fn override_uri<S: Into<String>>(
+        &mut self,
+        pattern: glob::Pattern,
+        resolve_as: Option<S>,
+    ) {
+        self.uri_overrides
+            .push((pattern, resolve_as.map(|s| s.into())));
+    }
+
+    /// Registers a URI resolution function with this configuration. If
+    /// the given function fails, any previously registered function will be
+    /// used as a fallback.
+    pub fn add_uri_resolver<F, D, E>(&mut self, resolver: F)
+    where
+        F: Fn(&str) -> Result<D, E> + Send + 'static,
+        D: AsRef<[u8]> + 'static,
+        E: std::error::Error + 'static,
+    {
+        let previous = self.uri_resolver.take();
+        self.uri_resolver = Some(Box::new(move |uri| match resolver(uri) {
+            Ok(d) => Ok(Box::new(d)),
+            Err(e) => match &previous {
+                Some(f) => f.as_ref()(uri),
+                None => Err(Box::new(e)),
+            },
+        }));
+    }
+
+    /// Registers a URI resolver based on libcurl. If libcurl fails, any
+    /// `uri_resolver` registered previously will be used as a fallback.
+    #[cfg(feature = "curl")]
+    pub fn add_curl_uri_resolver(&mut self) {
+        self.add_uri_resolver(resolve_with_curl)
+    }
+}
diff --git a/rs/src/input/mod.rs b/rs/src/input/mod.rs
new file mode 100644
index 00000000..b7ba8808
--- /dev/null
+++ b/rs/src/input/mod.rs
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Input representation module.
+//!
+//! This module provides the data structures for representing the input of the
+//! validator.
+
+pub mod config;
+pub mod proto;
+pub mod traits;
+pub mod yaml;
diff --git a/rs/src/input/proto.rs b/rs/src/input/proto.rs
new file mode 100644
index 00000000..27c4a62f
--- /dev/null
+++ b/rs/src/input/proto.rs
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for representing Substrait protobuf input.
+//!
+//! The structures here are generated using [`prost`], but have a bunch of
+//! extra traits from [`traits`](crate::input::traits) associated with them,
+//! for which the implementations are generated using
+//! [`substrait_validator_derive`]. The purpose of these traits is to add basic
+//! introspection capabilities to the prost structures. One of the use cases
+//! for this is to let the parsing code automatically detect when the
+//! validation code ignored a subtree while validating, which implies that the
+//! validator hasn't checked everything and thus should not warrant that the
+//! received plan is valid.
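+//!
+//! As a sketch of what this enables (mirroring the unit tests in
+//! [`traits`](crate::input::traits)):
+//!
+//! ```ignore
+//! use substrait_validator::input::traits::ProtoMessage;
+//! assert_eq!(substrait::Plan::proto_message_type(), "substrait.Plan");
+//! ```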
+ +use crate::input::traits; +use crate::output::primitive_data; + +use heck::ToUpperCamelCase; + +#[allow(clippy::large_enum_variant)] +pub mod substrait { + include!(concat!(env!("OUT_DIR"), "/substrait.rs")); + pub mod extensions { + include!(concat!(env!("OUT_DIR"), "/substrait.extensions.rs")); + } + pub mod validator { + include!(concat!(env!("OUT_DIR"), "/substrait.validator.rs")); + } +} + +/// Converts a Rust module path and name (the latter already processed by +/// cook_ident()) to a protobuf type path. +pub fn cook_path(module_path: &str, type_name: &str) -> String { + let mut iter = module_path + .split("::") + .skip(module_path!().split("::").count()) + .map(cook_ident) + .chain(::std::iter::once(type_name)) + .peekable(); + let mut items = vec![]; + if matches!(iter.peek(), Some(&"substrait")) { + items.push(iter.next().unwrap().to_string()); + if matches!(iter.peek(), Some(&"extensions") | Some(&"validator")) { + items.push(iter.next().unwrap().to_string()); + } + } + items.extend(iter.map(|x| x.to_upper_camel_case())); + ::itertools::Itertools::intersperse(items.iter().map(|x| x.as_ref()), ".").collect() +} + +/// Converts a Rust identifier string generated via stringify!() to the +/// original identifier by "cooking" raw identifiers. +pub fn cook_ident(ident: &str) -> &str { + if let Some((_, keyword)) = ident.split_once('#') { + keyword + } else { + ident + } +} + +impl traits::ProtoPrimitive for bool { + fn proto_primitive_type() -> &'static str { + "bool" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Bool(false) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Bool(*self) + } + + fn proto_primitive_is_default(&self) -> bool { + !*self + } +} + +impl traits::ProtoPrimitive for u32 { + fn proto_primitive_type() -> &'static str { + "uint32" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Unsigned(0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Unsigned((*self).into()) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0 + } +} + +impl traits::ProtoPrimitive for u64 { + fn proto_primitive_type() -> &'static str { + "uint64" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Unsigned(0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Unsigned(*self) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0 + } +} + +impl traits::ProtoPrimitive for i32 { + fn proto_primitive_type() -> &'static str { + "int32" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Signed(0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Signed((*self).into()) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0 + } +} + +impl traits::ProtoPrimitive for i64 { + fn proto_primitive_type() -> &'static str { + "int64" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Signed(0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Signed(*self) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0 + } +} + +impl traits::ProtoPrimitive for f32 { + fn proto_primitive_type() -> &'static str { 
+ "float" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Float(0.0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Float((*self).into()) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0.0 + } +} + +impl traits::ProtoPrimitive for f64 { + fn proto_primitive_type() -> &'static str { + "double" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Float(0.0) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Float(*self) + } + + fn proto_primitive_is_default(&self) -> bool { + *self == 0.0 + } +} + +impl traits::ProtoPrimitive for String { + fn proto_primitive_type() -> &'static str { + "string" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::String(String::new()) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::String(self.clone()) + } + + fn proto_primitive_is_default(&self) -> bool { + self.is_empty() + } +} + +impl traits::ProtoPrimitive for Vec { + fn proto_primitive_type() -> &'static str { + "bytes" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Bytes(vec![]) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Bytes(self.clone()) + } + + fn proto_primitive_is_default(&self) -> bool { + self.is_empty() + } +} + +impl traits::ProtoPrimitive for prost_types::Any { + fn proto_primitive_type() -> &'static str { + "any" + } + + fn proto_primitive_default() -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Any(prost_types::Any::default()) + } + + fn proto_primitive_data(&self) -> primitive_data::PrimitiveData { + primitive_data::PrimitiveData::Any(self.clone()) + } + + fn proto_primitive_is_default(&self) -> bool { + self.type_url.is_empty() + } +} diff --git a/rs/src/input/traits.rs b/rs/src/input/traits.rs new file mode 100644 index 00000000..d3a02946 --- /dev/null +++ b/rs/src/input/traits.rs @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing introspection traits for [`prost`]-generated protobuf +//! types. + +use crate::output::primitive_data; +use crate::output::tree; +use crate::parse::context; + +/// Trait for all Rust types that represent input tree node types. +pub trait InputNode { + /// Creates an empty output node for a protobuf datum of this type. + /// + /// For primitive types, this fills the value with protobuf's default. + fn type_to_node() -> tree::Node; + + /// Creates an empty output node for a protobuf datum with this value. + fn data_to_node(&self) -> tree::Node; + + /// Returns the name of the selected variant of a oneof field, if this + /// is a rust enum used to represent a oneof field. + fn oneof_variant(&self) -> Option<&'static str>; + + /// Complete the subtrees of this datum in output that have not already + /// been parsed using UnknownField nodes. Returns whether any such nodes + /// were added. + fn parse_unknown(&self, context: &mut context::Context<'_>) -> bool; +} + +/// Trait for all Rust types that represent protobuf messages. These are +/// always structs for which all fields implement InputNode. +pub trait ProtoMessage: InputNode { + /// Returns the protobuf type name for messages of this type. 
+    fn proto_message_type() -> &'static str;
+}
+
+/// Trait for all Rust types that represent protobuf's oneof abstraction.
+/// In the world of protobuf, these aren't really a thing of their own, but
+/// in Rust, they are defined as enums, each variant containing a one-tuple
+/// of some type implementing InputNode.
+pub trait ProtoOneOf: InputNode {
+    /// Returns the name of the selected variant of a oneof field.
+    fn proto_oneof_variant(&self) -> &'static str;
+}
+
+/// Trait for Rust types that map to the protobuf primitive types.
+pub trait ProtoPrimitive: InputNode {
+    /// Returns the protobuf type name for primitives of this type.
+    fn proto_primitive_type() -> &'static str;
+
+    /// Returns the protobuf-specified default value for this primitive
+    /// data type.
+    fn proto_primitive_default() -> primitive_data::PrimitiveData;
+
+    /// Returns the actual value for this primitive data type as a
+    /// ProtoPrimitiveData variant.
+    fn proto_primitive_data(&self) -> primitive_data::PrimitiveData;
+
+    /// Returns whether this is the default value of the primitive.
+    fn proto_primitive_is_default(&self) -> bool;
+}
+
+/// Trait for all Rust types that represent protobuf enums. These are
+/// always represented as a Rust enum with no contained values for any of
+/// the variants.
+pub trait ProtoEnum: ProtoPrimitive {
+    /// Returns the protobuf type name for enums of this type.
+    fn proto_enum_type() -> &'static str;
+
+    /// Returns the name of the default variant of an enum.
+    fn proto_enum_default_variant() -> &'static str;
+
+    /// Returns the name of the selected variant of an enum.
+    fn proto_enum_variant(&self) -> &'static str;
+
+    /// Returns the enumeration entry corresponding to the given integer
+    /// value, if any.
+    fn proto_enum_from_i32(x: i32) -> Option<Self>
+    where
+        Self: Sized;
+}
+
+/// Blanket implementation to make all protobuf enums behave like
+/// primitives as well.
+impl<T: ProtoEnum> ProtoPrimitive for T {
+    fn proto_primitive_type() -> &'static str {
+        T::proto_enum_type()
+    }
+
+    fn proto_primitive_default() -> primitive_data::PrimitiveData {
+        primitive_data::PrimitiveData::Enum(T::proto_enum_default_variant())
+    }
+
+    fn proto_primitive_data(&self) -> primitive_data::PrimitiveData {
+        primitive_data::PrimitiveData::Enum(self.proto_enum_variant())
+    }
+
+    fn proto_primitive_is_default(&self) -> bool {
+        self.proto_enum_variant() == T::proto_enum_default_variant()
+    }
+}
+
+/// Blanket implementation to make all protobuf primitives behave like
+/// generic protobuf datums.
+///
+/// Note: if Rust would allow it, we could define blanket implementations
+/// for ProtoMessage and ProtoOneOf as well, since they're always the same.
+/// Unfortunately, we can only define a single blanket implementation, so
+/// we opt for the one that isn't already generated via derive macros.
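+///
+/// For instance, also adding `impl<T: ProtoMessage> InputNode for T` here
+/// would be rejected under Rust's coherence rules, because the compiler
+/// cannot rule out a type implementing both ProtoPrimitive and ProtoMessage.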
+impl<T: ProtoPrimitive> InputNode for T {
+    fn type_to_node() -> tree::Node {
+        tree::NodeType::ProtoPrimitive(T::proto_primitive_type(), T::proto_primitive_default())
+            .into()
+    }
+
+    fn data_to_node(&self) -> tree::Node {
+        tree::NodeType::ProtoPrimitive(T::proto_primitive_type(), self.proto_primitive_data())
+            .into()
+    }
+
+    fn oneof_variant(&self) -> Option<&'static str> {
+        None
+    }
+
+    fn parse_unknown(&self, _context: &mut context::Context<'_>) -> bool {
+        false
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::input::proto::substrait;
+    use crate::output::primitive_data;
+    use crate::output::tree;
+
+    #[test]
+    fn message() {
+        assert_eq!(substrait::Plan::proto_message_type(), "substrait.Plan");
+        assert_eq!(
+            substrait::Plan::type_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoMessage("substrait.Plan"),
+                data_type: None,
+                data: vec![],
+            }
+        );
+
+        let msg = substrait::Plan::default();
+        assert_eq!(
+            msg.data_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoMessage("substrait.Plan"),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(msg.oneof_variant(), None);
+    }
+
+    #[test]
+    fn oneof() {
+        assert_eq!(
+            substrait::plan_rel::RelType::type_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoMissingOneOf,
+                data_type: None,
+                data: vec![],
+            }
+        );
+
+        let oneof = substrait::plan_rel::RelType::Rel(substrait::Rel::default());
+        assert_eq!(oneof.proto_oneof_variant(), "rel");
+        assert_eq!(
+            oneof.data_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoMessage("substrait.Rel"),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(oneof.oneof_variant(), Some("rel"));
+    }
+
+    #[test]
+    fn enumeration() {
+        assert_eq!(
+            substrait::AggregationPhase::proto_enum_type(),
+            "substrait.AggregationPhase"
+        );
+        assert_eq!(
+            substrait::AggregationPhase::proto_enum_default_variant(),
+            "AGGREGATION_PHASE_UNSPECIFIED"
+        );
+        assert_eq!(
+            substrait::AggregationPhase::Unspecified.proto_enum_variant(),
+            "AGGREGATION_PHASE_UNSPECIFIED"
+        );
+
+        assert_eq!(
+            substrait::AggregationPhase::proto_primitive_type(),
+            "substrait.AggregationPhase"
+        );
+        assert_eq!(
+            substrait::AggregationPhase::proto_primitive_default(),
+            primitive_data::PrimitiveData::Enum("AGGREGATION_PHASE_UNSPECIFIED")
+        );
+        assert_eq!(
+            substrait::AggregationPhase::Unspecified.proto_primitive_data(),
+            primitive_data::PrimitiveData::Enum("AGGREGATION_PHASE_UNSPECIFIED")
+        );
+
+        assert_eq!(
+            substrait::AggregationPhase::type_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoPrimitive(
+                    "substrait.AggregationPhase",
+                    primitive_data::PrimitiveData::Enum("AGGREGATION_PHASE_UNSPECIFIED")
+                ),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(
+            substrait::AggregationPhase::Unspecified.data_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoPrimitive(
+                    "substrait.AggregationPhase",
+                    primitive_data::PrimitiveData::Enum("AGGREGATION_PHASE_UNSPECIFIED")
+                ),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(
+            substrait::AggregationPhase::Unspecified.oneof_variant(),
+            None
+        );
+    }
+
+    #[test]
+    fn primitive() {
+        assert_eq!(u32::proto_primitive_type(), "uint32");
+        assert_eq!(
+            u32::proto_primitive_default(),
+            primitive_data::PrimitiveData::Unsigned(0)
+        );
+        assert_eq!(
+            42u32.proto_primitive_data(),
+            primitive_data::PrimitiveData::Unsigned(42)
+        );
+
+        assert_eq!(
+            u32::type_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoPrimitive(
+                    "uint32",
+                    primitive_data::PrimitiveData::Unsigned(0)
+                ),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(
+            42u32.data_to_node(),
+            tree::Node {
+                class: tree::Class::Misc,
+                brief: None,
+                summary: None,
+                node_type: tree::NodeType::ProtoPrimitive(
+                    "uint32",
+                    primitive_data::PrimitiveData::Unsigned(42)
+                ),
+                data_type: None,
+                data: vec![],
+            }
+        );
+        assert_eq!(42u32.oneof_variant(), None);
+    }
+}
diff --git a/rs/src/input/yaml.rs b/rs/src/input/yaml.rs
new file mode 100644
index 00000000..fcda7957
--- /dev/null
+++ b/rs/src/input/yaml.rs
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for representing YAML input.
+//!
+//! We (ab)use [`serde_json::value::Value`] for this; the primary reason being
+//! that a [JSON schema](https://json-schema.org/) is used for basic schema
+//! validation of the YAML files, and the [`jsonschema`] crate we use for that
+//! uses [`serde_json`]'s representation. [`yaml_to_json()`] may be used to
+//! convert the output from [`yaml_rust`] to this structure.
+
+use crate::output::diagnostic;
+use crate::output::path;
+use crate::output::primitive_data;
+use crate::output::tree;
+use crate::parse::traversal;
+
+use yaml_rust::yaml::Yaml;
+
+/// Typedef for the type used for arbitrary YAML values.
+pub type Value = serde_json::value::Value;
+
+/// Typedef for the type used for YAML arrays.
+pub type Array = Vec<Value>;
+
+/// Typedef for the type used for YAML maps.
+pub type Map = serde_json::map::Map<String, Value>;
+
+/// Converts a [`yaml_rust`] YAML structure into its equivalent JSON object
+/// model using [`serde_json`]'s types.
+pub fn yaml_to_json(y: Yaml, path: &path::Path) -> diagnostic::DiagResult<Value> {
+    match y {
+        Yaml::Real(ref s) => Ok(Value::Number(
+            serde_json::value::Number::from_f64(y.as_f64().ok_or_else(|| {
+                diag!(
+                    path.to_path_buf(),
+                    Error,
+                    YamlParseFailed,
+                    "failed to parse {s} as float"
+                )
+            })?)
+            .ok_or_else(|| {
+                diag!(
+                    path.to_path_buf(),
+                    Error,
+                    YamlParseFailed,
+                    "{s} float is not supported"
+                )
+            })?,
+        )),
+        Yaml::Integer(i) => Ok(Value::Number(i.into())),
+        Yaml::String(s) => Ok(Value::String(s)),
+        Yaml::Boolean(b) => Ok(Value::Bool(b)),
+        Yaml::Array(a) => Ok(Value::Array(
+            a.into_iter()
+                .enumerate()
+                .map(|(index, value)| yaml_to_json(value, &path.with_index(index)))
+                .collect::<diagnostic::DiagResult<Array>>()?,
+        )),
+        Yaml::Hash(m) => Ok(Value::Object(
+            m.into_iter()
+                .map(|(key, value)| {
+                    let key = key
+                        .as_str()
+                        .ok_or_else(|| {
+                            diag!(
+                                path.to_path_buf(),
+                                Error,
+                                YamlParseFailed,
+                                "non-string map keys are not supported"
+                            )
+                        })?
+                        .to_string();
+                    let path = path.with_field(&key);
+                    let value = yaml_to_json(value, &path)?;
+                    Ok((key, value))
+                })
+                .collect::<diagnostic::DiagResult<Map>>()?,
+        )),
+        Yaml::Alias(_) => Err(diag!(
+            path.to_path_buf(),
+            Error,
+            YamlParseFailed,
+            "YAML aliases are not supported"
+        )),
+        Yaml::Null => Ok(Value::Null),
+        Yaml::BadValue => panic!("encountered Yaml::BadValue"),
+    }
+}
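+
+// Sketch of the intended use of yaml_to_json(), given some starting
+// `root_path: path::Path` (illustrative only, not part of the API):
+//
+//     let mut docs = yaml_rust::YamlLoader::load_from_str("a: 1").unwrap();
+//     let json = yaml_to_json(docs.pop().unwrap(), &root_path)?;
+//     assert_eq!(json["a"], 1);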
+
+impl crate::input::traits::InputNode for Value {
+    fn type_to_node() -> tree::Node {
+        tree::NodeType::YamlMap.into()
+    }
+
+    fn data_to_node(&self) -> tree::Node {
+        match self {
+            Value::Null => tree::NodeType::YamlPrimitive(primitive_data::PrimitiveData::Null),
+            Value::Bool(b) => {
+                tree::NodeType::YamlPrimitive(primitive_data::PrimitiveData::Bool(*b))
+            }
+            Value::Number(n) => tree::NodeType::YamlPrimitive(
+                n.as_u64()
+                    .map(primitive_data::PrimitiveData::Unsigned)
+                    .or_else(|| n.as_i64().map(primitive_data::PrimitiveData::Signed))
+                    .or_else(|| n.as_f64().map(primitive_data::PrimitiveData::Float))
+                    .unwrap(),
+            ),
+            Value::String(s) => {
+                tree::NodeType::YamlPrimitive(primitive_data::PrimitiveData::String(s.clone()))
+            }
+            Value::Array(_) => tree::NodeType::YamlArray,
+            Value::Object(_) => tree::NodeType::YamlMap,
+        }
+        .into()
+    }
+
+    fn oneof_variant(&self) -> Option<&'static str> {
+        None
+    }
+
+    fn parse_unknown(&self, context: &mut crate::parse::context::Context<'_>) -> bool {
+        match self {
+            Value::Array(array) => {
+                let mut any = false;
+                for (index, _) in array.iter().enumerate() {
+                    if !context.field_parsed(index.to_string()) {
+                        traversal::push_yaml_element(array, context, index, true, |_, _| Ok(()));
+                        any = true;
+                    }
+                }
+                any
+            }
+            Value::Object(object) => {
+                let mut any = false;
+                let mut keys: Vec<_> = object.keys().collect();
+                keys.sort();
+                for field_name in keys {
+                    if !context.field_parsed(field_name) {
+                        traversal::push_yaml_field(self, context, field_name, true, |_, _| Ok(()))
+                            .unwrap();
+                        any = true;
+                    }
+                }
+                any
+            }
+            _ => false,
+        }
+    }
+}
diff --git a/rs/src/lib.rs b/rs/src/lib.rs
new file mode 100644
index 00000000..023f4359
--- /dev/null
+++ b/rs/src/lib.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Crate for validating [Substrait](https://substrait.io/).
+//!
+//! The usage pattern is roughly as follows.
+//!
+//! 1) Build a [`Config`] structure to configure the validator. You can also
+//!    just use [`std::default::Default`] if you don't need to configure
+//!    anything, but you might want to at least call
+//!    [`Config::add_curl_uri_resolver()`] (if you're using the `curl`
+//!    feature).
+//! 2) Parse the incoming `substrait.Plan` message using [`parse()`]. This
+//!    creates a [ParseResult], containing a [tree](output::tree) structure
+//!    corresponding to the query plan that also contains diagnostics and
+//!    other annotations added by the validator.
+//! 3) You can traverse the tree yourself using [ParseResult::root], or you
+//!    can use one of the methods associated with [ParseResult] to obtain the
+//!    validation results you need.
+//!
+//! Note that only the binary protobuf serialization format is supported at the
+//! input; the JSON format is *not* supported. This is a limitation of `prost`,
+//! the crate that was used for protobuf deserialization. If you're looking for
+//! a library (or CLI) that supports more human-friendly input, check out the
+//! Python bindings.
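+//!
+//! A minimal end-to-end sketch (`plan_bytes` is assumed to hold a binary
+//! substrait.Plan message):
+//!
+//! ```ignore
+//! let config = substrait_validator::Config::new();
+//! let result = substrait_validator::parse(&plan_bytes[..], &config);
+//! let root = &result.root; // walk the annotated tree from here
+//! ```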
+
+#[macro_use]
+pub mod output;
+
+#[macro_use]
+mod parse;
+
+pub mod export;
+pub mod input;
+
+mod string_util;
+
+use strum::IntoEnumIterator;
+
+// Aliases for common types used on the crate interface.
+pub use input::config::glob::Pattern;
+pub use input::config::Config;
+pub use output::diagnostic::Classification;
+pub use output::diagnostic::Diagnostic;
+pub use output::diagnostic::Level;
+pub use output::parse_result::ParseResult;
+pub use output::parse_result::Validity;
+
+/// Validates the given substrait.Plan message and returns the parse tree.
+pub fn parse<B: prost::bytes::Buf>(buffer: B, config: &Config) -> ParseResult {
+    parse::parse(buffer, config)
+}
+
+/// Returns an iterator that yields all known diagnostic classes.
+pub fn iter_diagnostics() -> impl Iterator<Item = Classification> {
+    Classification::iter()
+}
diff --git a/rs/src/output/comment.rs b/rs/src/output/comment.rs
new file mode 100644
index 00000000..45542ad0
--- /dev/null
+++ b/rs/src/output/comment.rs
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for comments.
+//!
+//! [`Comment`]s can be added to nodes between the child edges to attach
+//! additional miscellaneous information that doesn't fit in any of the more
+//! structured types, intended purely to be formatted for and interpreted by
+//! humans.
+
+use crate::output::path;
+
+/// Representation of a comment message intended only for human consumption.
+/// Includes basic formatting information.
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct Comment {
+    /// Formatting elements and spans that make up the comment.
+    elements: Vec<Element>,
+}
+
+impl Comment {
+    /// Creates an empty comment.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Adds a piece of plain text to the comment.
+    pub fn plain<S: ToString>(mut self, text: S) -> Self {
+        self.push(Element::Span(text.to_string().into()));
+        self
+    }
+
+    /// Adds a piece of text to the comment that links to the given path.
+    pub fn link<S: ToString>(mut self, text: S, path: path::PathBuf) -> Self {
+        self.push(Element::Span(Span {
+            text: text.to_string(),
+            link: Some(Link::Path(path)),
+        }));
+        self
+    }
+
+    /// Adds a piece of text to the comment that links to the given URL.
+    pub fn url<S: ToString, U: ToString>(mut self, text: S, url: U) -> Self {
+        self.push(Element::Span(Span {
+            text: text.to_string(),
+            link: Some(Link::Url(url.to_string())),
+        }));
+        self
+    }
+
+    /// Adds a newline/paragraph break.
+    pub fn nl(mut self) -> Self {
+        self.push(Element::NewLine);
+        self
+    }
+
+    /// Opens a list.
+    pub fn lo(mut self) -> Self {
+        self.push(Element::ListOpen);
+        self
+    }
+
+    /// Advances to the next list item.
+    pub fn li(mut self) -> Self {
+        self.push(Element::ListNext);
+        self
+    }
+
+    /// Closes the current list.
+    pub fn lc(mut self) -> Self {
+        self.push(Element::ListClose);
+        self
+    }
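+
+    // Illustrative use of the builder methods above (an example, not part of
+    // the API surface):
+    //
+    //     let c = Comment::new()
+    //         .plain("options:")
+    //         .lo()
+    //         .plain("first")
+    //         .li()
+    //         .plain("second")
+    //         .lc();
+    //
+    // The Display implementation below renders list items with "- " markers.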
+
+    /// Pushes an element into this comment.
+    pub fn push(&mut self, element: Element) {
+        // Some pairs of element types should never follow each other, because
+        // one implies the other.
+        match self.elements.pop() {
+            None => self.elements.push(element),
+            Some(Element::Span(s1)) => {
+                if let Element::Span(s2) = element {
+                    let (s1, maybe_s2) = merge_spans(s1, s2);
+                    self.elements.push(Element::Span(s1));
+                    if let Some(s2) = maybe_s2 {
+                        self.elements.push(Element::Span(s2));
+                    }
+                } else {
+                    self.elements.push(Element::Span(s1));
+                    self.elements.push(element);
+                }
+            }
+            Some(Element::NewLine) => {
+                if matches!(element, Element::Span(_)) {
+                    self.elements.push(Element::NewLine);
+                }
+                self.elements.push(element);
+            }
+            Some(Element::ListOpen) => {
+                self.elements.push(Element::ListOpen);
+                if !matches!(element, Element::ListNext) {
+                    self.elements.push(element);
+                }
+            }
+            Some(Element::ListNext) => {
+                self.elements.push(Element::ListNext);
+                if !matches!(element, Element::ListNext) {
+                    self.elements.push(element);
+                }
+            }
+            Some(Element::ListClose) => {
+                self.elements.push(Element::ListClose);
+                if !matches!(element, Element::NewLine) {
+                    self.elements.push(element);
+                }
+            }
+        }
+    }
+
+    /// Pushes a whole other comment's worth of elements into this comment.
+    pub fn extend(&mut self, other: Comment) {
+        let mut it = other.elements.into_iter();
+
+        // The first element of other may need to be merged with its new
+        // predecessor.
+        if let Some(element) = it.next() {
+            self.push(element);
+        }
+
+        // The rest of the elements would already have been merged, so we can
+        // just copy them over.
+        self.elements.extend(it);
+    }
+
+    /// Returns the slice of elements that comprise the comment.
+    ///
+    /// This list is "minimal:"
+    /// - there are no consecutive newlines, list item tags, or spans with
+    ///   equal formatting (they are merged together);
+    /// - there are no empty lists, and there is never a list item immediately
+    ///   following a list open tag (as this is redundant).
+    pub fn elements(&self) -> &[Element] {
+        &self.elements
+    }
+}
+
+impl std::fmt::Display for Comment {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut indent = 0;
+        for element in self.elements.iter() {
+            match element {
+                Element::Span(span) => span.fmt(f),
+                Element::NewLine => write!(f, "\n\n{: >1$}", "", indent),
+                Element::ListOpen => {
+                    indent += 3;
+                    write!(f, "\n\n{: >1$}", "- ", indent)
+                }
+                Element::ListNext => {
+                    write!(f, "\n\n{: >1$}", "- ", indent)
+                }
+                Element::ListClose => {
+                    indent -= 3;
+                    write!(f, "\n\n{: >1$}", "", indent)
+                }
+            }?;
+        }
+        Ok(())
+    }
+}
+
+impl From<String> for Comment {
+    fn from(text: String) -> Self {
+        Self {
+            elements: vec![Element::Span(text.into())],
+        }
+    }
+}
+
+/// A comment element.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Element {
+    /// A span of text. Should not include newlines.
+    Span(Span),
+
+    /// A newline/paragraph break.
+    NewLine,
+
+    /// Starts a new list. Subsequent spans form the text for the first item.
+    ListOpen,
+
+    /// Advances to the next list item.
+    ListNext,
+
+    /// Closes a list.
+    ListClose,
+}
+
+/// Like Comment, but single-line.
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct Brief {
+    /// Spans that make up the comment. These are simply concatenated, but
+    /// spans may contain optional formatting information.
+    spans: Vec<Span>,
+}
+
+impl Brief {
+    /// Creates an empty comment.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Adds a piece of plain text to the comment.
+    pub fn plain<S: ToString>(mut self, text: S) -> Self {
+        self.push(text.to_string().into());
+        self
+    }
+
+    /// Adds a piece of text to the comment that links to the given path.
+    pub fn link<S: ToString>(mut self, text: S, path: path::PathBuf) -> Self {
+        self.push(Span {
+            text: text.to_string(),
+            link: Some(Link::Path(path)),
+        });
+        self
+    }
+
+    /// Adds a piece of text to the comment that links to the given URL.
+    pub fn url<S: ToString, U: ToString>(mut self, text: S, url: U) -> Self {
+        self.push(Span {
+            text: text.to_string(),
+            link: Some(Link::Url(url.to_string())),
+        });
+        self
+    }
+
+    /// Pushes a span into this brief.
+    pub fn push(&mut self, span: Span) {
+        if let Some(s1) = self.spans.pop() {
+            let s2 = span;
+            let (s1, maybe_s2) = merge_spans(s1, s2);
+            self.spans.push(s1);
+            if let Some(s2) = maybe_s2 {
+                self.spans.push(s2);
+            }
+        } else {
+            self.spans.push(span);
+        }
+    }
+
+    /// Pushes a whole other brief's worth of elements into this brief.
+    pub fn extend(&mut self, other: Brief) {
+        let mut it = other.spans.into_iter();
+
+        // The first span of other may need to be merged with its new
+        // predecessor.
+        if let Some(element) = it.next() {
+            self.push(element);
+        }
+
+        // The rest of the spans would already have been merged, so we can
+        // just copy them over.
+        self.spans.extend(it);
+    }
+
+    /// Returns the slice of spans that comprise the brief.
+    pub fn spans(&self) -> &[Span] {
+        &self.spans
+    }
+}
+
+impl std::fmt::Display for Brief {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for span in self.spans.iter() {
+            span.fmt(f)?;
+        }
+        Ok(())
+    }
+}
+
+impl From<String> for Brief {
+    fn from(text: String) -> Self {
+        Self {
+            spans: vec![text.into()],
+        }
+    }
+}
+
+impl From<Brief> for Comment {
+    fn from(brief: Brief) -> Self {
+        Self {
+            elements: brief.spans.into_iter().map(Element::Span).collect(),
+        }
+    }
+}
+
+/// A span of text within a comment.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Span {
+    /// The span of text.
+    pub text: String,
+
+    /// Whether this span of text should link to something.
+    pub link: Option<Link>,
+}
+
+impl std::fmt::Display for Span {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.text)
+    }
+}
+
+impl From<String> for Span {
+    fn from(text: String) -> Self {
+        Span { text, link: None }
+    }
+}
+
+/// Merges two spans together, if possible. A space is inserted between the
+/// spans if there isn't one already.
+fn merge_spans(mut a: Span, b: Span) -> (Span, Option<Span>) {
+    if b.text.is_empty() {
+        return (a, None);
+    }
+    if !a.text.ends_with(' ') && !b.text.starts_with(' ') {
+        a.text.push(' ');
+    }
+    if a.link == b.link {
+        a.text += &b.text;
+        return (a, None);
+    }
+    (a, Some(b))
+}
+
+/// A link to something.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Link {
+    /// Link to another node in the tree, via an absolute node path.
+    Path(path::PathBuf),
+
+    /// Link to some external URL.
+    Url(String),
+}
diff --git a/rs/src/output/data_type.rs b/rs/src/output/data_type.rs
new file mode 100644
index 00000000..e63815ed
--- /dev/null
+++ b/rs/src/output/data_type.rs
@@ -0,0 +1,864 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for dealing with Substrait's type system.
+//!
+//! See [`DataType`].
+
+use crate::output::diagnostic;
+use crate::output::extension;
+use crate::string_util;
+use crate::string_util::Describe;
+use std::collections::HashSet;
+use std::fmt::Write;
+use std::sync::Arc;
+use strum_macros::{Display, EnumString};
+
+/// Typedef for type variations.
+pub type Variation = Option<Arc<extension::Reference<extension::TypeVariation>>>;
+
+/// A Substrait data type. Includes facilities for storing unresolved or
+/// partially-resolved types.
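+///
+/// For example (sketch), a nullable `LIST<i32>` can be built with the
+/// constructors defined below:
+///
+/// ```ignore
+/// let element = DataType::new_integer(false);
+/// let list = DataType::new_list(element, true);
+/// assert!(list.is_list());
+/// ```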
+#[derive(Clone, Debug, PartialEq)]
+pub struct DataType {
+    /// Type class (simple, compound, or user-defined).
+    class: Class,
+
+    /// Nullability.
+    nullable: bool,
+
+    /// Type variation, if any.
+    variation: Variation,
+
+    /// Type parameters for non-simple types.
+    parameters: Vec<Parameter>,
+}
+
+impl Describe for DataType {
+    fn describe(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        limit: string_util::Limit,
+    ) -> std::fmt::Result {
+        let mut name = String::new();
+        write!(&mut name, "{}", self.class)?;
+        if self.nullable {
+            write!(&mut name, "?")?;
+        }
+        if let Some(variation) = &self.variation {
+            write!(&mut name, "[{variation}]")?;
+        }
+        write!(f, "{}", name)?;
+        let (_, limit) = limit.split(name.len());
+        if self.class.has_parameters() {
+            write!(f, "<")?;
+            string_util::describe_sequence(
+                f,
+                &self.parameters,
+                limit,
+                20,
+                |f, param, _, limit| param.describe(f, limit),
+            )?;
+            write!(f, ">")?;
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Display for DataType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.display().fmt(f)
+    }
+}
+
+impl DataType {
+    /// Creates a new type.
+    pub fn new(
+        class: Class,
+        nullable: bool,
+        variation: Variation,
+        parameters: Vec<Parameter>,
+    ) -> diagnostic::Result<Arc<DataType>> {
+        // Check whether class and parameters work together.
+        class.check_parameters(&parameters)?;
+
+        // Check whether the specified type variation is applicable to this
+        // type.
+        if let Some(variation) = &variation {
+            if let Some(definition) = &variation.definition {
+                let base = definition.get_base_class();
+                if !base.weak_equals(&class) {
+                    return Err(cause!(
+                        TypeMismatchedVariation,
+                        "variation {variation} is derived from {base}, not {class}"
+                    ));
+                }
+            }
+        }
+
+        Ok(Arc::new(DataType {
+            class,
+            nullable,
+            variation,
+            parameters,
+        }))
+    }
+
+    /// Creates a new unresolved type.
+    pub fn new_unresolved() -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Unresolved,
+            nullable: false,
+            variation: None,
+            parameters: vec![],
+        })
+    }
+
+    /// Creates a new struct type.
+    pub fn new_struct<T: IntoIterator<Item = Arc<DataType>>>(
+        fields: T,
+        nullable: bool,
+    ) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Compound(Compound::Struct),
+            nullable,
+            variation: None,
+            parameters: fields.into_iter().map(Parameter::Type).collect(),
+        })
+    }
+
+    /// Creates a new list type.
+    pub fn new_list(element: Arc<DataType>, nullable: bool) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Compound(Compound::List),
+            nullable,
+            variation: None,
+            parameters: vec![Parameter::Type(element)],
+        })
+    }
+
+    /// Creates a new map type.
+    pub fn new_map(key: Arc<DataType>, value: Arc<DataType>, nullable: bool) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Compound(Compound::Map),
+            nullable,
+            variation: None,
+            parameters: vec![Parameter::Type(key), Parameter::Type(value)],
+        })
+    }
+
+    /// Creates the type of a predicate, i.e. a boolean.
+    pub fn new_predicate(nullable: bool) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Simple(Simple::Boolean),
+            nullable,
+            variation: None,
+            parameters: vec![],
+        })
+    }
+
+    /// Creates the type of a (default) integer, i.e. i32.
+    pub fn new_integer(nullable: bool) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: Class::Simple(Simple::I32),
+            nullable,
+            variation: None,
+            parameters: vec![],
+        })
+    }
+
+    /// Returns a nullable variant of this type.
+    pub fn make_nullable(&self) -> Arc<DataType> {
+        Arc::new(DataType {
+            class: self.class.clone(),
+            nullable: true,
+            variation: self.variation.clone(),
+            parameters: self.parameters.clone(),
+        })
+    }
+
+    /// Returns the type class.
+    pub fn class(&self) -> &Class {
+        &self.class
+    }
+
+    /// Returns whether the type is nullable.
+    pub fn nullable(&self) -> bool {
+        self.nullable
+    }
+
+    /// Returns the type variation.
+    pub fn variation(&self) -> &Variation {
+        &self.variation
+    }
+
+    /// Returns the type parameters.
+    pub fn parameters(&self) -> &Vec<Parameter> {
+        &self.parameters
+    }
+
+    /// Returns the value of the given integer parameter.
+    pub fn int_parameter(&self, index: usize) -> Option<u64> {
+        if let Some(Parameter::Unsigned(value)) = self.parameters.get(index) {
+            Some(*value)
+        } else {
+            None
+        }
+    }
+
+    /// Returns the value of the given type parameter.
+    pub fn type_parameter(&self, index: usize) -> Option<Arc<DataType>> {
+        match self.parameters.get(index) {
+            Some(Parameter::Type(t)) => Some(t.clone()),
+            Some(Parameter::NamedType(_, t)) => Some(t.clone()),
+            _ => None,
+        }
+    }
+
+    /// Returns whether this is an unresolved type.
+    pub fn is_unresolved(&self) -> bool {
+        matches!(self.class, Class::Unresolved)
+    }
+
+    /// Returns whether any part of this type tree is an unresolved type.
+    pub fn is_unresolved_deep(&self) -> bool {
+        self.is_unresolved()
+            || self.parameters.iter().any(|p| match p {
+                Parameter::Type(t) => t.is_unresolved_deep(),
+                Parameter::NamedType(_, t) => t.is_unresolved_deep(),
+                _ => false,
+            })
+    }
+
+    /// Returns whether this is a STRUCT or NSTRUCT type.
+    pub fn is_struct(&self) -> bool {
+        matches!(
+            self.class,
+            Class::Compound(Compound::Struct) | Class::Compound(Compound::NamedStruct)
+        )
+    }
+
+    /// Returns Some(Vec<Arc<DataType>>) when this is a STRUCT or NSTRUCT
+    /// type, where the vector contains the field types. Returns None
+    /// otherwise.
+    pub fn unwrap_struct(&self) -> Option<Vec<Arc<DataType>>> {
+        if self.is_struct() {
+            Some(
+                self.parameters
+                    .iter()
+                    .map(|x| x.get_type().cloned().unwrap_or_default())
+                    .collect(),
+            )
+        } else {
+            None
+        }
+    }
+
+    /// Returns Some(T) when this is a STRUCT or NSTRUCT type with only a
+    /// single element of type T, or None otherwise.
+    pub fn unwrap_singular_struct(&self) -> Option<Arc<DataType>> {
+        if self.is_struct() && self.parameters.len() == 1 {
+            self.type_parameter(0)
+        } else {
+            None
+        }
+    }
+
+    /// Returns whether this is a LIST type.
+    pub fn is_list(&self) -> bool {
+        matches!(self.class, Class::Compound(Compound::List))
+    }
+
+    /// Returns Some(T) when this is a LIST type with element type T, or None
+    /// otherwise.
+    pub fn unwrap_list(&self) -> Option<Arc<DataType>> {
+        if self.is_list() {
+            self.type_parameter(0)
+        } else {
+            None
+        }
+    }
+
+    /// Returns whether this is a MAP type.
+    pub fn is_map(&self) -> bool {
+        matches!(self.class, Class::Compound(Compound::Map))
+    }
+
+    /// Returns Some(T) when this is a MAP type with value type T, or None
+    /// otherwise.
+    pub fn unwrap_map(&self) -> Option<Arc<DataType>> {
+        if self.is_map() {
+            self.type_parameter(1)
+        } else {
+            None
+        }
+    }
+
+    /// Returns Some(T) when this is a MAP type with key type T, or None
+    /// otherwise.
+    pub fn unwrap_map_key(&self) -> Option<Arc<DataType>> {
+        if self.is_map() {
+            self.type_parameter(0)
+        } else {
+            None
+        }
+    }
+
+    /// Returns whether this is the base type for this type, i.e. it does
+    /// not have a variation.
+    pub fn is_base_type(&self) -> bool {
+        self.variation.is_none()
+    }
+
+    /// Returns the type of the nth field of this struct. Returns None if
+    /// out of range or if this is known to not be a struct.
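+    ///
+    /// For a `STRUCT<i32, string>`, for example, `index_struct(1)` yields
+    /// the string type and `index_struct(2)` yields None.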
+    pub fn index_struct(&self, index: usize) -> Option<Arc<DataType>> {
+        if self.is_unresolved() {
+            Some(DataType::new_unresolved())
+        } else if self.is_struct() {
+            match self.parameters.get(index) {
+                Some(Parameter::Type(t)) => Some(t.clone()),
+                Some(Parameter::NamedType(_, t)) => Some(t.clone()),
+                _ => None,
+            }
+        } else {
+            None
+        }
+    }
+
+    /// Internal helper for split_field_names() and strip_field_names().
+    fn split_field_names_internal<F: FnMut(String)>(&self, namer: &mut F) -> Arc<DataType> {
+        let is_struct = self.is_struct();
+        let parameters = self
+            .parameters
+            .iter()
+            .cloned()
+            .enumerate()
+            .map(|(i, p)| {
+                let p = if is_struct {
+                    let (p, name) = p.split_name();
+                    namer(name.unwrap_or_else(|| i.to_string()));
+                    p
+                } else {
+                    p
+                };
+                p.map_type(|t| t.split_field_names_internal(namer))
+            })
+            .collect();
+        let class = if self.class == Class::Compound(Compound::NamedStruct) {
+            Class::Compound(Compound::Struct)
+        } else {
+            self.class.clone()
+        };
+        Arc::new(DataType {
+            class,
+            nullable: self.nullable,
+            variation: self.variation.clone(),
+            parameters,
+        })
+    }
+
+    /// Converts all NSTRUCT types in the tree to STRUCT, and returns the
+    /// flattened list of field names encountered. The fields of STRUCT types
+    /// are also returned, to ensure that the returned Vec is applicable to
+    /// apply_field_names(); their names are simply their zero-based index
+    /// converted to a string.
+    pub fn split_field_names(&self) -> (Arc<DataType>, Vec<String>) {
+        let mut names = vec![];
+        let data_type = self.split_field_names_internal(&mut |s| names.push(s));
+        (data_type, names)
+    }
+
+    /// Like split_field_names(), but drops the name strings.
+    pub fn strip_field_names(&self) -> Arc<DataType> {
+        self.split_field_names_internal(&mut |_| ())
+    }
+
+    /// Internal helper function for apply_field_names().
+    fn apply_field_names_internal<F: FnMut() -> diagnostic::Result<String>>(
+        &self,
+        mut namer: &mut F,
+    ) -> diagnostic::Result<Arc<DataType>> {
+        if self.is_struct() {
+            let parameters: Result<Vec<_>, _> = self
+                .parameters
+                .iter()
+                .cloned()
+                .map(|p| {
+                    p.with_name(&mut namer)?
+                        .map_type_result(|t| t.apply_field_names_internal(namer))
+                })
+                .collect();
+
+            // The data type may be invalid after renaming, so we need to
+            // call new() to check validity.
+            DataType::new(
+                Class::Compound(Compound::NamedStruct),
+                self.nullable,
+                self.variation.clone(),
+                parameters?,
+            )
+        } else {
+            let parameters: Result<Vec<_>, _> = self
+                .parameters
+                .iter()
+                .cloned()
+                .map(|p| p.map_type_result(|t| t.apply_field_names_internal(namer)))
+                .collect();
+
+            // Data types generated this way can never become invalid, so we
+            // can construct directly.
+            Ok(Arc::new(DataType {
+                class: self.class.clone(),
+                nullable: self.nullable,
+                variation: self.variation.clone(),
+                parameters: parameters?,
+            }))
+        }
+    }
+
+    /// Applies names to STRUCTs, or renames the names in NSTRUCTs, based on a
+    /// flattened vector of names.
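+    ///
+    /// For example, applying the names `["a", "b"]` to `STRUCT<i32, string>`
+    /// yields `NSTRUCT<a: i32, b: string>`; surplus or missing names are
+    /// reported as TypeMismatchedFieldNameAssociations errors.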
+    pub fn apply_field_names<S: ToString>(
+        &self,
+        names: &[S],
+    ) -> diagnostic::Result<Arc<DataType>> {
+        let mut names = names.iter();
+        let mut num_too_few = 0;
+        let mut namer = || {
+            Ok(names.next().map(|s| s.to_string()).unwrap_or_else(|| {
+                num_too_few += 1;
+                format!("unnamed{num_too_few}")
+            }))
+        };
+        let new_type = self.apply_field_names_internal(&mut namer)?;
+        let remainder = names.count();
+        if self.is_unresolved_deep() {
+            Ok(new_type)
+        } else if remainder > 0 {
+            Err(cause!(
+                TypeMismatchedFieldNameAssociations,
+                "received {remainder} too many field name(s)"
+            ))
+        } else if num_too_few > 0 {
+            Err(cause!(
+                TypeMismatchedFieldNameAssociations,
+                "received {num_too_few} too few field name(s)"
+            ))
+        } else {
+            Ok(new_type)
+        }
+    }
+}
+
+impl Default for DataType {
+    fn default() -> Self {
+        DataType {
+            class: Class::Unresolved,
+            nullable: false,
+            variation: None,
+            parameters: vec![],
+        }
+    }
+}
+
+/// Trait for checking the type parameters for a base type.
+pub trait ParameterInfo {
+    /// Checks whether the given parameter set is valid for this base type.
+    fn check_parameters(&self, params: &[Parameter]) -> diagnostic::Result<()>;
+
+    /// Returns the logical name of the given parameter.
+    fn parameter_name(&self, index: usize) -> Option<String>;
+
+    /// Whether this type supports parameters. This is used to determine
+    /// whether to print <> when the parameter list is empty. This is used to
+    /// distinguish a concrete empty struct from a struct with unspecified
+    /// fields.
+    fn has_parameters(&self) -> bool;
+}
+
+/// Type class.
+#[derive(Clone, Debug, PartialEq)]
+pub enum Class {
+    /// Well-known simple type.
+    Simple(Simple),
+
+    /// Well-known compound type.
+    Compound(Compound),
+
+    /// User-defined type.
+    UserDefined(Arc<extension::Reference<extension::DataType>>),
+
+    /// Unresolved type. Used for error recovery.
+    Unresolved,
+}
+
+impl std::fmt::Display for Class {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Class::Simple(simple) => write!(f, "{simple}"),
+            Class::Compound(compound) => write!(f, "{compound}"),
+            Class::UserDefined(user_defined) => write!(f, "{user_defined}"),
+            Class::Unresolved => write!(f, "!"),
+        }
+    }
+}
+
+impl ParameterInfo for Class {
+    fn check_parameters(&self, params: &[Parameter]) -> diagnostic::Result<()> {
+        match self {
+            Class::Simple(_) => {
+                if params.is_empty() {
+                    Ok(())
+                } else {
+                    Err(cause!(
+                        TypeMismatchedParameters,
+                        "simple types cannot be parameterized"
+                    ))
+                }
+            }
+            Class::Compound(compound) => compound.check_parameters(params),
+            Class::UserDefined(_) => {
+                if params.is_empty() {
+                    Ok(())
+                } else {
+                    Err(cause!(
+                        TypeMismatchedParameters,
+                        "user-defined types cannot currently be parameterized"
+                    ))
+                }
+            }
+            Class::Unresolved => Ok(()),
+        }
+    }
+
+    fn parameter_name(&self, index: usize) -> Option<String> {
+        if let Class::Compound(compound) = self {
+            compound.parameter_name(index)
+        } else {
+            None
+        }
+    }
+
+    fn has_parameters(&self) -> bool {
+        if let Class::Compound(compound) = self {
+            compound.has_parameters()
+        } else {
+            false
+        }
+    }
+}
+
+impl Class {
+    /// Checks whether two classes are equal, also returning true if either or
+    /// both are unresolved.
+    pub fn weak_equals(&self, rhs: &Class) -> bool {
+        match (self, rhs) {
+            (_, Class::Unresolved) | (Class::Unresolved, _) => true,
+            (a, b) => a == b,
+        }
+    }
+}
+
+/// Enumeration of simple types defined by Substrait.
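+///
+/// The case-insensitive, snake_case `EnumString` derive below also allows
+/// parsing, e.g. `"fp32".parse::<Simple>()` yields `Simple::Fp32`.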
+#[derive(Clone, Debug, PartialEq, Display, EnumString)] +#[strum(ascii_case_insensitive, serialize_all = "snake_case")] +pub enum Simple { + Boolean, + I8, + I16, + I32, + I64, + Fp32, + Fp64, + String, + Binary, + Timestamp, + TimestampTz, + Date, + Time, + IntervalYear, + IntervalDay, + Uuid, +} + +/// Enumeration of compound types defined by Substrait. +#[derive(Clone, Debug, PartialEq, Display, EnumString)] +#[strum(ascii_case_insensitive, serialize_all = "UPPERCASE")] +pub enum Compound { + FixedChar, + VarChar, + FixedBinary, + Decimal, + Struct, + #[strum(serialize = "NSTRUCT")] + NamedStruct, + List, + Map, +} + +impl ParameterInfo for Compound { + fn check_parameters(&self, params: &[Parameter]) -> diagnostic::Result<()> { + match self { + Compound::FixedChar | Compound::VarChar | Compound::FixedBinary => { + if params.len() != 1 { + return Err(cause!( + TypeMismatchedParameters, + "{self} expects a single parameter (length)" + )); + } + if let Parameter::Unsigned(length) = params[0] { + // Note: 2147483647 = 2^31-1 = maximum value for signed + // 32-bit integer. However, the significance of the number + // is just that the Substrait specification says this is + // the limit. + const MIN_LENGTH: u64 = 1; + const MAX_LENGTH: u64 = 2147483647; + if !(MIN_LENGTH..=MAX_LENGTH).contains(&length) { + return Err(cause!( + TypeMismatchedParameters, + "{self} length {length} is out of range {MIN_LENGTH}..{MAX_LENGTH}" + )); + } + } else { + return Err(cause!( + TypeMismatchedParameters, + "{self} length parameter must be a positive integer" + )); + } + } + Compound::Decimal => { + if params.len() != 2 { + return Err(cause!( + TypeMismatchedParameters, + "{self} expects two parameters (precision and scale)" + )); + } + if let Parameter::Unsigned(precision) = params[0] { + const MIN_PRECISION: u64 = 1; + const MAX_PRECISION: u64 = 38; + if !(MIN_PRECISION..=MAX_PRECISION).contains(&precision) { + return Err(cause!( + TypeMismatchedParameters, + "{self} precision {precision} is out of range {MIN_PRECISION}..{MAX_PRECISION}" + )); + } + if let Parameter::Unsigned(scale) = params[1] { + if scale > precision { + return Err(cause!( + TypeMismatchedParameters, + "{self} scale {scale} is out of range 0..{precision}" + )); + } + } else { + return Err(cause!( + TypeMismatchedParameters, + "{self} scale parameter must be a positive integer" + )); + } + } else { + return Err(cause!( + TypeMismatchedParameters, + "{self} precision parameter must be a positive integer" + )); + } + } + Compound::Struct => { + for param in params.iter() { + if !matches!(param, Parameter::Type(_)) { + return Err(cause!( + TypeMismatchedParameters, + "{self} parameters must be types" + )); + } + } + } + Compound::NamedStruct => { + let mut names = HashSet::with_capacity(params.len()); + for param in params.iter() { + if let Parameter::NamedType(name, _) = ¶m { + if !names.insert(name) { + return Err(cause!( + TypeMismatchedParameters, + "duplicate field name in {self}: {name}" + )); + } + } else { + return Err(cause!( + TypeMismatchedParameters, + "{self} parameters must be name-types pairs" + )); + } + } + } + Compound::List => { + if params.len() != 1 { + return Err(cause!( + TypeMismatchedParameters, + "{self} expects a single parameter (element type)" + )); + } + if !matches!(params[0], Parameter::Type(_)) { + return Err(cause!( + TypeMismatchedParameters, + "{self} element type parameter must be a type" + )); + } + } + Compound::Map => { + if params.len() != 2 { + return Err(cause!( + TypeMismatchedParameters, + 
"{self} expects two parameters (key type and value type)" + )); + } + if !matches!(params[0], Parameter::Type(_)) { + return Err(cause!( + TypeMismatchedParameters, + "{self} key type parameter must be a type" + )); + } + if !matches!(params[1], Parameter::Type(_)) { + return Err(cause!( + TypeMismatchedParameters, + "{self} value type parameter must be a type" + )); + } + } + } + Ok(()) + } + + fn parameter_name(&self, index: usize) -> Option { + match (self, index) { + (Compound::FixedChar, 0) => Some(String::from("length")), + (Compound::VarChar, 0) => Some(String::from("length")), + (Compound::FixedBinary, 0) => Some(String::from("length")), + (Compound::Decimal, 0) => Some(String::from("precision")), + (Compound::Decimal, 1) => Some(String::from("scale")), + (Compound::Struct, i) => Some(format!("{}", i)), + (Compound::NamedStruct, i) => Some(format!("{}", i)), + (Compound::List, 0) => Some(String::from("element")), + (Compound::Map, 0) => Some(String::from("key")), + (Compound::Map, 1) => Some(String::from("value")), + (_, _) => None, + } + } + + fn has_parameters(&self) -> bool { + true + } +} + +/// Parameter for parameterized types. +#[derive(Clone, Debug, PartialEq)] +pub enum Parameter { + /// Type parameter (list element type, struct element types, etc). + Type(Arc), + + /// Named type parameter (named struct/schema pseudotype elements). + NamedType(String, Arc), + + /// Integral type parameter (varchar length, etc.). + Unsigned(u64), +} + +impl Describe for Parameter { + fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + match self { + Parameter::Type(data_type) => data_type.describe(f, limit), + Parameter::NamedType(name, data_type) => { + let (name_limit, type_limit) = limit.split(name.len()); + string_util::describe_identifier(f, name, name_limit)?; + write!(f, ": ")?; + data_type.describe(f, type_limit) + } + Parameter::Unsigned(value) => write!(f, "{value}"), + } + } +} + +impl std::fmt::Display for Parameter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + +impl Parameter { + /// Splits the name annotation off from a named type parameter. + pub fn split_name(self) -> (Parameter, Option) { + match self { + Parameter::NamedType(n, t) => (Parameter::Type(t), Some(n)), + p => (p, None), + } + } + + /// Returns the name of a named type parameter. + pub fn get_name(&self) -> Option<&str> { + match self { + Parameter::NamedType(n, _) => Some(n), + _ => None, + } + } + + /// Returns the type of a type parameter. + pub fn get_type(&self) -> Option<&Arc> { + match self { + Parameter::Type(t) => Some(t), + Parameter::NamedType(_, t) => Some(t), + _ => None, + } + } + + /// Annotates the parameter with a name, if applicable. If the parameter + /// was already named, the name is replaced. The function is only called + /// for Types and NamedTypes. None is returned only if the function was + /// called and returned None. + pub fn with_name Result>(self, f: F) -> Result { + Ok(match self { + Parameter::Type(t) => Parameter::NamedType(f()?, t), + Parameter::NamedType(_, t) => Parameter::NamedType(f()?, t), + p => p, + }) + } + + /// Modifies the contained type using the given function, if applicable. If + /// this is not a type parameter, the function is not called. 
+ pub fn map_type_result) -> Result, E>>( + self, + f: F, + ) -> Result { + Ok(match self { + Parameter::Type(t) => Parameter::Type(f(t)?), + Parameter::NamedType(n, t) => Parameter::NamedType(n, f(t)?), + p => p, + }) + } + + /// Modifies the contained type using the given function, if applicable. If + /// this is not a type parameter, the function is not called. + pub fn map_type) -> Arc>(self, f: F) -> Parameter { + match self { + Parameter::Type(t) => Parameter::Type(f(t)), + Parameter::NamedType(n, t) => Parameter::NamedType(n, f(t)), + p => p, + } + } +} + +impl From for Parameter { + fn from(t: DataType) -> Self { + Parameter::Type(Arc::new(t)) + } +} + +impl From> for Parameter { + fn from(t: Arc) -> Self { + Parameter::Type(t) + } +} + +impl From for Parameter { + fn from(x: u64) -> Self { + Parameter::Unsigned(x) + } +} diff --git a/rs/src/output/diagnostic.rs b/rs/src/output/diagnostic.rs new file mode 100644 index 00000000..5f8614bf --- /dev/null +++ b/rs/src/output/diagnostic.rs @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for diagnostic message types. +//! +//! Since diagnostic messages are rather important for a validator (after all, +//! getting a diagnostic message is hardly an exceptional case), they have +//! quite a bit of metadata attached to them. Ultimately, the diagnostic +//! messages attached to the tree ([`Diagnostic`]) have the following +//! parameters: +//! +//! - cause.message: an enumeration of various types of error messages, in +//! the usual Rust way. Messages generated by this crate are usually +//! untyped (they just use String), but error information from other +//! crates is retained as much as possible. +//! - cause.classification: an enumeration of various bits of the validation +//! process where diagnostics might occur. Each [`Classification`] enum +//! variant can be converted to a unique number, known as the diagnostic +//! code, which the user of the crate may use to easily programmatically +//! determine what caused a diagnostic in a language-agnostic way. The user +//! may also configure the validator in advance to promote or reduce the +//! severity of diagnostics, indexed by their code. The codes are +//! furthermore organized into groups, with up to 999 classes per group: the +//! thousands digit and up is the group identifier, and the less-significant +//! digits form the sub-code. Sub-code 0 is reserved to refer to the group +//! as a whole. +//! - original_level: the error [`Level`] that the validation code assigned to +//! the message. This can be `Error`, `Warning`, or `Info`, which correspond +//! directly to "this is definitely wrong," "this may or may not be wrong," +//! and "this conforms to the Substrait specification, but it's worth noting +//! anyway" respectively. +//! - adjusted_level: the error [`Level`] after configuration-based adjustment. +//! This level is what's used by the high-level APIs to determine the +//! validity of a plan. Thus, a user can choose to ignore a particular error +//! if their consumer implementation can deal with it anyway, or they can +//! assert whether a particular type of warning is actually an error or not. +//! - path: a path into the substrait.Plan message. This is *usually* just a +//! copy of the path to the node that was being validated when the +//! diagnostic was created, but in some cases diagnostics may be placed in a +//! parent node (for instance to refer to a node that should exist but +//! 
doesn't), or refer to a different location altogether (for instance to +//! point the user to the previous definition in a note following a +//! duplicate definition error). + +use crate::output::path; +use num_traits::cast::FromPrimitive; +use std::sync::Arc; +use strum::EnumProperty; + +/// Owned variant of jsonschema::error::ValidationError<'a>. Instead of a +/// reference to the YAML tree node that caused the error, this just contains +/// the formatted error message. The validation error kind and paths are +/// however retained. +#[derive(Debug, thiserror::Error)] +pub struct JsonSchemaValidationError { + pub message: String, + pub kind: jsonschema::error::ValidationErrorKind, + pub instance_path: jsonschema::paths::JSONPointer, + pub schema_path: jsonschema::paths::JSONPointer, +} + +impl std::fmt::Display for JsonSchemaValidationError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.message.fmt(f) + } +} + +impl From> for JsonSchemaValidationError { + fn from(v: jsonschema::error::ValidationError) -> Self { + JsonSchemaValidationError { + message: v.to_string(), + kind: v.kind, + instance_path: v.instance_path, + schema_path: v.schema_path, + } + } +} + +/// Enumeration for error message data we might encounter. +#[derive(Debug, thiserror::Error)] +pub enum Message { + #[error("{0}")] + Untyped(String), + + #[error("{0}")] + ProstDecodeError(#[from] prost::DecodeError), + + #[error("{0}")] + IoError(#[from] std::io::Error), + + #[error("{0}")] + UtfError(#[from] std::str::Utf8Error), + + #[error("{0}")] + YamlScanError(#[from] yaml_rust::ScanError), + + #[error("{0}")] + JsonSchemaValidationError(#[from] JsonSchemaValidationError), + + #[error("{0}")] + UriError(#[from] uriparse::URIReferenceError), + + #[error("{0}")] + GlobError(#[from] glob::PatternError), +} + +impl From<&str> for Message { + fn from(s: &str) -> Self { + Message::Untyped(s.to_string()) + } +} + +impl From for Message { + fn from(s: String) -> Self { + Message::Untyped(s) + } +} + +impl From> for Message { + fn from(v: jsonschema::error::ValidationError<'_>) -> Self { + JsonSchemaValidationError::from(v).into() + } +} + +/// Enumeration for the particular types of diagnostics we might encounter. +/// +/// Numbers must be assigned as follows: +/// - the group identifier is represented by the thousands digit and up; +/// - the first classification for each group (i.e. divisible by 1000) is +/// reserved for diagnostics that have no more specific information +/// attached to them: their description must be hidden and related to +/// the group name; +/// - group 0 is a sort of null group, where no group information is known; +/// - all enum variant names for classifications belonging to a group (except +/// the null group) must be prefixed by the group name; +/// - for backward/forward-compatibility, numbers should not be reassigned. +/// +/// The Description and HiddenDescription enum properties define a description +/// of the class. When Description is used, the description is prefixed before +/// the error message; when HiddenDescription is used, the message is not +/// prefixed, and should thus be sufficiently specific to not need it. The +/// latter is useful to reduce the amount of redundant information in a +/// message. +#[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + strum_macros::EnumIter, + strum_macros::EnumProperty, + num_derive::FromPrimitive, +)] +pub enum Classification { + // Unclassified diagnostics (group 0). 
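+    // As a worked example of the numbering scheme described above: code 2004
+    // has thousands digit 2, so it belongs to group 2 (Yaml = 2000), with
+    // sub-code 4 within that group.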
+ #[strum(props(HiddenDescription = "unclassified diagnostic"))] + Unclassified = 0, + + #[strum(props(Description = "not yet implemented"))] + NotYetImplemented = 1, + + #[strum(props(Description = "illegal value"))] + IllegalValue = 2, + + #[strum(props(Description = "illegal value in hint"))] + IllegalValueInHint = 3, + + #[strum(props(Description = "illegal URI"))] + IllegalUri = 4, + + #[strum(props(Description = "illegal glob"))] + IllegalGlob = 5, + + // Protobuf-related diagnostics (group 1). + #[strum(props(HiddenDescription = "protobuf-related diagnostic"))] + Proto = 1000, + + #[strum(props(HiddenDescription = "protobuf parsing failed"))] + ProtoParseFailed = 1001, + + #[strum(props(Description = "missing required protobuf field"))] + ProtoMissingField = 1002, + + #[strum(props(Description = "encountered a protobuf \"any\""))] + ProtoAny = 1004, + + #[strum(props(Description = "missing protobuf \"any\" declaration"))] + ProtoMissingAnyDeclaration = 1006, + + // YAML-reated diagnostics (group 2). + #[strum(props(HiddenDescription = "YAML-related diagnostic"))] + Yaml = 2000, + + #[strum(props(Description = "did not attempt to resolve YAML"))] + YamlResolutionDisabled = 2001, + + #[strum(props(Description = "failed to resolve YAML"))] + YamlResolutionFailed = 2002, + + #[strum(props(Description = "failed to parse YAML"))] + YamlParseFailed = 2003, + + #[strum(props(Description = "YAML does not conform to schema"))] + YamlSchemaValidationFailed = 2004, + + #[strum(props(Description = "missing required YAML key"))] + YamlMissingKey = 2005, + + #[strum(props(Description = "missing required YAML array element"))] + YamlMissingElement = 2007, + + #[strum(props(Description = "invalid YAML value type"))] + YamlInvalidType = 2008, + + // Link resolution diagnostics (group 3). + #[strum(props(HiddenDescription = "link resolution diagnostic"))] + Link = 3000, + + #[strum(props(Description = "failed to resolve anchor"))] + LinkMissingAnchor = 3001, + + #[strum(props(Description = "failed to resolve function name"))] + LinkMissingFunctionName = 3002, + + #[strum(props(Description = "failed to resolve type name"))] + LinkMissingTypeName = 3003, + + #[strum(props(Description = "failed to resolve type variation name"))] + LinkMissingTypeVariationName = 3004, + + // Type-related diagnostics (group 4). + #[strum(props(HiddenDescription = "type-related diagnostics"))] + Type = 4000, + + #[strum(props(Description = "unknown type"))] + TypeUnknown = 4001, + + #[strum(props(Description = "mismatched type parameters"))] + TypeMismatchedParameters = 4002, + + #[strum(props(Description = "mismatched field name associations"))] + TypeMismatchedFieldNameAssociations = 4003, + + #[strum(props(Description = "invalid swizzle operation"))] + TypeInvalidSwizzle = 4004, + + #[strum(props(Description = "mismatched types"))] + TypeMismatch = 4005, + + #[strum(props(Description = "struct type is required"))] + TypeStructRequired = 4006, + + #[strum(props(Description = "mismatched type variation"))] + TypeMismatchedVariation = 4007, + + #[strum(props(Description = "mismatched nullability"))] + TypeMismatchedNullability = 4008, + + // Relation-related diagnostics (group 5). 
+ #[strum(props(HiddenDescription = "relation-related diagnostics"))] + Relation = 5000, + + #[strum(props(Description = "missing root relation"))] + RelationRootMissing = 5001, + + #[strum(props(Description = "missing relation"))] + RelationMissing = 5002, + + #[strum(props(Description = "invalid relation"))] + RelationInvalid = 5003, + + // Expression-related diagnostics (group 6). + #[strum(props(HiddenDescription = "expression-related diagnostics"))] + Expression = 6000, + + #[strum(props(Description = "field reference into non-existent stream"))] + ExpressionFieldRefMissingStream = 6001, + + #[strum(props(Description = "illegal literal value"))] + ExpressionIllegalLiteralValue = 6002, + + #[strum(props(Description = "function definition unavailable"))] + ExpressionFunctionDefinitionUnavailable = 6003, + + #[strum(props(Description = "illegal subquery"))] + ExpressionIllegalSubquery = 6004, + + // Redundant declarations (group 7). + #[strum(props( + HiddenDescription = "diagnostics for pointing out parts of the plan that can be removed without changing its semantics" + ))] + Redundant = 7000, + + #[strum(props(Description = "redundant protobuf \"any\" declaration"))] + RedundantProtoAnyDeclaration = 7001, + + #[strum(props(Description = "redundant extension URI definition"))] + RedundantExtensionDefition = 7002, + + #[strum(props(Description = "redundant function declaration"))] + RedundantFunctionDeclaration = 7003, + + #[strum(props(Description = "redundant type declaration"))] + RedundantTypeDeclaration = 7004, + + #[strum(props(Description = "redundant type variation declaration"))] + RedundantTypeVariationDeclaration = 7005, + + #[strum(props(Description = "redundant list slice"))] + RedundantListSlice = 7006, + + #[strum(props(Description = "redundant field"))] + RedundantField = 7007, +} + +impl Default for Classification { + fn default() -> Self { + Classification::Unclassified + } +} + +impl Classification { + /// Returns the complete code for this classification. + pub fn code(&self) -> u32 { + *self as u32 + } + + /// Returns the name of the classiciation. + pub fn name(&self) -> String { + format!("{:?}", self) + } + + /// Returns the group code for this classification. + pub fn group_code(&self) -> u32 { + (*self as u32) / 1000 + } + + /// Returns the group variant for this classification. + pub fn group(&self) -> Classification { + Self::from_group(self.group_code()) + .unwrap_or_else(|| panic!("missing group for {:?}", self)) + } + + /// Returns the code for this classification within its group. + pub fn sub_code(&self) -> u32 { + (*self as u32) % 1000 + } + + /// Returns the description of this classification. + pub fn description(&self) -> &str { + self.get_str("Description") + .or_else(|| self.get_str("HiddenDescription")) + .unwrap_or_else(|| { + panic!( + "missing Description or HiddenDescription property for {:?}", + self + ) + }) + } + + /// Returns the classification associated with the given code, if any. + pub fn from_code(code: u32) -> Option { + Self::from_u32(code) + } + + /// Returns the group classification associated with the given code, if + /// any. + pub fn group_from_code(code: u32) -> Option { + Self::from_group(code / 1000) + } + + /// Returns the group classification associated with the given group. + pub fn from_group(group: u32) -> Option { + Self::from_u32(group * 1000) + } + + /// Returns the "parent" code for the given code. For non-group codes, this + /// is the code of their group (code rounded down to thousands). 
For group + /// codes, this is 0. + pub fn parent(code: u32) -> u32 { + if code % 1000 != 0 { + (code / 1000) * 1000 + } else { + 0 + } + } + + /// Formats a Message with this classification. + pub fn format_message( + &self, + message: &Message, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + if let Some(description) = self.get_str("Description") { + write!(f, "{description}: ")?; + } + write!(f, "{message} (code {:04})", self.code()) + } +} + +impl From for u32 { + /// Converts a Classification into its error code. + fn from(classification: Classification) -> Self { + classification.code() + } +} + +/// Description of the cause of a diagnostic. +#[derive(Clone, Debug, thiserror::Error)] +pub struct Cause { + /// The error message. Within this crate we don't bother typing these + /// beyond the Classification enum, but we do retain typing information for + /// messages from other crates. + pub message: Arc, + + /// Classification of this cause. This attaches an error code and generic + /// message for said code to the diagnostic message. The user can use these + /// codes to for instance always promote a particular type of diagnostic to + /// an error (like gcc -Werror). + pub classification: Classification, +} + +impl PartialEq for Cause { + fn eq(&self, other: &Self) -> bool { + self.message.to_string() == other.message.to_string() + && self.classification == other.classification + } +} + +impl std::fmt::Display for Cause { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.classification.format_message(&self.message, f) + } +} + +/// Convenience/shorthand macro for creating error diagnostics. Use this +/// variant when you have something that can be cast into a Message via into(), +/// like a pre-formatted string or a compatible Error type from a dependency. +macro_rules! ecause { + ($class:ident, $message:expr) => { + crate::output::diagnostic::Cause { + message: std::sync::Arc::new($message.into()), + classification: crate::output::diagnostic::Classification::$class, + } + }; +} + +/// Convenience/shorthand macro for creating error diagnostics. Use this +/// variant when you want to format a string. The argument list beyond the +/// diagnostic class identifier is passed straight to [`format!`]. +macro_rules! cause { + ($class:ident, $($args:expr),*) => { + ecause!($class, format!($($args),*)) + }; +} + +/// Result type for diagnostic causes. +pub type Result = std::result::Result; + +/// Error level for a diagnostic message. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Level { + /// Level used for diagnostics that don't point out anything wrong with + /// the plan, and merely provide additional information. + Info, + + /// Level used for diagnostics that may or may not indicate that there + /// is something wrong with the plan, i.e. the plan *could* be valid, + /// but the validator isn't sure. + Warning, + + /// Level used for diagnostics that indicate that there is definitely + /// something wrong with the plan. + Error, +} + +/// A diagnostic message, without configuration-based level override. +#[derive(Clone, Debug, PartialEq, thiserror::Error)] +pub struct RawDiagnostic { + /// The cause of the diagnostic. + pub cause: Cause, + + /// The severity of the diagnostic. + pub level: Level, + + /// The path within the protobuf message where the diagnostic occurred. 
+ pub path: path::PathBuf, +} + +impl std::fmt::Display for RawDiagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.level)?; + if !f.alternate() { + write!(f, " at {}", self.path)?; + } + write!(f, ": {}", self.cause) + } +} + +/// A diagnostic message, including configuration-based level override. +#[derive(Clone, Debug, PartialEq, thiserror::Error)] +pub struct Diagnostic { + /// The cause of the diagnostic. + pub cause: Cause, + + /// The original severity of the diagnostic. + pub original_level: Level, + + /// The severity of the diagnostic after application of configuration. + pub adjusted_level: Level, + + /// The path within the protobuf message where the diagnostic occurred. + pub path: path::PathBuf, +} + +impl std::fmt::Display for Diagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.adjusted_level)?; + match self.original_level.cmp(&self.adjusted_level) { + std::cmp::Ordering::Less => write!(f, " (upgraded from {:?})", self.original_level)?, + std::cmp::Ordering::Equal => {} + std::cmp::Ordering::Greater => { + write!(f, " (downgraded from {:?})", self.original_level)? + } + } + if !f.alternate() { + write!(f, " at {}", self.path)?; + } + write!(f, ": {}", self.cause) + } +} + +impl RawDiagnostic { + /// Converts to an AdjustedDiagnostic by adding an adjusted level. + pub fn adjust_level(self, adjusted_level: Level) -> Diagnostic { + Diagnostic { + cause: self.cause, + original_level: self.level, + adjusted_level, + path: self.path, + } + } +} + +/// Convenience/shorthand macro for creating error diagnostics. +macro_rules! diag { + ($path:expr, $level:ident, $class:ident, $($args:expr),*) => { + diag!($path, $level, cause!($class, $($args),*)) + }; + ($path:expr, $level:ident, $cause:expr) => { + crate::output::diagnostic::RawDiagnostic { + cause: $cause, + level: crate::output::diagnostic::Level::$level, + path: $path + } + }; +} +/*macro_rules! ediag { + ($path:expr, $level:ident, $class:ident, $err:expr) => { + diag!($path, $level, ecause!($class, $err)) + }; +}*/ + +/// Result type for complete diagnostics, including path. +pub type DiagResult = std::result::Result; + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashSet; + use strum::IntoEnumIterator; + + #[test] + fn test_diagnostic_classifications() { + // Check validity of the classifications definitions. + let mut descriptions = HashSet::new(); + for class in Classification::iter() { + let group = class.group(); + if group != Classification::Unclassified { + assert!( + class.name().starts_with(&group.name()), + "incorrect group prefix for {:?}, should start with {:?}", + class, + group + ); + } + assert!( + descriptions.insert(class.description().to_string()), + "duplicate description for {:?}", + class + ); + } + } +} diff --git a/rs/src/output/extension.rs b/rs/src/output/extension.rs new file mode 100644 index 00000000..abc9689e --- /dev/null +++ b/rs/src/output/extension.rs @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for dealing with YAML-based Substrait extensions. + +use crate::output::data_type; +use crate::output::path; +use crate::output::tree; +use crate::string_util; +use std::collections::HashMap; +use std::sync::Arc; + +/// Represents a named reference to something. +#[derive(Clone, Debug, Default)] +pub struct NamedReference { + /// The name of the type, type variation, or function. 
+    name: Option<String>,
+
+    /// The path to the node that defined the anchor for this extension, if
+    /// any.
+    anchor_path: Option<path::PathBuf>,
+}
+
+impl PartialEq for NamedReference {
+    /// Named references are equal if both references have a known name and
+    /// those names are the same.
+    fn eq(&self, other: &Self) -> bool {
+        self.name.is_some() && other.name.is_some() && self.name == other.name
+    }
+}
+
+impl Eq for NamedReference {}
+
+impl std::fmt::Display for NamedReference {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(name) = &self.name {
+            write!(f, "{}", string_util::as_ident_or_string(name))
+        } else {
+            write!(f, "?")
+        }
+    }
+}
+
+impl NamedReference {
+    /// Create a new anchor-based reference.
+    pub fn new<S: ToString>(
+        name: Option<S>,
+        anchor_path: Option<path::PathBuf>,
+    ) -> Arc<NamedReference> {
+        Arc::new(NamedReference {
+            name: name.map(|x| x.to_string()),
+            anchor_path,
+        })
+    }
+
+    /// Create a new named reference.
+    pub fn new_by_name<S: ToString>(name: S) -> Arc<NamedReference> {
+        Arc::new(NamedReference {
+            name: Some(name.to_string()),
+            anchor_path: None,
+        })
+    }
+
+    /// Create a new unknown reference.
+    pub fn new_unknown() -> Arc<NamedReference> {
+        Arc::default()
+    }
+
+    /// Returns the name, if known.
+    pub fn name(&self) -> Option<&str> {
+        self.name.as_ref().map(|s| &s[..])
+    }
+
+    /// Returns the path to the anchor, if known.
+    pub fn anchor_path(&self) -> Option<&path::PathBuf> {
+        self.anchor_path.as_ref()
+    }
+}
+
+/// Named/namespaced reference to a particular extension definition.
+#[derive(Clone, Debug, Default)]
+pub struct Reference<T> {
+    /// The name of the type, type variation, or function.
+    pub name: Arc<NamedReference>,
+
+    /// The URI of the YAML file that defined this extension.
+    pub uri: Arc<NamedReference>,
+
+    /// Extension definition information, specific to this type of extension,
+    /// if we managed to resolve the reference.
+    pub definition: Option<Arc<T>>,
+}
+
+impl<T> PartialEq for Reference<T> {
+    /// References are equal if they refer to the same thing, regardless of
+    /// how they refer to it. If we're not sure because either reference is
+    /// (partially) unresolved, return false pessimistically.
+    fn eq(&self, other: &Self) -> bool {
+        self.name == other.name && self.uri == other.uri
+    }
+}
+
+impl<T> Eq for Reference<T> {}
+
+impl<T> std::fmt::Display for Reference<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}::{}", self.uri, self.name)
+    }
+}
+
+/// User-defined base data type.
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct DataType {
+    /// The underlying structure of the type.
+    pub structure: Vec<(String, data_type::Simple)>,
+}
+
+/// The base type of a type variation.
+#[derive(Clone, Debug, PartialEq)]
+pub enum TypeVariationBase {
+    /// The type variation is immediately based in a physical type.
+    Physical(data_type::Class),
+
+    /// The type variation is based in another logical type variation.
+    Logical(Arc<TypeVariation>),
+
+    /// The base type is unknown.
+    Unresolved,
+}
+
+impl Default for TypeVariationBase {
+    fn default() -> Self {
+        TypeVariationBase::Unresolved
+    }
+}
+
+/// Type variation extension.
+#[derive(Clone, Debug, PartialEq, Default)]
+pub struct TypeVariation {
+    /// The base type for this variation.
+    pub base: TypeVariationBase,
+
+    /// Function behavior for this variation.
+    pub function_behavior: FunctionBehavior,
+}
+
+impl TypeVariation {
+    /// Return the base class for this type variation, if known.
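+    ///
+    /// For example (a sketch; both variations below resolve to the i32
+    /// class, the second via one step of recursion):
+    ///
+    /// ```ignore
+    /// let physical = TypeVariation {
+    ///     base: TypeVariationBase::Physical(data_type::Class::Simple(data_type::Simple::I32)),
+    ///     ..Default::default()
+    /// };
+    /// let logical = TypeVariation {
+    ///     base: TypeVariationBase::Logical(Arc::new(physical)),
+    ///     ..Default::default()
+    /// };
+    /// assert_eq!(
+    ///     logical.get_base_class(),
+    ///     data_type::Class::Simple(data_type::Simple::I32)
+    /// );
+    /// ```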
+ pub fn get_base_class(&self) -> data_type::Class { + match &self.base { + TypeVariationBase::Physical(x) => x.clone(), + TypeVariationBase::Logical(x) => x.get_base_class(), + TypeVariationBase::Unresolved => data_type::Class::Unresolved, + } + } +} + +/// Type variation function behavior. +#[derive(Clone, Debug, PartialEq)] +pub enum FunctionBehavior { + Inherits, + Separate, +} + +impl Default for FunctionBehavior { + fn default() -> Self { + FunctionBehavior::Inherits + } +} + +/// Function extension. +#[derive(Clone, Debug, PartialEq, Default)] +pub struct Function { + // TODO: need much more information here to do type checking. +} + +/// Information about a YAML extension, which may or may not be resolved. +#[derive(Clone, Debug, PartialEq)] +pub enum YamlInfo { + Unresolved(Arc), + Resolved(Arc), +} + +impl YamlInfo { + pub fn data(&self) -> Option<&YamlData> { + match self { + YamlInfo::Unresolved(_) => None, + YamlInfo::Resolved(x) => Some(x), + } + } + + pub fn uri(&self) -> &Arc { + match self { + YamlInfo::Unresolved(x) => x, + YamlInfo::Resolved(x) => &x.uri, + } + } +} + +impl Default for YamlInfo { + fn default() -> Self { + YamlInfo::Unresolved(Arc::default()) + } +} + +impl std::fmt::Display for YamlInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.uri()) + } +} + +/// Data for a resolved YAML file. +#[derive(Clone, Debug, PartialEq)] +pub struct YamlData { + /// URI for the YAML file. + pub uri: Arc, + + /// Reference to the parsed YAML data, if any. + pub data: tree::NodeReference, + + /// Functions defined in this YAML file. Names are stored in lower case + /// (Substrait's name resolution is case-insensitive). + pub functions: HashMap>, + + /// Types defined in this YAML file. Names are stored in lower case + /// (Substrait's name resolution is case-insensitive). + pub types: HashMap>, + + /// Type variations defined in this YAML file. Names are stored in lower + /// case (Substrait's name resolution is case-insensitive). + pub type_variations: HashMap>, +} + +impl YamlData { + /// Constructs an empty YamlData object with an invalid reference to the + /// data node. Everything still needs to be populated for this to become + /// valid. + pub fn new(uri: Arc) -> YamlData { + YamlData { + uri, + data: tree::NodeReference { + path: path::Path::Root("").to_path_buf(), + node: Arc::new(tree::NodeType::YamlMap.into()), + }, + functions: HashMap::default(), + types: HashMap::default(), + type_variations: HashMap::default(), + } + } + + /// Helper function for the various resolvers. + fn local_reference( + &self, + name: S, + definition: Option>, + ) -> Arc> { + Arc::new(Reference { + name: NamedReference::new_by_name(name), + uri: self.uri.clone(), + definition, + }) + } + + /// Resolves a function defined in this YAML data block by name. Returns an + /// unresolved reference if it does not exist. + pub fn resolve_function(&self, name: S) -> Arc> { + let name = name.to_string(); + let maybe_def = self.functions.get(&name).cloned(); + self.local_reference(name, maybe_def) + } + + /// Resolves a type defined in this YAML data block by name. Returns an + /// unresolved reference if it does not exist. + pub fn resolve_type(&self, name: S) -> Arc> { + let name = name.to_string(); + let maybe_def = self.types.get(&name).cloned(); + self.local_reference(name, maybe_def) + } + + /// Resolves a type variation defined in this YAML data block by name. + /// Returns an unresolved reference if it does not exist. 
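+    ///
+    /// A sketch of the lookup (names are stored lower-case, so callers are
+    /// expected to normalize case first; `"my_var"` is hypothetical):
+    ///
+    /// ```ignore
+    /// let reference = yaml_data.resolve_type_variation("my_var");
+    /// if reference.definition.is_none() {
+    ///     // Not declared in this YAML file; the reference stays unresolved.
+    /// }
+    /// ```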
+ pub fn resolve_type_variation(&self, name: S) -> Arc> { + let name = name.to_string(); + let maybe_def = self.type_variations.get(&name).cloned(); + self.local_reference(name, maybe_def) + } +} diff --git a/rs/src/output/mod.rs b/rs/src/output/mod.rs new file mode 100644 index 00000000..3cfb23df --- /dev/null +++ b/rs/src/output/mod.rs @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Output representation module. +//! +//! This module provides the data structures for representing the output of the +//! validator. + +#[macro_use] +pub mod diagnostic; + +pub mod comment; +pub mod data_type; +pub mod extension; +pub mod parse_result; +pub mod path; +pub mod primitive_data; +pub mod tree; diff --git a/rs/src/output/parse_result.rs b/rs/src/output/parse_result.rs new file mode 100644 index 00000000..29130846 --- /dev/null +++ b/rs/src/output/parse_result.rs @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for the toplevel type representing a parse/validation result. + +use crate::export; +use crate::output::diagnostic; +use crate::output::tree; + +/// Validity of a plan. +/// +/// Note that there is a one-to-one correspondence with Level. The only +/// difference between Level and Validity is that the variant names for Level +/// are more sensible in the context of a diagnostic, while the names for +/// Validity are more sensible when talking about a validation result as a +/// whole. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Validity { + /// The plan is valid. + Valid, + + /// The plan may or may not be valid; the validator was not able to prove + /// or disprove validity. + MaybeValid, + + /// The plan is invalid. + Invalid, +} + +impl From for Validity { + fn from(level: diagnostic::Level) -> Self { + match level { + diagnostic::Level::Info => Validity::Valid, + diagnostic::Level::Warning => Validity::MaybeValid, + diagnostic::Level::Error => Validity::Invalid, + } + } +} + +impl From for diagnostic::Level { + fn from(validity: Validity) -> Self { + match validity { + Validity::Valid => diagnostic::Level::Info, + Validity::MaybeValid => diagnostic::Level::Warning, + Validity::Invalid => diagnostic::Level::Error, + } + } +} + +/// Representation of a parse/validation result. +pub struct ParseResult { + /// The root node of the tree. + pub root: tree::Node, +} + +impl ParseResult { + /// Iterates over all diagnostic messages in the tree. + pub fn iter_diagnostics(&self) -> impl Iterator + '_ { + self.root.iter_diagnostics() + } + + /// Returns the first diagnostic of the highest severity level in the tree. + pub fn get_diagnostic(&self) -> Option<&diagnostic::Diagnostic> { + self.root.get_diagnostic() + } + + /// Returns whether the plan represented by the given parse tree is valid. + pub fn check(&self) -> Validity { + if let Some(diag) = self.get_diagnostic() { + diag.adjusted_level.into() + } else { + Validity::Valid + } + } + + /// Exports a parse tree to a file or other output device using the specified + /// data format. + pub fn export( + &self, + out: &mut T, + format: export::Format, + ) -> std::io::Result<()> { + export::export(out, format, "plan", self) + } +} diff --git a/rs/src/output/path.rs b/rs/src/output/path.rs new file mode 100644 index 00000000..731eb095 --- /dev/null +++ b/rs/src/output/path.rs @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for handling tree paths. +//! +//! The [`PathElement`], [`Path`], and [`PathBuf`] types are used to uniquely +//! 
refer to any node in a Substrait plan (or, more accurately, any +//! combination of protobuf and YAML data). [`Path`], and [`PathBuf`] work +//! roughly the same as [`std::path::Path`], and [`std::path::PathBuf`], but +//! for protobuf/YAML tree paths rather than filesystem paths. + +use crate::string_util; + +/// Element of a path to some field of a protobuf message and/or YAML file. +#[derive(Clone, Debug, PartialEq)] +pub enum PathElement { + /// Refers to an optional protobuf field with the given name within the + /// message, or a YAML map entry with the given key. + Field(String), + + /// Refers to one of the elements of a repeated field with the given + /// name within the message referred to by the parent path. + Repeated(String, usize), + + /// Refers to the selected variant of a OneOf field with the given name + /// within the message referred to by the parent path. The first str is + /// the field name, the second is the variant name. + Variant(String, String), + + /// Refers to an indexed element within a YAML array. + Index(usize), +} + +impl std::fmt::Display for PathElement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !f.alternate() { + match self { + PathElement::Index(_) => {} + _ => write!(f, ".")?, + } + } + match self { + PathElement::Field(field) => write!(f, "{}", string_util::as_ident_or_string(field)), + PathElement::Repeated(field, index) => { + write!(f, "{}[{index}]", string_util::as_ident_or_string(field)) + } + PathElement::Variant(field, variant) => write!( + f, + "{}<{}>", + string_util::as_ident_or_string(field), + string_util::as_ident_or_string(variant) + ), + PathElement::Index(index) => write!(f, "[{index}]"), + } + } +} + +impl PathElement { + /// Same as to_string(), but doesn't include the dot prefix for the + /// variants that would normally have one. + pub fn to_string_without_dot(&self) -> String { + format!("{:#}", self) + } +} + +/// Refers to a location within a protobuf message. +#[derive(Clone, Debug, PartialEq)] +pub struct PathBuf { + pub root: &'static str, + pub elements: Vec, +} + +impl std::fmt::Display for PathBuf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.root)?; + for element in self.elements.iter() { + write!(f, "{element}")?; + } + Ok(()) + } +} + +/// Used to track a location within a protobuf message. The owned version +/// is PathBuf. +#[derive(Clone, Debug, PartialEq)] +pub enum Path<'a> { + /// Refers to the root message. + Root(&'static str), + + /// Refers to an optional field with the given name within the message + /// referred to by the given parent path. + Select(&'a Path<'a>, PathElement), +} + +impl Default for Path<'_> { + fn default() -> Self { + Path::Root("") + } +} + +impl Path<'_> { + /// Returns a new Path that references an optional field with the + /// given name within the protobuf message referred to by the current + /// path, or likewise for the key within a YAML map. + pub fn with(&self, element: PathElement) -> Path { + Path::Select(self, element) + } + + /// Returns a new Path that references an optional field with the + /// given name within the protobuf message referred to by the current + /// path, or likewise for the key within a YAML map. + pub fn with_field>(&self, name: S) -> Path { + self.with(PathElement::Field(name.into())) + } + + /// Returns a new Path that references an element of a repeated field + /// with the given name within the message referred to by the current + /// path. 
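+    ///
+    /// For example (mirroring the unit tests below; each step must be bound
+    /// to a variable, because a `Path` borrows its parent):
+    ///
+    /// ```ignore
+    /// let plan = Path::Root("plan");
+    /// let relations = plan.with_field("relations");
+    /// let rel = relations.with_repeated("rel", 0);
+    /// assert_eq!(rel.to_string(), "plan.relations.rel[0]");
+    /// ```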
+ pub fn with_repeated>(&self, name: S, index: usize) -> Path { + self.with(PathElement::Repeated(name.into(), index)) + } + + /// Returns a new Path that references a particular variant of a + /// OneOf field with the given name within the message referred to + /// by the current path. + pub fn with_variant, V: Into>(&self, name: S, variant: V) -> Path { + self.with(PathElement::Variant(name.into(), variant.into())) + } + + /// Returns a new Path that references a YAML array element. + pub fn with_index(&self, index: usize) -> Path { + self.with(PathElement::Index(index)) + } +} + +impl std::fmt::Display for Path<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Path::Root(name) => write!(f, "{name}"), + Path::Select(parent, element) => write!(f, "{parent}{element}"), + } + } +} + +impl Path<'_> { + pub fn end_to_string(&self) -> String { + match self { + Path::Root(name) => name.to_string(), + Path::Select(_, element) => element.to_string(), + } + } + + /// Creates an owned version of this Path. + pub fn to_path_buf(&self) -> PathBuf { + match self { + Path::Root(name) => PathBuf { + root: name, + elements: vec![], + }, + Path::Select(parent, element) => { + let mut parent = parent.to_path_buf(); + parent.elements.push(element.clone()); + parent + } + } + } +} + +impl From> for PathBuf { + fn from(path: Path<'_>) -> Self { + path.to_path_buf() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn paths() { + let a = Path::Root("a"); + let b = a.with_field("b"); + let c = b.with_repeated("c", 42); + let d = c.with_variant("d", "e"); + let e = d.with_index(33); + let buf: PathBuf = e.to_path_buf(); + assert_eq!(e.to_string(), "a.b.c[42].d[33]"); + assert_eq!(buf.to_string(), "a.b.c[42].d[33]"); + } + + #[test] + fn non_ident_paths() { + let a = Path::Root("a"); + let b = a.with_field("4"); + let c = b.with_repeated("8", 15); + let d = c.with_variant("16", "23"); + let e = d.with_index(42); + let buf: PathBuf = e.to_path_buf(); + assert_eq!(e.to_string(), "a.\"4\".\"8\"[15].\"16\"<\"23\">[42]"); + assert_eq!(buf.to_string(), "a.\"4\".\"8\"[15].\"16\"<\"23\">[42]"); + } +} diff --git a/rs/src/output/primitive_data.rs b/rs/src/output/primitive_data.rs new file mode 100644 index 00000000..ea72e917 --- /dev/null +++ b/rs/src/output/primitive_data.rs @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for primitive data elements. +//! +//! The [`PrimitiveData`] enum is used to represent primitive data in the +//! input, for use in the leaf nodes of the tree. + +/// Enumeration for representing any type of primitive data that can be stored +/// in YAML or protobuf. +#[derive(Clone, Debug, PartialEq)] +pub enum PrimitiveData { + /// Used for nulls (YAML only). + Null, + + /// Used for booleans. + Bool(bool), + + /// Used for unsigned integers. + Unsigned(u64), + + /// Used for signed integers. + Signed(i64), + + /// Used for floating-point values. + Float(f64), + + /// Used for UTF-8 strings. + String(String), + + /// Used for bytestrings. + Bytes(Vec), + + /// Used for enumerations (protobuf only). + Enum(&'static str), + + /// Used for Any messages (protobuf only). 
+ Any(prost_types::Any), +} + +fn hexdump(f: &mut std::fmt::Formatter<'_>, x: &[u8]) -> std::fmt::Result { + for (i, b) in x.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + write!(f, "{:02X}", b)?; + } + Ok(()) +} + +impl std::fmt::Display for PrimitiveData { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PrimitiveData::Null => write!(f, "null"), + PrimitiveData::Bool(true) => write!(f, "true"), + PrimitiveData::Bool(false) => write!(f, "false"), + PrimitiveData::Unsigned(x) => write!(f, "{x}"), + PrimitiveData::Signed(x) => write!(f, "{x}"), + PrimitiveData::Float(x) => write!(f, "{x}"), + PrimitiveData::String(x) => write!(f, "{x:?}"), + PrimitiveData::Bytes(x) => hexdump(f, x), + PrimitiveData::Enum(x) => write!(f, "{x}"), + PrimitiveData::Any(x) => { + write!(f, "{}(", x.type_url)?; + hexdump(f, &x.value)?; + write!(f, ")") + } + } + } +} diff --git a/rs/src/output/tree.rs b/rs/src/output/tree.rs new file mode 100644 index 00000000..c3d10252 --- /dev/null +++ b/rs/src/output/tree.rs @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for the output tree structure. +//! +//! This module provides the types for the tree structure that constitutes +//! the output of the validator. The nodes in the tree are intended to +//! correspond exactly to the protobuf messages, primitives, and YAML values +//! (the latter actually using the JSON object model) that constitute the +//! incoming plan. Likewise, the structure of the tree is the same as the +//! input. However, unlike the input: +//! +//! - All nodes and the relations between them are encapsulated in generic +//! types, independent from the corresponding messages/values in the +//! original tree. This allows the tree to be traversed by generic code +//! with no understanding of Substrait. +//! - Additional information can be attached to the nodes, edges, and +//! between the edges, such as diagnostic messages and data type +//! information. +//! +//! The node type for the output trees is [`Node`]. This structure contains +//! a single [`NodeType`] enum variant and zero or more [`NodeData`] enum +//! variants in an ordered sequence to form the tree structure; [`NodeType`] +//! includes information about the node itself, while the [`NodeData`] +//! elements represent edges to other nodes ([`Child`]) or contextual +//! information. A subtree might look something like this: +//! +//! ```text +//! Node ---> ProtoMessage } Parent node +//! | +//! .--------------'--------------. +//! | | | | +//! v v v v +//! Child Diagnostic Comment Child } Edges +//! | | +//! v v +//! Node ---> ProtoPrimitive Node ---> ProtoMessage } Child nodes +//! | | +//! '-> PrimitiveData : +//! ``` +//! +//! Note that the [`Child`] struct includes information about how the child +//! node relates to its parent (which field, array element, etc) via +//! [`PathElement`](path::PathElement), such that the original tree structure +//! could in theory be completely reconstructed. +//! +//! Nevertheless, the conversion from protobuf/YAML to this tree structure is +//! only intended to be a one-way street; indeed, the output tree is not +//! intended to ever be treated as some executable query plan by a computer at +//! all. It serves only as an intermediate format for documentation, debug, +//! and/or validation output. The [export](mod@crate::export) module deals with +//! breaking this internal representation down further, into (file) formats +//! that are not specific to the Substrait validator. 
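+//!
+//! As a sketch, downstream code is expected to walk the tree generically;
+//! collecting all diagnostics, for example, reduces to:
+//!
+//! ```ignore
+//! for diag in root.iter_diagnostics() {
+//!     println!("{diag}");
+//! }
+//! ```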
+ +use crate::output::comment; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::output::extension; +use crate::output::path; +use crate::output::primitive_data; +use std::collections::VecDeque; +use std::sync::Arc; + +/// Node for a semi-structured documentation-like tree representation of a +/// parsed Substrait plan. The intention is for this to be serialized into +/// some human-readable format. +/// +/// Note: although it should be possible to reconstruct the entire plan from +/// the information contained in the tree, the tree is only intended to be +/// converted to structured human-readable documentation for the plan. It is +/// expressly NOT intended to be read as a form of AST by a downstream +/// process, and therefore isn't nearly as strictly-typed as you would +/// otherwise want it to be. Protobuf itself is already a reasonable format +/// for this! +#[derive(Clone, Debug, PartialEq)] +pub struct Node { + /// The type of a node in terms of plan semantics. + pub class: Class, + + /// An optional brief description of the node. This can be regarded as + /// a comment placed at the start of the data vector, but it is usually + /// only set at the end of the parse function. + pub brief: Option, + + /// An optional comment summarizing what this node does. This can be + /// regarded as a comment placed at the start of the data vector (just + /// after brief, if brief is also defined), but it is usually only set + /// at the end of the parse function. + pub summary: Option, + + /// The type of node in terms of what it represents in the original + /// data structure. + pub node_type: NodeType, + + /// The type of data returned by this node, if any. Depending on the + /// message and context, this may represent a table schema or scalar + /// data. + pub data_type: Option>, + + /// The information gathered about the message. + /// + /// This normally includes all child nodes for this message, possibly + /// interspersed with diagnostics, type information, and unstructured + /// comment nodes to provide context, all ordered in a reasonable way. + /// Note however that this information is intended to be understood by + /// a human, not by the validator itself (aside from serialization to a + /// human-readable notation). + pub data: Vec, +} + +impl From for Node { + fn from(node_type: NodeType) -> Self { + Node { + class: Class::Misc, + brief: None, + summary: None, + node_type, + data_type: None, + data: vec![], + } + } +} + +impl Node { + /// Returns an iterator that iterates over all nodes depth-first. + pub fn iter_flattened_nodes(&self) -> FlattenedNodeIter { + FlattenedNodeIter { + remaining: VecDeque::from(vec![self]), + } + } + + /// Returns an iterator that iterates over all NodeData objects in the + /// order in which they were defined. + pub fn iter_flattened_node_data(&self) -> FlattenedNodeDataIter { + FlattenedNodeDataIter { + remaining: self.data.iter().rev().collect(), + } + } + + /// Iterates over all diagnostics in the tree. + pub fn iter_diagnostics(&self) -> impl Iterator + '_ { + self.iter_flattened_node_data().filter_map(|x| match x { + NodeData::Diagnostic(d) => Some(d), + _ => None, + }) + } + + /// Returns the first diagnostic of the highest severity level in the tree. + pub fn get_diagnostic(&self) -> Option<&diagnostic::Diagnostic> { + let mut result: Option<&diagnostic::Diagnostic> = None; + for diag in self.iter_diagnostics() { + // We can return immediately for error diagnostics, since this is the + // highest level. 
+ if diag.adjusted_level == diagnostic::Level::Error { + return Some(diag); + } + + // For other levels, update only if the incoming diagnostic is of a + // higher level/severity than the current one. + if let Some(cur) = result.as_mut() { + if diag.adjusted_level > (*cur).adjusted_level { + *cur = diag; + } + } else { + result = Some(diag); + } + } + result + } + + /// Returns a reference to the data type that this node returns at runtime + /// or (for type nodes) represents. If no type information is attached, a + /// reference to a default-generated unresolved type is returned. + pub fn data_type(&self) -> Arc { + self.data_type.clone().unwrap_or_default() + } +} + +/// The original data type that the node represents, to (in theory) allow the +/// original structure of the plan to be recovered from the documentation tree. +#[derive(Clone, Debug, PartialEq)] +pub enum NodeType { + /// The associated node represents a protobuf message of the given type + /// (full protobuf path). The contents of the message are described using + /// Field, RepeatedField, and OneOfField. + ProtoMessage(&'static str), + + /// The associated node represents a protobuf primitive value of the given + /// type and with the given data. + ProtoPrimitive(&'static str, primitive_data::PrimitiveData), + + /// The associated node represents an unpopulated oneof field. This is used + /// for an error recovery node when a required oneof field is not + /// populated. + ProtoMissingOneOf, + + /// Used for anchor/reference-based references to other nodes. + NodeReference(u64, NodeReference), + + /// Used for resolved YAML URIs, in order to include the parse result and + /// documentation for the referenced YAML (if available), in addition to + /// the URI itself. + YamlReference(Arc), + + /// The associated node represents a YAML map. The contents of the map are + /// described using Field and UnknownField. + YamlMap, + + /// The associated node represents a YAML array. The contents of the array + /// are described using ArrayElement datums. + YamlArray, + + /// The associated node represents a YAML primitive. + YamlPrimitive(primitive_data::PrimitiveData), +} + +/// Semantical information about a node. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Class { + /// Used for nodes for which no better classification exists. + Misc, + + /// Used for nodes that define a type. The data_type field signifies this + /// data type. + Type, + + /// Used for nodes that represent scalar expressions or literals. The + /// data_type field signifies the type of the value returned by the + /// expression. + Expression, + + /// Used for nodes that represent relations. The data_type field signifies + /// the schema for the data returned by the relation. + Relation, +} + +/// Information nodes for a parsed protobuf message. +#[derive(Clone, Debug, PartialEq)] +pub enum NodeData { + /// A reference to a child node in the tree. + Child(Child), + + /// Indicates that parsing/validating this message resulted in some + /// diagnostic message being emitted. The secondary error level is the + /// modified level via + Diagnostic(diagnostic::Diagnostic), + + /// Provides (intermediate) type information for this node. Depending on + /// the message, this may be a struct or named struct representing a + /// schema, or it may represent the type of some scalar expression. + /// Multiple TypeInfo nodes may be present, in particular for relations + /// that perform multiple operations in one go (for example read, project, + /// emit). 
The TypeInfo and operation description *Field nodes are then + /// ordered by data flow. In particular, the last TypeInfo node always + /// represents the type of the final result of a node. + DataType(Arc), + + /// Used for adding unstructured additional information to a message, + /// wherever this may aid human understanding of a message. + Comment(comment::Comment), +} + +/// Reference to a child node in the tree. +#[derive(Clone, Debug, PartialEq)] +pub struct Child { + /// Path element identifying the relation of this child node to its parent. + pub path_element: path::PathElement, + + /// The child node. + pub node: Arc, + + /// Whether the validator recognized/expected the field or element that + /// this child represents. Fields/elements may be unrecognized simply + /// because validation is not implemented for them yet. In any case, this + /// flag indicates that the subtree represented by this node could not be + /// validated. + pub recognized: bool, +} + +/// A reference to a node elsewhere in the tree. +#[derive(Clone, Debug, PartialEq)] +pub struct NodeReference { + /// Absolute path to the node. + pub path: path::PathBuf, + + /// Link to the node. + pub node: Arc, +} + +pub struct FlattenedNodeIter<'a> { + remaining: VecDeque<&'a Node>, +} +impl<'a> Iterator for FlattenedNodeIter<'a> { + type Item = &'a Node; + + fn next(&mut self) -> Option { + let maybe_node = self.remaining.pop_back(); + if let Some(node) = maybe_node { + self.remaining + .extend(node.data.iter().rev().filter_map(|x| -> Option<&Node> { + if let NodeData::Child(child) = x { + Some(&child.node) + } else { + None + } + })); + } + maybe_node + } +} + +pub struct FlattenedNodeDataIter<'a> { + remaining: VecDeque<&'a NodeData>, +} + +impl<'a> Iterator for FlattenedNodeDataIter<'a> { + type Item = &'a NodeData; + + fn next(&mut self) -> Option { + let maybe_node_data = self.remaining.pop_back(); + if let Some(NodeData::Child(child)) = maybe_node_data { + self.remaining.extend(child.node.data.iter().rev()) + } + maybe_node_data + } +} diff --git a/rs/src/parse/context.rs b/rs/src/parse/context.rs new file mode 100644 index 00000000..c611e4fc --- /dev/null +++ b/rs/src/parse/context.rs @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing the types containing contextual information for parse +//! functions. +//! +//! Refer to the documentation for [`parse`](mod@crate::parse) for more +//! information. + +use crate::input::config; +use crate::output::comment; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::output::extension; +use crate::output::path; +use crate::output::tree; +use std::collections::HashMap; +use std::collections::HashSet; +use std::fmt::Debug; +use std::hash::Hash; +use std::sync::Arc; + +/// Parse/validation context and output node, passed to parser functions along +/// with a reference to the to-be-parsed input node. +pub struct Context<'a> { + /// The node in the documentation tree that should reflect the input node. + /// The structure of the documentation tree will be the same as the input + /// tree, but represented in a more generic way, and with annotations like + /// comments and diagnostics attached to each node. The output tree is not + /// intended to be read back by the validator. + output: &'a mut tree::Node, + + /// State object. This is tracked between nodes as they are traversed, and + /// is always mutable for the node currently being validated. 
+ state: &'a mut State, + + /// "Breadcrumbs" with information about the ancestors of the current node. + /// Essentially a stack structure, where only the top of the stack is + /// mutable. + breadcrumb: Breadcrumb<'a>, + + /// Configuration structure, created before validation starts and immutable + /// afterwards. + pub config: &'a config::Config, +} + +impl<'a> Context<'a> { + /// Creates a root parse context. + /// + /// root_name is the prefix used for all paths, normally just "plan" (if + /// different tree parsers are ever created, this can be used to + /// disambiguate between tree types). output is the root node that the + /// children will be added to as parsing progresses. state is the state + /// object used for tracking parser state. config is the configuration for + /// the parser. + pub fn new( + root_name: &'static str, + output: &'a mut tree::Node, + state: &'a mut State, + config: &'a config::Config, + ) -> Self { + Self { + output, + state, + breadcrumb: Breadcrumb::new(root_name), + config, + } + } + + /// Creates a parse context for a child of the node corresponding to this + /// context. output is its node. path_element specifies its relation to + /// the node corresponding to the current context. + pub fn child<'b>( + &'b mut self, + output: &'b mut tree::Node, + path_element: path::PathElement, + ) -> Context<'b> { + Context { + output, + state: self.state, + breadcrumb: self.breadcrumb.next(path_element), + config: self.config, + } + } + + /// Returns the node type of the associated node. + pub fn node_type(&self) -> &tree::NodeType { + &self.output.node_type + } + + /// Replaces the node type of the associated node. + /// + /// This should only be needed to upgrade primitive nodes to more specific + /// types, for instance references or resolved URIs. + pub fn replace_node_type(&mut self, node_type: tree::NodeType) -> tree::NodeType { + std::mem::replace(&mut self.output.node_type, node_type) + } + + /// Returns the data type currently associated with the current node. If no + /// data type was associated yet, this silently returns a reference to an + /// unresolved type object. + pub fn data_type(&self) -> Arc { + self.output.data_type.clone().unwrap_or_default() + } + + /// Sets the semantic description of the current node. + pub fn set_description>( + &mut self, + class: tree::Class, + brief: Option, + ) { + self.output.class = class; + self.output.brief = brief.map(|c| c.into()); + } + + /// Appends to the summary of this node. + pub fn push_summary>(&mut self, comment: C) { + if let Some(summary) = self.output.summary.as_mut() { + summary.extend(comment.into()) + } else { + self.output.summary = Some(comment.into()) + } + } + + /// Pushes data into the current node. + /// + /// This is primarily intended for use by the traversal macros and the more + /// specific functions defined here, like set_data_type(). + pub fn push(&mut self, node_data: tree::NodeData) { + self.output.data.push(node_data); + } + + /// Pushes a diagnostic into the node. This also evaluates its adjusted + /// error level. + pub fn push_diagnostic(&mut self, diag: diagnostic::RawDiagnostic) { + // Get the configured level limits for this diagnostic. First try the + // classification of the diagnostic itself, then its group, and then + // finally Unclassified. If no entries exist, simply yield + // (Info, Error), which is no-op. 
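+        // For example, an override of (Error, Error) promotes all
+        // diagnostics of that class to Error (a -Werror-style promotion),
+        // while (Info, Info) demotes them all to Info.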
+ let (min, max) = self + .config + .diagnostic_level_overrides + .get(&diag.cause.classification) + .or_else(|| { + self.config + .diagnostic_level_overrides + .get(&diag.cause.classification.group()) + }) + .or_else(|| { + self.config + .diagnostic_level_overrides + .get(&diagnostic::Classification::Unclassified) + }) + .unwrap_or(&(diagnostic::Level::Info, diagnostic::Level::Error)); + + // Adjust the level. + let adjusted_level = if diag.level < *min { + *min + } else if diag.level > *max { + *max + } else { + diag.level + }; + let adjusted = diag.adjust_level(adjusted_level); + + // Actually push the data item. + self.output.data.push(tree::NodeData::Diagnostic(adjusted)); + } + + /// Pushes a comment into the node. + pub fn push_comment>(&mut self, comment: C) { + self.push(tree::NodeData::Comment(comment.into())) + } + + /// Sets the data type "returned" by this node. Specifically: + /// + /// - for type nodes, this should be used to specify the type; + /// - for expression nodes, this should be used to specify the type of the + /// data returned by the expression; + /// + /// Can be called multiple times; only the data type specified for the + /// final call attached to the node's "return type", but each time a + /// NodeData::DataType is pushed into the node data as well. + pub fn set_data_type(&mut self, data_type: Arc) { + if !data_type.is_unresolved() { + self.push(tree::NodeData::DataType(data_type.clone())); + } + self.output.data_type = Some(data_type); + } + + /// Updates the current schema. This also pushes the data type to the + /// current node. Relation parsers *must* use this after traversing their + /// inputs, but before they start to parse any expressions based on that + /// schema; after all, the schema defines how (column) references behave. + /// If the schema isn't known, it may be set to an unresolved type. + pub fn set_schema(&mut self, schema: Arc) { + *self + .state + .schema_stack + .last_mut() + .expect("no schema present on schema stack") = Some(schema.clone()); + self.set_data_type(schema); + } + + /// Clears the current schema, requiring schema!() to be called before + /// expressions can be parsed again. + pub fn clear_schema(&mut self) { + *self + .state + .schema_stack + .last_mut() + .expect("no schema present on schema stack") = None; + } + + /// Returns the current schema. depth specifies for which subquery the + /// schema should be selected; depth 0 is the current query, depth 1 would + /// be its parent query, 2 would be its grandparent, etc. Returns Err when + /// the referenced schema semantically doesn't exist; returns Ok(unresolved + /// type) when it does but the actual type isn't known. + pub fn schema(&self, depth: usize) -> diagnostic::Result> { + let len = self.state.schema_stack.len(); + if depth >= len { + Err(cause!( + ExpressionFieldRefMissingStream, + "indexing query beyond current query depth ({len})" + )) + } else if let Some(Some(schema)) = self.state.schema_stack.get(len - depth - 1) { + Ok(schema.clone()) + } else { + Err(cause!( + ExpressionFieldRefMissingStream, + "query data stream has not yet been instantiated" + )) + } + } + + /// Pushes an empty slot for the schema of the relation tree onto the + /// schema stack, allowing schema!() to be used. This must be used when + /// traversing into the root of a relation tree; i.e., the root must be + /// parsed within the context of the provided function. 
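To make the schema-stack contract described above concrete, here is a minimal self-contained sketch (not the validator's actual code; `Schema` is a hypothetical stand-in for `Arc<data_type::DataType>`) of the push/fill/pop discipline that `enter_relation_root`, `set_schema`, and `schema` implement together:

```rust
type Schema = String;

struct State {
    schema_stack: Vec<Option<Schema>>,
}

impl State {
    // Mirrors enter_relation_root: push an empty slot, run the closure,
    // pop the slot again, and hand the closure's result back.
    fn enter_relation_root<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
        self.schema_stack.push(None);
        let result = f(self);
        self.schema_stack.pop().expect("stack imbalance");
        result
    }

    // Mirrors schema(depth): depth 0 is the innermost query, depth 1 its
    // parent, and so on; None means no input relation was parsed yet.
    fn schema(&self, depth: usize) -> Result<&Schema, &'static str> {
        let len = self.schema_stack.len();
        if depth >= len {
            return Err("indexing query beyond current query depth");
        }
        self.schema_stack[len - depth - 1]
            .as_ref()
            .ok_or("query data stream has not yet been instantiated")
    }
}

fn main() {
    let mut state = State { schema_stack: vec![] };
    state.enter_relation_root(|state| {
        assert!(state.schema(0).is_err()); // slot exists but is not filled yet
        *state.schema_stack.last_mut().unwrap() = Some("outer".into());
        state.enter_relation_root(|state| {
            *state.schema_stack.last_mut().unwrap() = Some("inner".into());
            assert_eq!(state.schema(0).unwrap(), "inner"); // current query
            assert_eq!(state.schema(1).unwrap(), "outer"); // correlated outer query
        });
        assert_eq!(state.schema(0).unwrap(), "outer"); // inner slot popped again
    });
}
```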
+ pub fn enter_relation_root<R, F: FnOnce(&mut Self) -> R>(&mut self, f: F) -> R { + // Push a schema slot onto the stack for the relation tree to fill + // in. + self.state.schema_stack.push(None); + + // Ensure that return statements can't break out of the context + // early by wrapping the block in a closure first. + let result = f(self); + + // Pop the schema again. + self.state + .schema_stack + .pop() + .expect("no schema present on schema stack"); + + result + } + + /// Returns all data that has thus far been pushed into the current node. + pub fn node_data(&self) -> &[tree::NodeData] { + &self.output.data + } + + /// Returns the resolver for URI anchors and references. + pub fn extension_uris(&mut self) -> &mut Resolver<u32, Arc<extension::YamlInfo>> { + &mut self.state.extension_uris + } + + /// Registers an extension URI definition. Shorthand for + /// extension_uris().define(), using the current path as the registration + /// path. + pub fn define_extension_uri( + &mut self, + anchor: u32, + uri: Arc<extension::YamlInfo>, + ) -> Result<(), (Arc<extension::YamlInfo>, path::PathBuf)> { + self.state + .extension_uris + .define(anchor, uri, self.breadcrumb.path.to_path_buf()) + } + + /// Returns the resolver for function anchors and references. + pub fn fns(&mut self) -> &mut Resolver<u32, Arc<extension::Reference<extension::Function>>> { + &mut self.state.functions + } + + /// Registers a function definition. Shorthand for fns().define(), using + /// the current path as the registration path. + pub fn define_fn( + &mut self, + anchor: u32, + uri: Arc<extension::Reference<extension::Function>>, + ) -> Result< + (), + ( + Arc<extension::Reference<extension::Function>>, + path::PathBuf, + ), + > { + self.state + .functions + .define(anchor, uri, self.breadcrumb.path.to_path_buf()) + } + + /// Returns the resolver for type anchors and references. + pub fn types(&mut self) -> &mut Resolver<u32, Arc<extension::Reference<extension::DataType>>> { + &mut self.state.types + } + + /// Registers a type definition. Shorthand for types().define(), using the + /// current path as the registration path. + pub fn define_type( + &mut self, + anchor: u32, + uri: Arc<extension::Reference<extension::DataType>>, + ) -> Result< + (), + ( + Arc<extension::Reference<extension::DataType>>, + path::PathBuf, + ), + > { + self.state + .types + .define(anchor, uri, self.breadcrumb.path.to_path_buf()) + } + + /// Returns the resolver for type variation anchors and references. + pub fn tvars( + &mut self, + ) -> &mut Resolver<u32, Arc<extension::Reference<extension::TypeVariation>>> { + &mut self.state.type_variations + } + + /// Registers a type variation definition. Shorthand for tvars().define(), + /// using the current path as the registration path. + pub fn define_tvar( + &mut self, + anchor: u32, + uri: Arc<extension::Reference<extension::TypeVariation>>, + ) -> Result< + (), + ( + Arc<extension::Reference<extension::TypeVariation>>, + path::PathBuf, + ), + > { + self.state + .type_variations + .define(anchor, uri, self.breadcrumb.path.to_path_buf()) + } + + /// Returns the resolver for protobuf Any types present in the + /// `expected_type_urls` manifest. + pub fn proto_any_types(&mut self) -> &mut Resolver<String, ()> { + &mut self.state.proto_any_types + } + + /// Defines a protobuf Any type URL, allowing it for use within the plan. + /// If the type was already declared, this returns the path that defined + /// it in the form of an Err result. + pub fn define_proto_any_type<S: ToString>(&mut self, url: S) -> Result<(), path::PathBuf> { + self.state + .proto_any_types + .define(url.to_string(), (), self.breadcrumb.path.to_path_buf()) + .map_err(|(_, p)| p) + } + + /// Resolves a protobuf "any" message. The first return value specifies + /// whether usage of the type was explicitly allowed in the validator + /// configuration. The second return value specifies the path to the + /// manifest entry for the type, if it was defined. If the type URL does + /// not exist in the manifest, a suitable error is generated automatically.
+ pub fn resolve_proto_any(&mut self, x: &prost_types::Any) -> (bool, Option<path::PathBuf>) { + let path = self + .state + .proto_any_types + .resolve(&x.type_url) + .map(|(_, path)| path.clone()); + if path.is_none() { + diagnostic!(self, Error, ProtoMissingAnyDeclaration, "{}", x.type_url); + } + let allowed = self + .config + .allowed_proto_any_urls + .iter() + .any(|p| p.matches(&x.type_url)); + (allowed, path) + } + + /// Returns a mutable reference to the Option that possibly contains the + /// YAML data object under construction. + pub fn yaml_data_opt(&mut self) -> &mut Option<extension::YamlData> { + &mut self.state.yaml_data + } + + /// Returns a mutable reference to the YAML data object under construction. + /// Panics if we're not currently constructing YAML data. + pub fn yaml_data(&mut self) -> &mut extension::YamlData { + self.state.yaml_data.as_mut().unwrap() + } + + /// Returns the path leading up to the current node. + pub fn path(&self) -> &path::Path<'a> { + &self.breadcrumb.path + } + + /// Returns the path leading up to the current node. + pub fn path_buf(&self) -> path::PathBuf { + self.breadcrumb.path.to_path_buf() + } + + /// Returns the path leading up to the parent node, if any. + pub fn parent_path_buf(&self) -> Option<path::PathBuf> { + self.breadcrumb.parent.map(|x| x.path.to_path_buf()) + } + + /// Indicates that the field with the given name has been parsed. See also + /// field_parsed(). + pub fn set_field_parsed<S: ToString>(&mut self, field: S) -> bool { + self.breadcrumb.fields_parsed.insert(field.to_string()) + } + + /// Returns whether the field with the given name has been parsed yet. + /// + /// This is primarily intended for use by the traversal macros. They use it + /// to ensure that: + /// + /// - a field is only parsed once; + /// - fields not parsed by the parse function are parsed using a generic + /// method, along with emission of a warning message. + pub fn field_parsed<S: AsRef<str>>(&mut self, field: S) -> bool { + self.breadcrumb.fields_parsed.contains(field.as_ref()) + } +} + +#[derive(Clone, Debug, Default)] +pub struct Resolver<K, V> +where + K: Clone + Debug + Default + Eq + Hash, + V: Clone + Debug + Default, +{ + /// Map of keys that have been registered thus far to their value and to + /// the path from which they were registered. + map: HashMap<K, (V, path::PathBuf)>, + + /// The set of keys for which resolve() was called at least once. Used to + /// detect unused keys. + used: HashSet<K>, +} + +impl<K, V> Resolver<K, V> +where + K: Clone + Debug + Default + Eq + Hash, + V: Clone + Debug + Default, +{ + /// Creates a new resolver. + pub fn new() -> Self { + Self::default() + } + + /// Defines a key-value-path triplet. If a key was previously defined, its + /// entry is overridden, and the previous value-path pair is returned + /// in the form of an Err result. + pub fn define( + &mut self, + key: K, + value: V, + path: path::PathBuf, + ) -> Result<(), (V, path::PathBuf)> { + if let Some(previous) = self.map.insert(key, (value, path)) { + Err(previous) + } else { + Ok(()) + } + } + + /// Resolves the given key to its value-path pair. If no value was + /// registered for the given key, None is returned. If this was the first + /// use of this key (regardless of whether or not a value was registered + /// for it yet), it is recorded in the set of used keys.
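The Resolver contract is easiest to see in a toy version. The sketch below (simplified types, registration paths omitted; not the validator's actual code) shows the define/resolve/iter_unused flow, including the duplicate-definition and unused-definition behavior:

```rust
use std::collections::{HashMap, HashSet};

// Simplified stand-in for the validator's Resolver<K, V>: tracks
// definitions plus which keys were ever looked up.
struct MiniResolver {
    map: HashMap<u32, &'static str>,
    used: HashSet<u32>,
}

impl MiniResolver {
    fn define(&mut self, key: u32, value: &'static str) -> Result<(), &'static str> {
        match self.map.insert(key, value) {
            Some(previous) => Err(previous), // duplicate anchor: report the old value
            None => Ok(()),
        }
    }

    fn resolve(&mut self, key: u32) -> Option<&'static str> {
        self.used.insert(key); // mark as used even if undefined
        self.map.get(&key).copied()
    }

    fn iter_unused(&self) -> impl Iterator<Item = u32> + '_ {
        self.map.keys().copied().filter(|k| !self.used.contains(k))
    }
}

fn main() {
    let mut r = MiniResolver { map: HashMap::new(), used: HashSet::new() };
    r.define(1, "extension_uri").unwrap();
    r.define(2, "never_referenced").unwrap();
    // Redefining anchor 1 overrides the entry and reports the previous value.
    assert_eq!(r.define(1, "duplicate"), Err("extension_uri"));
    assert_eq!(r.resolve(1), Some("duplicate"));
    // Anchor 2 was defined but never resolved, so it shows up as unused.
    assert_eq!(r.iter_unused().collect::<Vec<_>>(), vec![2]);
}
```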
+ pub fn resolve(&mut self, key: &K) -> Option<&(V, path::PathBuf)> { + self.used.insert(key.clone()); + self.map.get(key) + } + + /// Iterates over all key-value-path triplets corresponding to definitions + /// that were never used, i.e. for which resolve() was never called. + pub fn iter_unused(&self) -> impl Iterator<Item = (K, V, path::PathBuf)> + '_ { + self.map.iter().filter_map(|(k, (v, p))| { + if self.used.contains(k) { + None + } else { + Some((k.clone(), v.clone(), p.clone())) + } + }) + } +} + +/// Global state information tracked by the validation logic. +#[derive(Default)] +pub struct State { + /// URI anchor resolver. + pub extension_uris: Resolver<u32, Arc<extension::YamlInfo>>, + + /// YAML-defined function anchor resolver. + pub functions: Resolver<u32, Arc<extension::Reference<extension::Function>>>, + + /// YAML-defined data type anchor resolver. + pub types: Resolver<u32, Arc<extension::Reference<extension::DataType>>>, + + /// YAML-defined type variation anchor resolver. + pub type_variations: Resolver<u32, Arc<extension::Reference<extension::TypeVariation>>>, + + /// Protobuf Any type URL resolver. + pub proto_any_types: Resolver<String, ()>, + + /// Schema stack. This is what the validator for FieldRefs uses to + /// determine the return type of the FieldRef. The back of the vector + /// represents the innermost query, while entries further to the front + /// of the vector are used to break out of correlated subqueries. + /// None is used only for the top of the stack, and only when we're inside + /// a relation tree, but no schema is known yet (in terms of dataflow, + /// we're still in the time before the input relation has created a + /// stream). + pub schema_stack: Vec<Option<Arc<data_type::DataType>>>, + + /// The YAML data object under construction, if any. + pub yaml_data: Option<extension::YamlData>, +} + +/// Breadcrumbs structure. Each breadcrumb is associated with a node, and +/// immutably links to the breadcrumb for its parent node (except for the +/// root). Used for two things: tracking the path leading up to the current +/// node from the root, and keeping track of mutable state information that +/// belongs to a specific node. +pub struct Breadcrumb<'a> { + /// Breadcrumb for the parent node, unless this is the root. + pub parent: Option<&'a Breadcrumb<'a>>, + + /// The path leading up to the node associated with this breadcrumb. Used + /// primarily for attaching information to diagnostic messages. + pub path: path::Path<'a>, + + /// The set of field names of the associated node that we've already + /// parsed. This is used to automatically search through message subtrees + /// that the validator doesn't yet implement: after all normal validation + /// for a node is done, the generic tree-walking logic checks whether there + /// are fields with non-default data associated with them of which the + /// field name hasn't been added to this set yet. It's also used to assert + /// that the same subtree isn't traversed twice. + pub fields_parsed: HashSet<String>, +} + +impl Breadcrumb<'_> { + /// Creates a breadcrumb for the root node. + pub fn new(root_name: &'static str) -> Self { + Self { + parent: None, + path: path::Path::Root(root_name), + fields_parsed: HashSet::new(), + } + } + + /// Creates the next breadcrumb. + pub fn next(&self, element: path::PathElement) -> Breadcrumb { + Breadcrumb { + parent: Some(self), + path: self.path.with(element), + fields_parsed: HashSet::new(), + } + } +} diff --git a/rs/src/parse/expressions/conditionals.rs b/rs/src/parse/expressions/conditionals.rs new file mode 100644 index 00000000..8b298fc7 --- /dev/null +++ b/rs/src/parse/expressions/conditionals.rs @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating conditional expression types.
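The conditional parsers in this module all fold the types of their branches into a running return type via `types::promote_and_assert_equal`. A standalone sketch of that folding, under the assumption (consistent with how unresolved types are used throughout these files) that an unresolved type unifies with anything; the hypothetical `Ty` enum stands in for `Arc<data_type::DataType>`, and real promotion details such as nullability are elided:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum Ty {
    Unresolved,
    Boolean,
    I64,
}

// Fold one branch type into the running return type.
fn promote_and_assert_equal(branch: Ty, running: Ty) -> Ty {
    match (branch, running) {
        (t, Ty::Unresolved) => t, // first resolved branch wins
        (Ty::Unresolved, t) => t, // unknown branch: keep the running type
        (a, b) if a == b => a,
        (a, _) => {
            eprintln!("branches must yield the same type");
            a // keep going with the latest type so parsing can continue
        }
    }
}

fn main() {
    // One branch could not be resolved; the others agree on i64.
    let branches = [Ty::I64, Ty::Unresolved, Ty::I64];
    let mut return_type = Ty::Unresolved;
    for b in branches {
        return_type = promote_and_assert_equal(b, return_type);
    }
    assert_eq!(return_type, Ty::I64);
}
```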
+ +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::expressions; +use crate::parse::expressions::literals; +use crate::parse::types; +use std::sync::Arc; + +// FIXME: what promotions are allowed and when? I saw Isthmus output an +// if/else with branches differing in nullability, and that makes sense to me +// as something to support. But on the other hand, explicit type casts for +// everything might be nicer for a machine format. Either way, I'm not sure +// the specification has anything to say about this? + +/// Parse an if-then expression. Returns a description of said expression. +pub fn parse_if_then( + x: &substrait::expression::IfThen, + y: &mut context::Context, +) -> diagnostic::Result { + let mut return_type: Arc = Arc::default(); + let mut args = vec![]; + + // Handle branches. + proto_required_repeated_field!(x, y, ifs, |x, y| { + // Parse fields. + let (n, e) = proto_required_field!(x, y, r#if, expressions::parse_predicate); + let condition = e.unwrap_or_default(); + let condition_type = n.data_type(); + let (n, e) = proto_required_field!(x, y, then, expressions::parse_expression); + let value = e.unwrap_or_default(); + let value_type = n.data_type(); + + // Check that the type is the same for each branch. + return_type = types::promote_and_assert_equal( + y, + &value_type, + &return_type, + "branches must yield the same type", + ); + + // Nulls in the condition are propagated to the output. + // FIXME: I guess? + if !condition_type.is_unresolved() && condition_type.nullable() { + return_type = return_type.make_nullable(); + } + + // Describe this branch. + describe!(y, Misc, "If {} yield {}", &condition, &value); + + // Save to the "arguments" of the function we'll use to describe this + // expression. + args.push(condition); + args.push(value); + + Ok(()) + }); + + // Handle else branch. + if x.r#else.is_some() { + // Parse field. + let (n, e) = proto_boxed_required_field!(x, y, r#else, expressions::parse_expression); + let value = e.unwrap_or_default(); + + // Check that the type is the same for each branch. + return_type = types::promote_and_assert_equal( + y, + &n.data_type(), + &return_type, + "branches must yield the same type", + ); + + // Save to the "arguments" of the function we'll use to describe this + // expression. + args.push(value); + } else { + // Allow missing else, making the type nullable. + comment!(y, "Otherwise, yield null."); + return_type = return_type.make_nullable(); + + // Yield null for the else clause. + args.push(expressions::Expression::new_null(return_type.clone())); + } + + // Describe node. + y.set_data_type(return_type); + summary!( + y, + "Selects the value corresponding to the first condition that yields \ + true. If none of the conditions yield true, return {}.", + args.last().unwrap() + ); + let expression = expressions::Expression::Function(String::from("if_then"), args); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a switch expression. Returns a description of said expression. +pub fn parse_switch( + x: &substrait::expression::SwitchExpression, + y: &mut context::Context, +) -> diagnostic::Result { + let mut return_type: Arc = Arc::default(); + let mut args = vec![]; + + // Parse value to match. + let (n, e) = proto_boxed_required_field!(x, y, r#match, expressions::parse_expression); + let mut match_type = n.data_type(); + args.push(e.unwrap_or_default()); + + // Handle branches. 
+ proto_required_repeated_field!(x, y, ifs, |x, y| { + // Parse match field. + let (n, e) = proto_required_field!(x, y, r#if, literals::parse_literal); + let match_value = e.unwrap_or_default(); + + // Check that the type is the same for each branch. + match_type = types::promote_and_assert_equal( + y, + &n.data_type(), + &match_type, + "literal type must match switch expression", + ); + + // Parse value field. + let (n, e) = proto_required_field!(x, y, then, expressions::parse_expression); + let value = e.unwrap_or_default(); + + // Check that the type is the same for each branch. + return_type = types::promote_and_assert_equal( + y, + &n.data_type(), + &return_type, + "branches must yield the same type", + ); + + // Describe this branch. + describe!(y, Misc, "If match == {} yield {}", &match_value, &value); + + // Save to the "arguments" of the function we'll use to describe this + // expression. + args.push(match_value.into()); + args.push(value); + + Ok(()) + }); + + // Handle else branch. + if x.r#else.is_some() { + // Parse field. + let (n, e) = proto_boxed_required_field!(x, y, r#else, expressions::parse_expression); + let value = e.unwrap_or_default(); + + // Check that the type is the same for each branch. + return_type = types::promote_and_assert_equal( + y, + &n.data_type(), + &return_type, + "branches must yield the same type", + ); + + // Save to the "arguments" of the function we'll use to describe this + // expression. + args.push(value); + } else { + // Allow missing else, making the type nullable. + comment!(y, "Otherwise, yield null."); + return_type = return_type.make_nullable(); + + // Yield null for the else clause. + args.push(expressions::Expression::new_null(return_type.clone())); + } + + // Describe node. + y.set_data_type(return_type); + summary!( + y, + "Selects the value corresponding to the switch case that matches {}. \ + If none of the cases match, return {}.", + args.first().unwrap(), + args.last().unwrap() + ); + let expression = expressions::Expression::Function(String::from("switch"), args); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a "singular or list", i.e. something of the form +/// `x in (a, ..., c)`. +pub fn parse_singular_or_list( + x: &substrait::expression::SingularOrList, + y: &mut context::Context, +) -> diagnostic::Result { + let mut args = vec![]; + + // Parse value to match. + let (n, e) = proto_boxed_required_field!(x, y, value, expressions::parse_expression); + let match_type = n.data_type(); + args.push(e.unwrap_or_default()); + + // Handle allowed values. + proto_required_repeated_field!(x, y, options, |x, y| { + let expression = expressions::parse_expression(x, y)?; + let value_type = y.data_type(); + args.push(expression); + + // Check that the type is the same as the value. + types::assert_equal( + y, + &value_type, + &match_type, + "option type must match value type", + ); + + Ok(()) + }); + + // Describe node. + y.set_data_type(data_type::DataType::new_predicate(false)); + summary!( + y, + "Returns true if and only if {} is equal to any of the options.", + args.first().unwrap() + ); + let expression = expressions::Expression::Function(String::from("match"), args); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a "multi or list", i.e. something of the form +/// `(x, .., z) in ((ax, .., az), .., (cx, .., cz))`. 
+pub fn parse_multi_or_list( + x: &substrait::expression::MultiOrList, + y: &mut context::Context, +) -> diagnostic::Result { + // FIXME: why is there not just an expression that forms a struct from a + // number of expressions? Then this could go away. Alternatively, why does + // SingularOrList also exist, when it's just the special case of this + // expression for one-tuples? And why is it named this confusingly? + // (a in b, contains(a, b), matches(a, b) etc. would all make more sense + // to me... at least add a comment in the protobuf descriptions) + + let mut args = vec![]; + + // Parse value to match. + let (ns, es) = proto_required_repeated_field!(x, y, value, expressions::parse_expression); + let match_types = ns.iter().map(|x| x.data_type()).collect::>(); + args.push(expressions::Expression::Tuple( + es.into_iter().map(|x| x.unwrap_or_default()).collect(), + )); + + // Handle allowed values. + proto_required_repeated_field!(x, y, options, |x, y| { + let (ns, es) = proto_required_repeated_field!(x, y, fields, expressions::parse_expression); + let value_types = ns.iter().map(|x| x.data_type()).collect::>(); + args.push(expressions::Expression::Tuple( + es.into_iter().map(|x| x.unwrap_or_default()).collect(), + )); + + // Check that the type is the same as the value. + if match_types.len() != value_types.len() { + diagnostic!( + y, + Error, + TypeMismatch, + "option types must match value types: numbers of fields differ" + ) + } + for (index, (value_type, match_type)) in + value_types.iter().zip(match_types.iter()).enumerate() + { + types::assert_equal( + y, + value_type, + match_type, + format!("option type must match value type for field {index}"), + ); + } + + Ok(()) + }); + + // Describe node. + y.set_data_type(data_type::DataType::new_predicate(false)); + summary!( + y, + "Returns true if and only if {} is equal to any of the options.", + args.first().unwrap() + ); + let expression = expressions::Expression::Function(String::from("match"), args); + describe!(y, Expression, "{}", expression); + Ok(expression) +} diff --git a/rs/src/parse/expressions/functions.rs b/rs/src/parse/expressions/functions.rs new file mode 100644 index 00000000..a37ec938 --- /dev/null +++ b/rs/src/parse/expressions/functions.rs @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating function calls. + +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::output::extension; +use crate::output::tree; +use crate::parse::context; +use crate::parse::expressions; +use crate::parse::extensions; +use crate::parse::sorts; +use crate::parse::types; +use std::sync::Arc; + +/// Matches a function call with its YAML definition, yielding its return type. +/// Yields an unresolved type if resolution fails. +pub fn check_function( + y: &mut context::Context, + _function: &extension::Function, + _options: &[Option], + _arg_types: &[Arc], +) -> Arc { + // TODO: check consistency of: + // - _function (function definition information from the YAML file); + // - _options: number of options passed to the function, and validity of + // their values; + // - _arg_types: whether an overload exists for this set of argument + // types; + diagnostic!( + y, + Warning, + NotYetImplemented, + "matching function calls with their definitions" + ); + Arc::default() +} + +/// Parsing logic common to scalar and window functions. 
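`parse_function` below unpacks a mixed argument list into leading enum options and regular arguments. A minimal sketch of that splitting rule, simplified to a hard error where the real code records a diagnostic and keeps parsing (the `Arg` enum is a hypothetical stand-in for the parsed argument expressions):

```rust
enum Arg {
    EnumOption(Option<String>), // function option enum variant
    Regular(&'static str),      // ordinary expression argument
}

// Options must all precede the first regular argument.
fn split(args: Vec<Arg>) -> Result<(Vec<Option<String>>, Vec<&'static str>), &'static str> {
    let (mut options, mut regular) = (vec![], vec![]);
    for arg in args {
        match arg {
            Arg::EnumOption(v) => {
                if !regular.is_empty() {
                    return Err("function option argument specified after first regular argument");
                }
                options.push(v);
            }
            Arg::Regular(r) => regular.push(r),
        }
    }
    Ok((options, regular))
}

fn main() {
    let ok = split(vec![
        Arg::EnumOption(Some("OVERFLOW_SATURATE".into())),
        Arg::Regular("a"),
        Arg::Regular("b"),
    ]);
    assert!(ok.is_ok());

    // An option after a regular argument is rejected.
    let bad = split(vec![Arg::Regular("a"), Arg::EnumOption(None)]);
    assert!(bad.is_err());
}
```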
+fn parse_function( + y: &mut context::Context, + function: Option>>, + arguments: (Vec>, Vec>), + return_type: Arc, +) -> (Arc, expressions::Expression) { + // Determine the name of the function. + let name = function + .as_ref() + .map(|x| x.name.to_string()) + .unwrap_or_else(|| String::from("?")); + + // Unpack the arguments into the function's enum options and regular + // arguments. + let mut opt_values = vec![]; + let mut opt_exprs = vec![]; + let mut arg_types = vec![]; + let mut arg_exprs = vec![]; + for (node, expr) in arguments + .0 + .into_iter() + .zip(arguments.1.into_iter().map(|x| x.unwrap_or_default())) + { + if let expressions::Expression::EnumVariant(x) = &expr { + if opt_exprs.is_empty() && !arg_exprs.is_empty() { + diagnostic!( + y, + Error, + IllegalValue, + "function option argument specified after first regular argument" + ); + } + opt_values.push(x.clone()); + opt_exprs.push(expr); + } else { + arg_types.push(node.data_type()); + arg_exprs.push(expr); + } + } + opt_exprs.extend(arg_exprs.into_iter()); + let expression = expressions::Expression::Function(name, opt_exprs); + let opt_values = opt_values; + let arg_types = arg_types; + + // If the function was resolved, check whether it's valid. + let return_type = if let Some(reference) = function { + if let Some(function) = &reference.definition { + let derived = check_function(y, function, &opt_values, &arg_types); + types::assert_equal( + y, + &return_type, + &derived, + "specified return type must match derived", + ) + } else { + diagnostic!( + y, + Warning, + ExpressionFunctionDefinitionUnavailable, + "cannot check validity of call" + ); + return_type + } + } else { + return_type + }; + + (return_type, expression) +} + +/// Parse a scalar function. Returns a description of the function call +/// expression. +pub fn parse_scalar_function( + x: &substrait::expression::ScalarFunction, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse function information. + let function = proto_primitive_field!( + x, + y, + function_reference, + extensions::simple::parse_function_reference + ) + .1; + let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + let return_type = proto_required_field!(x, y, output_type, types::parse_type) + .0 + .data_type(); + + // Check function information. + let (return_type, expression) = parse_function(y, function, arguments, return_type); + + // Describe node. + y.set_data_type(return_type); + describe!(y, Expression, "{}", expression); + summary!(y, "Scalar function call: {:#}", expression); + Ok(expression) +} + +/// Parse a window function bound. +fn parse_bound( + _x: &substrait::expression::window_function::Bound, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // TODO: check window function bound. + // FIXME: I have no idea what these bounds signify. The spec doesn't + // seem to specify. + diagnostic!( + y, + Warning, + NotYetImplemented, + "validation of window function bounds" + ); + Ok(()) +} + +/// Parse a window function. Returns a description of the function call +/// expression. +pub fn parse_window_function( + x: &substrait::expression::WindowFunction, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse function information. 
+ let function = proto_primitive_field!( + x, + y, + function_reference, + extensions::simple::parse_function_reference + ) + .1; + let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + let return_type = proto_required_field!(x, y, output_type, types::parse_type) + .0 + .data_type(); + + // Check function information. + let (return_type, expression) = parse_function(y, function, arguments, return_type); + + // Parse modifiers. + proto_repeated_field!(x, y, partitions, expressions::parse_expression); + proto_repeated_field!(x, y, sorts, sorts::parse_sort_field); + proto_field!(x, y, upper_bound, parse_bound); + proto_field!(x, y, lower_bound, parse_bound); + proto_enum_field!(x, y, phase, substrait::AggregationPhase); + + // TODO: check window function configuration. + // FIXME: I have no idea what these partitions signify. The spec doesn't + // seem to specify. + if !x.partitions.is_empty() { + diagnostic!( + y, + Warning, + NotYetImplemented, + "validation of partitions field" + ); + } + + // Describe node. + y.set_data_type(return_type); + describe!(y, Expression, "{}", expression); + summary!(y, "Window function call: {:#}", expression); + Ok(expression) +} + +/// Parse an aggregate function. Returns a description of the function call +/// expression. +pub fn parse_aggregate_function( + x: &substrait::AggregateFunction, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse function information. + let function = proto_primitive_field!( + x, + y, + function_reference, + extensions::simple::parse_function_reference + ) + .1; + let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + let return_type = proto_required_field!(x, y, output_type, types::parse_type) + .0 + .data_type(); + + // Check function information. + let (return_type, expression) = parse_function(y, function, arguments, return_type); + + // Parse modifiers. + proto_repeated_field!(x, y, sorts, sorts::parse_sort_field); + proto_enum_field!(x, y, phase, substrait::AggregationPhase); + proto_enum_field!( + x, + y, + invocation, + substrait::aggregate_function::AggregationInvocation + ); + + // Describe node. + y.set_data_type(return_type); + describe!(y, Expression, "{}", expression); + summary!(y, "Aggregate function call: {:#}", expression); + Ok(expression) +} diff --git a/rs/src/parse/expressions/literals.rs b/rs/src/parse/expressions/literals.rs new file mode 100644 index 00000000..4b29a234 --- /dev/null +++ b/rs/src/parse/expressions/literals.rs @@ -0,0 +1,946 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating literals. + +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::types; +use crate::string_util; +use crate::string_util::Describe; +use std::sync::Arc; + +/// The value of a literal, not including type information. +#[derive(Clone)] +enum LiteralValue { + /// May be used for any nullable type. + Null, + + /// May be used only for booleans. + Boolean(bool), + + /// May be used only for I8, I16, I32, I64, Timestamp, TimestampTz, Date, and Time. + Integer(i64), + + /// May be used only for Fp32 and Fp64. + Float(f64), + + /// May be used only for decimals and UUIDs. + Data16(i128), + + /// May be used only for strings, FixedChars, and VarChars. + String(String), + + /// May be used only for binary and FixedBinary. + Binary(Vec), + + /// May be used only for IntervalYearToMonth and IntervalDayToSecond. 
+ Interval(i32, i32), + + /// May be used only for structs and lists. + Items(Vec), + + /// May be used only for maps. + Pairs(Vec<(Literal, Literal)>), +} + +impl Default for LiteralValue { + fn default() -> Self { + LiteralValue::Null + } +} + +/// A complete literal, including type information. +#[derive(Clone, Default)] +pub struct Literal { + /// The value of the literal. + value: LiteralValue, + + /// The data type of the literal. LiteralValue must be a valid instance of + /// this. + data_type: Arc, +} + +/// Converts a value in microseconds since the epoch to a chrono::NaiveDateTime. +fn to_date_time(micros: i64) -> diagnostic::Result { + let secs = micros.div_euclid(1_000_000); + let nsecs = ((micros.rem_euclid(1_000_000)) * 1000) as u32; + chrono::NaiveDateTime::from_timestamp_opt(secs, nsecs).ok_or(ecause!( + ExpressionIllegalLiteralValue, + "timestamp out of range" + )) +} + +/// Converts a value in microseconds since the epoch to a string. +fn to_date_time_str(micros: i64, fmt: &str) -> String { + to_date_time(micros) + .map(|x| x.format(fmt).to_string()) + .unwrap_or_else(|_| String::from("?")) +} + +impl Literal { + /// Shorthand for a new null literal. + pub fn new_null(data_type: Arc) -> Literal { + Literal { + value: LiteralValue::Null, + data_type, + } + } + + /// Shorthand for a new simple literal. + fn new_simple( + value: LiteralValue, + simple: data_type::Simple, + nullable: bool, + ) -> diagnostic::Result { + Ok(Literal { + value, + data_type: data_type::DataType::new( + data_type::Class::Simple(simple), + nullable, + None, + vec![], + )?, + }) + } + + /// Shorthand for a new compound literal. + fn new_compound>( + value: LiteralValue, + compound: data_type::Compound, + nullable: bool, + args: Vec, + ) -> diagnostic::Result { + Ok(Literal { + value, + data_type: data_type::DataType::new( + data_type::Class::Compound(compound), + nullable, + None, + args.into_iter().map(|x| x.into()).collect(), + )?, + }) + } + + /// Returns the data type of this literal. + pub fn data_type(&self) -> &Arc { + &self.data_type + } +} + +impl Describe for Literal { + /// Represents the value of this literal with some size limit. The size + /// limit very roughly corresponds to a number of characters, but this is + /// purely a heuristic thing. 
+ fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + match &self.value { + LiteralValue::Null => { + if self.data_type.is_unresolved() { + write!(f, "!") + } else { + write!(f, "null") + } + } + LiteralValue::Boolean(true) => write!(f, "true"), + LiteralValue::Boolean(false) => write!(f, "false"), + LiteralValue::Integer(i) => match self.data_type.class() { + data_type::Class::Simple(data_type::Simple::I8) => write!(f, "{i}i8"), + data_type::Class::Simple(data_type::Simple::I16) => write!(f, "{i}i16"), + data_type::Class::Simple(data_type::Simple::I32) => write!(f, "{i}i32"), + data_type::Class::Simple(data_type::Simple::I64) => write!(f, "{i}i64"), + data_type::Class::Simple(data_type::Simple::Timestamp) => { + write!(f, "{}", to_date_time_str(*i, "%Y-%m-%d %H:%M:%S%.6f")) + } + data_type::Class::Simple(data_type::Simple::TimestampTz) => { + write!(f, "{} UTC", to_date_time_str(*i, "%Y-%m-%d %H:%M:%S%.6f")) + } + data_type::Class::Simple(data_type::Simple::Date) => { + write!( + f, + "{}", + to_date_time_str(i.saturating_mul(24 * 60 * 60 * 1_000_000), "%Y-%m-%d") + ) + } + data_type::Class::Simple(data_type::Simple::Time) => { + write!(f, "{}", to_date_time_str(*i, "%H:%M:%S%.6f")) + } + _ => write!(f, "{i}"), + }, + LiteralValue::Float(v) => { + let max = std::cmp::min(std::cmp::max(3, limit.chars()), 10); + write!(f, "{:3.1$}", float_pretty_print::PrettyPrintFloat(*v), max) + } + LiteralValue::Data16(d) => match self.data_type.class() { + data_type::Class::Compound(data_type::Compound::Decimal) => { + if let Some(scale) = self.data_type.int_parameter(1) { + if d < &0 { + write!(f, "-")?; + } + let d = d.abs() as u128; + let s = 10u128.pow(scale as u32); + if self + .data_type + .int_parameter(0) + .map(|precision| scale < precision) + .unwrap_or(true) + { + write!(f, "{0}", d.div_euclid(s))?; + } + write!(f, ".")?; + if scale > 0 { + write!(f, "{0:01$}", d.rem_euclid(s), scale as usize)?; + } + Ok(()) + } else { + string_util::describe_binary(f, &d.to_le_bytes(), limit) + } + } + data_type::Class::Simple(data_type::Simple::Uuid) => { + let b = d.to_ne_bytes(); + write!( + f, + "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", + b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15] + ) + } + _ => string_util::describe_binary(f, &d.to_le_bytes(), limit), + }, + LiteralValue::String(s) => string_util::describe_string(f, s, limit), + LiteralValue::Binary(b) => string_util::describe_binary(f, b, limit), + LiteralValue::Interval(a, b) => match self.data_type.class() { + data_type::Class::Simple(data_type::Simple::IntervalYear) => { + write!(f, "{a}y{b:+}m") + } + data_type::Class::Simple(data_type::Simple::IntervalDay) => write!(f, "{a}d{b:+}s"), + _ => write!(f, "({a}, {b})"), + }, + LiteralValue::Items(x) => match self.data_type.class() { + data_type::Class::Compound(data_type::Compound::Struct) => { + write!(f, "(")?; + string_util::describe_sequence(f, x, limit, 20, |f, value, index, limit| { + write!(f, ".{index}: ")?; + value.describe(f, limit) + })?; + write!(f, ")") + } + data_type::Class::Compound(data_type::Compound::NamedStruct) => { + write!(f, "(")?; + string_util::describe_sequence(f, x, limit, 20, |f, value, index, limit| { + if let Some(name) = self + .data_type + .parameters() + .get(index) + .and_then(|x| x.get_name()) + { + write!(f, ".{}: ", string_util::as_ident_or_string(name))?; + } else { + write!(f, 
".{index}: ")?; + } + value.describe(f, limit) + })?; + write!(f, ")") + } + data_type::Class::Compound(data_type::Compound::List) => { + write!(f, "[")?; + string_util::describe_sequence(f, x, limit, 20, |f, value, _, limit| { + value.describe(f, limit) + })?; + write!(f, "]") + } + _ => { + write!(f, "(")?; + string_util::describe_sequence(f, x, limit, 20, |f, value, _, limit| { + value.describe(f, limit) + })?; + write!(f, ")") + } + }, + LiteralValue::Pairs(x) => match self.data_type.class() { + data_type::Class::Compound(data_type::Compound::Map) => { + write!(f, "{{")?; + string_util::describe_sequence( + f, + x, + limit, + 40, + |f, (key, value), _, limit| { + let (key_limit, value_limit) = limit.split(20); + key.describe(f, key_limit)?; + write!(f, ": ")?; + value.describe(f, value_limit) + }, + )?; + write!(f, "}}") + } + _ => { + write!(f, "(")?; + string_util::describe_sequence( + f, + x, + limit, + 40, + |f, (key, value), _, limit| { + write!(f, "(")?; + let (key_limit, value_limit) = limit.split(20); + key.describe(f, key_limit)?; + write!(f, ": ")?; + value.describe(f, value_limit)?; + write!(f, ")") + }, + )?; + write!(f, ")") + } + }, + } + } +} + +impl std::fmt::Display for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + +/// Parses a boolean literal. +fn parse_boolean( + x: &bool, + _y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Boolean(*x), + data_type::Simple::Boolean, + nullable, + ) +} + +/// Parses an i8 literal. +fn parse_i8(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + let x = i8::try_from(*x) + .map_err(|_| cause!(ExpressionIllegalLiteralValue, "i8 value out of range"))?; + Literal::new_simple( + LiteralValue::Integer(x as i64), + data_type::Simple::I8, + nullable, + ) +} + +/// Parses an i16 literal. +fn parse_i16(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + let x = i16::try_from(*x) + .map_err(|_| cause!(ExpressionIllegalLiteralValue, "i16 value out of range"))?; + Literal::new_simple( + LiteralValue::Integer(x as i64), + data_type::Simple::I16, + nullable, + ) +} + +/// Parses an i32 literal. +fn parse_i32(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Integer(*x as i64), + data_type::Simple::I32, + nullable, + ) +} + +/// Parses an i64 literal. +fn parse_i64(x: &i64, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + Literal::new_simple(LiteralValue::Integer(*x), data_type::Simple::I64, nullable) +} + +/// Parses an fp32 literal. +fn parse_fp32(x: &f32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Float(*x as f64), + data_type::Simple::Fp32, + nullable, + ) +} + +/// Parses an fp64 literal. +fn parse_fp64(x: &f64, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + Literal::new_simple(LiteralValue::Float(*x), data_type::Simple::Fp64, nullable) +} + +/// Parses a string literal. +fn parse_string(x: &str, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::String(x.to_string()), + data_type::Simple::String, + nullable, + ) +} + +/// Parses a binary literal. 
+fn parse_binary( + x: &[u8], + _y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Binary(x.to_owned()), + data_type::Simple::Binary, + nullable, + ) +} + +/// Parses a timestamp literal. +fn parse_timestamp( + x: &i64, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + let dt = to_date_time(*x)?; + if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) + || dt >= chrono::NaiveDate::from_ymd(10000, 1, 1).and_hms(0, 0, 0) + { + diagnostic!( + y, + Error, + ExpressionIllegalLiteralValue, + "timestamp out of range 1000-01-01 to 9999-12-31" + ); + } + Literal::new_simple( + LiteralValue::Integer(*x), + data_type::Simple::Timestamp, + nullable, + ) +} + +/// Parses a UTC timestamp literal. +fn parse_timestamp_tz( + x: &i64, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + let dt = to_date_time(*x)?; + if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) + || dt >= chrono::NaiveDate::from_ymd(10000, 1, 1).and_hms(0, 0, 0) + { + diagnostic!( + y, + Error, + ExpressionIllegalLiteralValue, + "timestamp out of range 1000-01-01 UTC to 9999-12-31 UTC" + ); + } + Literal::new_simple( + LiteralValue::Integer(*x), + data_type::Simple::TimestampTz, + nullable, + ) +} + +/// Parses a date literal. +fn parse_date(x: &i32, y: &mut context::Context, nullable: bool) -> diagnostic::Result { + let dt = to_date_time((*x as i64).saturating_mul(24 * 60 * 60 * 1_000_000))?; + if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) + || dt >= chrono::NaiveDate::from_ymd(10000, 1, 1).and_hms(0, 0, 0) + { + diagnostic!( + y, + Error, + ExpressionIllegalLiteralValue, + "date out of range 1000-01-01 UTC to 9999-12-31 UTC" + ); + } + Literal::new_simple( + LiteralValue::Integer(*x as i64), + data_type::Simple::Date, + nullable, + ) +} + +/// Parses a time literal. +fn parse_time(x: &i64, y: &mut context::Context, nullable: bool) -> diagnostic::Result { + if *x < 0 || *x >= 24 * 60 * 60 * 1_000_000 { + diagnostic!( + y, + Error, + ExpressionIllegalLiteralValue, + "time of day out of range 00:00:00.000000 to 23:59:59.999999" + ); + } + Literal::new_simple(LiteralValue::Integer(*x), data_type::Simple::Time, nullable) +} + +/// Parses a year to month interval literal. +fn parse_interval_year_to_month( + x: &substrait::expression::literal::IntervalYearToMonth, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + // FIXME: see FIXME for associated type. + proto_primitive_field!(x, y, years, |x, _| { + if *x < -10000 || *x > 10000 { + Err(cause!( + ExpressionIllegalLiteralValue, + "year count out of range -10000 to 10000" + )) + } else { + Ok(()) + } + }); + proto_primitive_field!(x, y, months, |x, _| { + if *x < -120000 || *x > 120000 { + Err(cause!( + ExpressionIllegalLiteralValue, + "month count out of range -120000 to 120000" + )) + } else { + Ok(()) + } + }); + let months = x.months.saturating_add(x.years.saturating_mul(12)); + if months < -120000 || months > 120000 { + diagnostic!( + y, + Error, + ExpressionIllegalLiteralValue, + "combined interval out of range -10000 to 10000 years" + ); + } + Literal::new_simple( + LiteralValue::Interval(x.years, x.months), + data_type::Simple::IntervalYear, + nullable, + ) +} + +/// Parses a day to second interval literal. 
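The year-to-month range check above deliberately uses saturating arithmetic; a small demonstration of why:

```rust
fn main() {
    // A hostile plan could carry years = i32::MAX. With plain `*` and `+`,
    // years * 12 would panic in debug builds or silently wrap in release
    // builds, potentially wrapping back into the "valid" range. Saturation
    // clamps instead, so the out-of-range check still fires.
    let years: i32 = i32::MAX;
    let months: i32 = 1;
    let combined = months.saturating_add(years.saturating_mul(12));
    assert_eq!(combined, i32::MAX);
    assert!(combined > 120000); // the diagnostic above would trigger
}
```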
+fn parse_interval_day_to_second( + x: &substrait::expression::literal::IntervalDayToSecond, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + // FIXME: see FIXME for associated type. + proto_primitive_field!(x, y, days, |x, _| { + if *x < -3650000 || *x > 3650000 { + Err(cause!( + ExpressionIllegalLiteralValue, + "day count out of range -3_650_000 to 3_650_000" + )) + } else { + Ok(()) + } + }); + + // FIXME: according to the docs, day to second supports microsecond + // precision. The literal doesn't. The i32 seconds also doesn't + // support the full specified range (but that range is weird + // anyway). + proto_primitive_field!(x, y, seconds); + Literal::new_simple( + LiteralValue::Interval(x.days, x.seconds), + data_type::Simple::IntervalDay, + nullable, + ) +} + +/// Parses a UUID literal. +fn parse_uuid(x: &[u8], _y: &mut context::Context, nullable: bool) -> diagnostic::Result { + if let Ok(x) = x.try_into() { + Literal::new_simple( + LiteralValue::Data16(i128::from_ne_bytes(x)), + data_type::Simple::Uuid, + nullable, + ) + } else { + Err(cause!( + ExpressionIllegalLiteralValue, + "uuid literals must be 16 bytes in length, got {}", + x.len() + )) + } +} + +/// Parses a fixed-length string literal. +fn parse_fixed_char( + x: &str, + _y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + Literal::new_compound( + LiteralValue::String(x.to_string()), + data_type::Compound::FixedChar, + nullable, + vec![x.len() as u64], + ) +} + +/// Parses a variable-length string literal. +fn parse_var_char( + x: &substrait::expression::literal::VarChar, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + proto_primitive_field!(x, y, length); + let len = x.length as usize; + proto_primitive_field!(x, y, value, |x, _| { + if x.len() > len { + Err(cause!( + ExpressionIllegalLiteralValue, + "varchar literal value is longer than specified length" + )) + } else { + Ok(()) + } + }); + Literal::new_compound( + LiteralValue::String(x.value.clone()), + data_type::Compound::VarChar, + nullable, + vec![len as u64], + ) +} + +/// Parses a fixed-length binary literal. +fn parse_fixed_binary( + x: &[u8], + _y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + Literal::new_compound( + LiteralValue::Binary(x.to_owned()), + data_type::Compound::FixedBinary, + nullable, + vec![x.len() as u64], + ) +} + +/// Parses a decimal literal. 
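The decimal literal decoded below is carried as a 16-byte little-endian two's-complement integer paired with a precision and scale. A sketch of that encoding (example values hypothetical):

```rust
fn main() {
    // 12.34 as decimal(4, 2) is carried as the unscaled integer 1234,
    // serialized as 16 little-endian bytes.
    let unscaled: i128 = 1234;
    let bytes = unscaled.to_le_bytes();
    assert_eq!(bytes.len(), 16);

    // Decoding, as parse_decimal does below: rebuild the i128, then check
    // that it fits within the declared precision: |value| < 10^precision.
    let value = i128::from_le_bytes(bytes);
    let (precision, scale) = (4u32, 2u32);
    assert!(value.abs() < 10i128.pow(precision));

    // Rendering with the scale applied (positive values only, for brevity).
    let s = 10i128.pow(scale);
    assert_eq!(format!("{}.{:02$}", value / s, value % s, scale as usize), "12.34");
}
```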
+fn parse_decimal( + x: &substrait::expression::literal::Decimal, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result<Literal> { + proto_primitive_field!(x, y, precision, |x, _| { + if *x < 0 { + Err(cause!( + IllegalValue, + "negative type parameters are not supported" + )) + } else { + Ok(()) + } + }); + proto_primitive_field!(x, y, scale); + let val = proto_primitive_field!(x, y, value, |x, _| { + if let Ok(x) = (&x[..]).try_into() { + Ok(i128::from_le_bytes(x)) + } else { + Err(cause!( + ExpressionIllegalLiteralValue, + "decimal literals must be 16 bytes in length, got {}", + x.len() + )) + } + }) + .1; + let precision = u64::try_from(x.precision).unwrap_or_default(); + let scale = u64::try_from(x.scale).unwrap_or_default(); + + if let Some(val) = val { + let range = 10i128.saturating_pow(precision.try_into().unwrap_or_default()); + if val >= range || val <= -range { + Err(cause!( + ExpressionIllegalLiteralValue, + "decimal value is out of range for specified precision and scale" + )) + } else { + Literal::new_compound( + LiteralValue::Data16(val), + data_type::Compound::Decimal, + nullable, + vec![precision, scale], + ) + } + } else { + Ok(Literal::default()) + } +} + +/// Parses a struct literal. +fn parse_struct_int( + x: &substrait::expression::literal::Struct, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result<Literal> { + let (values, types): (Vec<_>, Vec<_>) = proto_repeated_field!(x, y, fields, parse_literal) + .1 + .into_iter() + .map(|x| { + let x = x.unwrap_or_default(); + let data_type = x.data_type.clone(); + (x, data_type) + }) + .unzip(); + Literal::new_compound( + LiteralValue::Items(values), + data_type::Compound::Struct, + nullable, + types, + ) +} + +/// Parses a struct literal. +pub fn parse_struct( + x: &substrait::expression::literal::Struct, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result<Literal> { + let literal = parse_struct_int(x, y, nullable)?; + y.set_data_type(literal.data_type().clone()); + Ok(literal) +} + +/// Parses a list literal. +fn parse_list( + x: &substrait::expression::literal::List, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result<Literal> { + let values: Vec<_> = proto_required_repeated_field!(x, y, values, parse_literal) + .1 + .into_iter() + .map(|x| x.unwrap_or_default()) + .collect(); + if values.is_empty() { + comment!( + y, + "At least one list element is required to derive type. Use EmptyList instead." + ); + } + let mut data_type = Arc::default(); + for (index, value) in values.iter().enumerate() { + data_type = types::assert_equal( + y, + value.data_type(), + &data_type, + format!("unexpected type for index {index}"), + ); + } + Literal::new_compound( + LiteralValue::Items(values), + data_type::Compound::List, + nullable, + vec![data_type], + ) +} + +/// Parses a map literal. +fn parse_map( + x: &substrait::expression::literal::Map, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result<Literal> { + let values: Vec<_> = proto_required_repeated_field!(x, y, key_values, |x, y| { + let key = proto_required_field!(x, y, key, parse_literal) + .1 + .unwrap_or_default(); + let value = proto_required_field!(x, y, value, parse_literal) + .1 + .unwrap_or_default(); + Ok((key, value)) + }) + .1 + .into_iter() + .map(|x| x.unwrap_or_default()) + .collect(); + if values.is_empty() { + comment!( + y, + "At least one key-value pair is required to derive types. Use EmptyMap instead."
+ ); + } + let mut key_type = Arc::default(); + let mut value_type = Arc::default(); + for (index, value) in values.iter().enumerate() { + key_type = types::assert_equal( + y, + value.0.data_type(), + &key_type, + format!("unexpected key type for index {index}"), + ); + value_type = types::assert_equal( + y, + value.1.data_type(), + &value_type, + format!("unexpected value type for index {index}"), + ); + } + Literal::new_compound( + LiteralValue::Pairs(values), + data_type::Compound::Map, + nullable, + vec![key_type, value_type], + ) +} + +/// Parses an empty list literal. +fn parse_empty_list( + x: &substrait::r#type::List, + y: &mut context::Context, + _nullable: bool, +) -> diagnostic::Result { + // FIXME: nullability is redundantly specified, and the type + // variation reference would be if it had gotten the same + // treatment as nullability. Why doesn't EmptyList just map to only + // the element data type? + types::parse_list(x, y)?; + Ok(Literal { + value: LiteralValue::Items(vec![]), + data_type: y.data_type(), + }) +} + +/// Parses an empty map literal. +fn parse_empty_map( + x: &substrait::r#type::Map, + y: &mut context::Context, + _nullable: bool, +) -> diagnostic::Result { + // FIXME: same note as for EmptyList. + types::parse_map(x, y)?; + Ok(Literal { + value: LiteralValue::Pairs(vec![]), + data_type: y.data_type(), + }) +} + +/// Parses a null literal. +fn parse_null( + x: &substrait::Type, + y: &mut context::Context, + _nullable: bool, +) -> diagnostic::Result { + // FIXME: same note as for EmptyList. + types::parse_type(x, y)?; + let data_type = y.data_type(); + if !data_type.nullable() && !data_type.is_unresolved() { + Err(cause!( + TypeMismatchedNullability, + "type of null literal must be nullable" + )) + } else { + Ok(Literal { + value: LiteralValue::Null, + data_type: y.data_type(), + }) + } +} + +/// Parse a literal value. Returns the parsed literal. 
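The rule `parse_null` enforces above is worth restating in isolation: a typed null is only valid when its type is nullable, with unresolved types exempted so that one failure does not cascade into more diagnostics. A minimal sketch (the hypothetical `Ty` struct stands in for the real data type):

```rust
struct Ty {
    nullable: bool,
    unresolved: bool,
}

fn check_null(t: &Ty) -> Result<(), &'static str> {
    if !t.nullable && !t.unresolved {
        Err("type of null literal must be nullable")
    } else {
        Ok(())
    }
}

fn main() {
    assert!(check_null(&Ty { nullable: true, unresolved: false }).is_ok());
    assert!(check_null(&Ty { nullable: false, unresolved: false }).is_err());
    // Unresolved types never trigger the diagnostic.
    assert!(check_null(&Ty { nullable: false, unresolved: true }).is_ok());
}
```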
+fn parse_literal_type( + x: &substrait::expression::literal::LiteralType, + y: &mut context::Context, + nullable: bool, +) -> diagnostic::Result { + use substrait::expression::literal::LiteralType; + match x { + LiteralType::Boolean(x) => parse_boolean(x, y, nullable), + LiteralType::I8(x) => parse_i8(x, y, nullable), + LiteralType::I16(x) => parse_i16(x, y, nullable), + LiteralType::I32(x) => parse_i32(x, y, nullable), + LiteralType::I64(x) => parse_i64(x, y, nullable), + LiteralType::Fp32(x) => parse_fp32(x, y, nullable), + LiteralType::Fp64(x) => parse_fp64(x, y, nullable), + LiteralType::String(x) => parse_string(x, y, nullable), + LiteralType::Binary(x) => parse_binary(x, y, nullable), + LiteralType::Timestamp(x) => parse_timestamp(x, y, nullable), + LiteralType::TimestampTz(x) => parse_timestamp_tz(x, y, nullable), + LiteralType::Date(x) => parse_date(x, y, nullable), + LiteralType::Time(x) => parse_time(x, y, nullable), + LiteralType::IntervalYearToMonth(x) => parse_interval_year_to_month(x, y, nullable), + LiteralType::IntervalDayToSecond(x) => parse_interval_day_to_second(x, y, nullable), + LiteralType::Uuid(x) => parse_uuid(x, y, nullable), + LiteralType::FixedChar(x) => parse_fixed_char(x, y, nullable), + LiteralType::VarChar(x) => parse_var_char(x, y, nullable), + LiteralType::FixedBinary(x) => parse_fixed_binary(x, y, nullable), + LiteralType::Decimal(x) => parse_decimal(x, y, nullable), + LiteralType::Struct(x) => parse_struct_int(x, y, nullable), + LiteralType::List(x) => parse_list(x, y, nullable), + LiteralType::Map(x) => parse_map(x, y, nullable), + LiteralType::EmptyList(x) => parse_empty_list(x, y, nullable), + LiteralType::EmptyMap(x) => parse_empty_map(x, y, nullable), + LiteralType::Null(x) => parse_null(x, y, nullable), + } +} + +/// Parse a literal value. Returns the parsed literal. +pub fn parse_literal( + x: &substrait::expression::Literal, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse type parameters that apply to all literals (except empty objects + // and null...). + if !matches!( + x.literal_type, + Some(substrait::expression::literal::LiteralType::EmptyList(_)) + | Some(substrait::expression::literal::LiteralType::EmptyMap(_)) + | Some(substrait::expression::literal::LiteralType::Null(_)) + ) { + // FIXME: why isn't the nullability enum used here? Especially + // considering nullability here actually should be unspecified when + // above match yields false, while it must be specified everywhere + // else. Better yet, change the semantics as described in the other + // fixmes such that it is always mandatory everywhere, and then use + // a boolean everywhere? If the point of the enum is to allow types + // to be "partially unresolved," then the type system is pretty + // fundamentally broken, since overload resolution depends on it. + proto_primitive_field!(x, y, nullable); + + // FIXME: why would literals not support type variations? Feels like + // there should be a type variation reference here. + } else { + // FIXME: this is all very ugly. Since all types can be made nullable + // anyway, why isn't the nullability field taken out of the type kind + // for types as well? Then the "empty" values can just refer to the + // type kind rather than the whole type message, and the problem would + // be solved. Likewise, I don't see why type variations should get + // special treatment in the sense that (currently) user-defined types + // can't also have variations. Why explicitly disallow that? 
+ proto_primitive_field!(x, y, nullable, |x, y| { + // Send diagnostic only when x is not set to its default value, + // since the default value is indistinguishable from unspecified. + if *x { + diagnostic!( + y, + Info, + RedundantField, + "this field is inoperative for empty lists, empty maps, and null." + ); + } else { + comment!( + y, + "This field is inoperative for empty lists, empty maps, and null." + ); + } + Ok(()) + }); + } + + // Parse the literal value. + let literal = proto_required_field!(x, y, literal_type, parse_literal_type, x.nullable) + .1 + .unwrap_or_default(); + + // Describe node. + y.set_data_type(literal.data_type().clone()); + describe!(y, Expression, "{}", literal); + summary!( + y, + "Literal of type {:#} with value {:#}", + literal.data_type(), + literal + ); + Ok(literal) +} diff --git a/rs/src/parse/expressions/misc.rs b/rs/src/parse/expressions/misc.rs new file mode 100644 index 00000000..0dcf9408 --- /dev/null +++ b/rs/src/parse/expressions/misc.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating miscellaneous expression types. + +use crate::input::proto::substrait; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::expressions; +use crate::parse::types; +use crate::string_util; + +/// Parse an enum expression. Returns a description of said expression. +pub fn parse_enum( + x: &substrait::expression::Enum, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse variant. + let variant = proto_required_field!(x, y, enum_kind, |x, y| { + match x { + substrait::expression::r#enum::EnumKind::Specified(x) => { + if x.is_empty() { + diagnostic!(y, Error, IllegalValue, "enum variant name cannot be empty"); + } + Ok(Some(x.clone())) + } + substrait::expression::r#enum::EnumKind::Unspecified(_) => Ok(None), + } + }) + .1 + .flatten(); + + // Describe node. + if let Some(variant) = &variant { + describe!( + y, + Misc, + "Function option variant {}", + string_util::as_ident_or_string(variant) + ); + } else { + describe!(y, Misc, "Default function option variant"); + } + + Ok(expressions::Expression::EnumVariant(variant)) +} + +/// Parse a typecast expression. Returns a description of said expression. +pub fn parse_cast( + x: &substrait::expression::Cast, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse fields. + let data_type = proto_required_field!(x, y, r#type, types::parse_type) + .0 + .data_type(); + let input = proto_boxed_required_field!(x, y, input, expressions::parse_expression) + .1 + .unwrap_or_default(); + let expression = expressions::Expression::Cast(data_type, Box::new(input)); + proto_enum_field!( + x, + y, + failure_behavior, + substrait::expression::cast::FailureBehavior + ); + + // TODO: check if this is a valid typecast. + // FIXME: how? + diagnostic!( + y, + Warning, + NotYetImplemented, + "typecast validation rules are not yet implemented" + ); + + // Describe node. + describe!(y, Expression, "{}", expression); + summary!(y, "Type conversion: {:#}", expression); + Ok(expression) +} diff --git a/rs/src/parse/expressions/mod.rs b/rs/src/parse/expressions/mod.rs new file mode 100644 index 00000000..f6c74070 --- /dev/null +++ b/rs/src/parse/expressions/mod.rs @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating expressions. 
+
+use crate::input::proto::substrait;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::string_util;
+use crate::string_util::Describe;
+use std::sync::Arc;
+
+pub mod conditionals;
+pub mod functions;
+pub mod literals;
+pub mod misc;
+pub mod references;
+pub mod subqueries;
+
+/// Description of an expression.
+#[derive(Clone)]
+pub enum Expression {
+    /// Used for unknown expression types.
+    Unresolved,
+
+    /// Used for literals.
+    Literal(literals::Literal),
+
+    /// Used for references.
+    Reference(Box<references::Reference>),
+
+    /// Used for function calls and conditionals (which, really, are just
+    /// builtin function calls).
+    Function(String, Vec<Expression>),
+
+    /// Used for subqueries, or anything else where the "arguments" are too
+    /// extensive to be reasonably described; the argument list is always
+    /// simply represented with an ellipsis.
+    BigFunction(String),
+
+    /// Used to represent the values of a MultiOrList.
+    Tuple(Vec<Expression>),
+
+    /// Used for type casts.
+    Cast(Arc<data_type::DataType>, Box<Expression>),
+
+    /// Used for function option enum variants. Note that these aren't normal
+    /// expressions, as they have no associated type. See FIXME at the bottom
+    /// of this file.
+    EnumVariant(Option<String>),
+}
+
+impl Default for Expression {
+    fn default() -> Self {
+        Expression::Unresolved
+    }
+}
+
+impl From<literals::Literal> for Expression {
+    fn from(l: literals::Literal) -> Self {
+        Expression::Literal(l)
+    }
+}
+
+impl From<references::Reference> for Expression {
+    fn from(r: references::Reference) -> Self {
+        Expression::Reference(Box::new(r))
+    }
+}
+
+impl Describe for Expression {
+    fn describe(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        limit: string_util::Limit,
+    ) -> std::fmt::Result {
+        match self {
+            Expression::Unresolved => write!(f, "?"),
+            Expression::Literal(x) => x.describe(f, limit),
+            Expression::Reference(x) => x.describe(f, limit),
+            Expression::Function(name, args) => {
+                let (name_limit, args_limit) = limit.split(name.len());
+                string_util::describe_identifier(f, name, name_limit)?;
+                write!(f, "(")?;
+                string_util::describe_sequence(f, args, args_limit, 20, |f, expr, _, limit| {
+                    expr.describe(f, limit)
+                })?;
+                write!(f, ")")
+            }
+            Expression::BigFunction(name) => {
+                string_util::describe_identifier(f, name, limit)?;
+                write!(f, "(...)")
+            }
+            Expression::Tuple(items) => {
+                write!(f, "(")?;
+                string_util::describe_sequence(f, items, limit, 20, |f, expr, _, limit| {
+                    expr.describe(f, limit)
+                })?;
+                write!(f, ")")
+            }
+            Expression::Cast(data_type, expression) => {
+                let (type_limit, expr_limit) = limit.split(10);
+                write!(f, "(")?;
+                data_type.describe(f, type_limit)?;
+                write!(f, ")(")?;
+                expression.describe(f, expr_limit)?;
+                write!(f, ")")
+            }
+            Expression::EnumVariant(Some(x)) => string_util::describe_identifier(f, x, limit),
+            Expression::EnumVariant(None) => write!(f, "-"),
+        }
+    }
+}
+
+impl std::fmt::Display for Expression {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.display().fmt(f)
+    }
+}
+
+impl Expression {
+    /// Shorthand for a new null literal.
+    pub fn new_null(data_type: Arc<data_type::DataType>) -> Expression {
+        literals::Literal::new_null(data_type).into()
+    }
+}
+
+/// Parse an expression type. Returns a description of said expression.
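+/// `enum_allowed` specifies whether a function option enum variant is legal
+/// in this position; it is only set when parsing function call arguments.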
+fn parse_expression_type(
+    x: &substrait::expression::RexType,
+    y: &mut context::Context,
+    enum_allowed: bool,
+) -> diagnostic::Result<Expression> {
+    match x {
+        substrait::expression::RexType::Literal(x) => {
+            literals::parse_literal(x, y).map(Expression::from)
+        }
+        substrait::expression::RexType::Selection(x) => {
+            references::parse_field_reference(x.as_ref(), y).map(Expression::from)
+        }
+        substrait::expression::RexType::ScalarFunction(x) => functions::parse_scalar_function(x, y),
+        substrait::expression::RexType::WindowFunction(x) => functions::parse_window_function(x, y),
+        substrait::expression::RexType::IfThen(x) => conditionals::parse_if_then(x.as_ref(), y),
+        substrait::expression::RexType::SwitchExpression(x) => {
+            conditionals::parse_switch(x.as_ref(), y)
+        }
+        substrait::expression::RexType::SingularOrList(x) => {
+            conditionals::parse_singular_or_list(x.as_ref(), y)
+        }
+        substrait::expression::RexType::MultiOrList(x) => conditionals::parse_multi_or_list(x, y),
+        substrait::expression::RexType::Enum(x) => {
+            if !enum_allowed {
+                diagnostic!(
+                    y,
+                    Error,
+                    IllegalValue,
+                    "function option enum variants are not allowed here"
+                );
+            }
+            misc::parse_enum(x, y)
+        }
+        substrait::expression::RexType::Cast(x) => misc::parse_cast(x.as_ref(), y),
+        substrait::expression::RexType::Subquery(x) => subqueries::parse_subquery(x.as_ref(), y),
+    }
+}
+
+/// Parse an expression. Returns a description of said expression.
+fn parse_expression_internal(
+    x: &substrait::Expression,
+    y: &mut context::Context,
+    enum_allowed: bool,
+) -> diagnostic::Result<Expression> {
+    // Parse the expression.
+    let (n, e) = proto_required_field!(x, y, rex_type, parse_expression_type, enum_allowed);
+    let expression = e.unwrap_or_default();
+    let data_type = n.data_type();
+
+    // Describe node.
+    y.set_data_type(data_type);
+    describe!(y, Expression, "{}", expression);
+    summary!(y, "Expression: {:#}", expression);
+    Ok(expression)
+}
+
+/// Parse a regular expression (anything except a function option enum
+/// variant). Returns a description of said expression.
+pub fn parse_expression(
+    x: &substrait::Expression,
+    y: &mut context::Context,
+) -> diagnostic::Result<Expression> {
+    parse_expression_internal(x, y, false)
+}
+
+/// Parse a predicate expression (a normal expression that yields a boolean).
+/// Returns a description of said expression.
+pub fn parse_predicate(
+    x: &substrait::Expression,
+    y: &mut context::Context,
+) -> diagnostic::Result<Expression> {
+    let expression = parse_expression_internal(x, y, false)?;
+    let data_type = y.data_type();
+    if !matches!(
+        data_type.class(),
+        data_type::Class::Simple(data_type::Simple::Boolean) | data_type::Class::Unresolved
+    ) {
+        diagnostic!(
+            y,
+            Error,
+            TypeMismatch,
+            "predicates must yield booleans, but found {}",
+            data_type
+        );
+    }
+    Ok(expression)
+}
+
+/// Parse a function argument, which can be an expression or an enum option.
+fn parse_function_argument(
+    x: &substrait::Expression,
+    y: &mut context::Context,
+) -> diagnostic::Result<Expression> {
+    parse_expression_internal(x, y, true)
+}
+
+// FIXME: above should really be solved with a oneof, or better yet, by
+// separating the options passed to a function from its arguments.
diff --git a/rs/src/parse/expressions/references/mask.rs b/rs/src/parse/expressions/references/mask.rs
new file mode 100644
index 00000000..fe6d00ed
--- /dev/null
+++ b/rs/src/parse/expressions/references/mask.rs
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for parsing/validating mask expressions.
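+//!
+//! Conceptually, a mask expression is a nested projection: it filters and
+//! reorders the fields of a struct, and can recurse into struct, list, and
+//! map types. A rough, hypothetical illustration (not actual syntax):
+//!
+//! ```text
+//! input:  STRUCT<a: i32, b: string, c: STRUCT<x: i32, y: i32>>
+//! mask:   keep fields 0 and 2; within field 2, keep field 0
+//! output: STRUCT<a: i32, c: STRUCT<x: i32>>
+//! ```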
+ +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::string_util; +use std::sync::Arc; + +/// Parse a struct item. +fn parse_struct_item( + x: &substrait::expression::mask_expression::StructItem, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + // Handle the struct index field. + let data_type = proto_primitive_field!(x, y, field, super::parse_struct_field_index, root) + .1 + .unwrap_or_default(); + + // Set resulting data type. + y.set_data_type(data_type.clone()); + + // Handle child selection, if any, to recursively project the field type + // of the selected struct field. + if x.child.is_some() { + let data_type = proto_required_field!(x, y, child, parse_select, &data_type) + .0 + .data_type(); + + // Update data type. + y.set_data_type(data_type); + + // Describe node. + describe!(y, Expression, "Struct item selection and sub-selection"); + } else { + describe!(y, Expression, "Struct item selection"); + } + + Ok(()) +} + +/// Parse a struct selection, a filter/swizzle for a struct type. +fn parse_struct_select( + x: &substrait::expression::mask_expression::StructSelect, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + // Struct selections can only be applied to structs. + if !root.is_unresolved() && !root.is_struct() { + diagnostic!( + y, + Error, + TypeMismatch, + "struct selection requires a struct type, but got a {}", + root.class() + ); + } + + // Parse fields. + let fields = proto_repeated_field!( + x, + y, + struct_items, + parse_struct_item, + |_, _, _, _, _| (), + root + ) + .0 + .iter() + .map(|x| x.data_type()) + .collect::>(); + + // Create struct. + y.set_data_type(data_type::DataType::new_struct(fields, root.nullable())); + + // Describe node. + describe!(y, Expression, "Struct selection"); + Ok(()) +} + +/// Parse a list element selection. +fn parse_list_select_item_element( + x: &substrait::expression::mask_expression::list_select::list_select_item::ListElement, + y: &mut context::Context, +) -> diagnostic::Result<()> { + proto_primitive_field!(x, y, field); + describe!( + y, + Expression, + "Select {} element", + string_util::describe_index(x.field) + ); + Ok(()) +} + +/// Parse a list slice selection. +fn parse_list_select_item_slice( + x: &substrait::expression::mask_expression::list_select::list_select_item::ListSlice, + y: &mut context::Context, +) -> diagnostic::Result<()> { + proto_primitive_field!(x, y, start); + proto_primitive_field!(x, y, end); + + // Raise a diagnostic if the slice is always null, and describe the slice. 
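+    // Note: start and end are inclusive indices, and negative values count
+    // backward from the end of the list, so the slice can only be statically
+    // proven empty when both bounds have the same sign.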
+    let description = if (x.start >= 0) == (x.end >= 0) && x.start > x.end {
+        diagnostic!(y, Info, RedundantListSlice, "slice is always null");
+        String::from("Selects an empty list slice")
+    } else if x.start == 0 {
+        match x.end {
+            i32::MIN..=-3 => format!("Selects all but the last {} elements", -x.end - 1),
+            -2 => String::from("Selects all but the last element"),
+            -1 => String::from("Selects the complete list"),
+            0 => String::from("Selects the first element"),
+            1..=i32::MAX => format!("Selects the first {} elements", x.end + 1),
+        }
+    } else if x.end == -1 {
+        match x.start {
+            i32::MIN..=-2 => format!("Selects the last {} elements", -x.start),
+            -1 => String::from("Selects the last element"),
+            0 => String::from("Selects the complete list"),
+            1 => String::from("Selects all but the first element"),
+            2..=i32::MAX => format!("Selects all but the first {} elements", x.start),
+        }
+    } else {
+        format!(
+            "Selects the {} through the {} element (inclusive)",
+            string_util::describe_index(x.start),
+            string_util::describe_index(x.end)
+        )
+    };
+
+    // Describe the node.
+    describe!(y, Expression, "{}", description);
+    Ok(())
+}
+
+/// Parse a list selection item type.
+fn parse_list_select_item_type(
+    x: &substrait::expression::mask_expression::list_select::list_select_item::Type,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    match x {
+        substrait::expression::mask_expression::list_select::list_select_item::Type::Item(x) => {
+            parse_list_select_item_element(x, y)
+        }
+        substrait::expression::mask_expression::list_select::list_select_item::Type::Slice(x) => {
+            parse_list_select_item_slice(x, y)
+        }
+    }
+}
+
+/// Parse a list selection item.
+fn parse_list_select_item(
+    x: &substrait::expression::mask_expression::list_select::ListSelectItem,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    proto_required_field!(x, y, r#type, parse_list_select_item_type);
+    Ok(())
+}
+
+/// Parse a list selection, a filter/swizzle for a list type.
+fn parse_list_select(
+    x: &substrait::expression::mask_expression::ListSelect,
+    y: &mut context::Context,
+    root: &Arc<data_type::DataType>,
+) -> diagnostic::Result<()> {
+    // List selections can only be applied to lists.
+    if !root.is_unresolved() && !root.is_list() {
+        diagnostic!(
+            y,
+            Error,
+            TypeMismatch,
+            "list selection requires a list type, but got a {}",
+            root.class()
+        );
+    }
+
+    // Parse fields.
+    proto_repeated_field!(x, y, selection, parse_list_select_item);
+
+    // Set resulting data type.
+    y.set_data_type(root.clone());
+
+    // Handle child selection, if any, to recursively project the list element
+    // type.
+    if x.child.is_some() {
+        // Get the list element type.
+        let data_type = root.unwrap_list().unwrap_or_default();
+
+        // Apply selection logic recursively.
+        let data_type = proto_boxed_required_field!(x, y, child, parse_select, &data_type)
+            .0
+            .data_type();
+
+        // Create the new type.
+        y.set_data_type(data_type::DataType::new_list(data_type, root.nullable()));
+
+        // Describe node.
+        describe!(y, Expression, "List selection and sub-selection");
+    } else {
+        describe!(y, Expression, "List selection");
+    }
+
+    Ok(())
+}
+
+/// Parse a map single-key selection.
+fn parse_map_select_key(
+    _x: &substrait::expression::mask_expression::map_select::MapKey,
+    y: &mut context::Context,
+    _key_type: &Arc<data_type::DataType>,
+) -> diagnostic::Result<()> {
+    // FIXME: map keys are not necessarily strings. Why is this not a
+    // primitive?
+ diagnostic!( + y, + Error, + NotYetImplemented, + "map key remappings are not yet specified" + ); + describe!(y, Expression, "Single-key map selection"); + Ok(()) +} + +/// Parse a map selection by means of an expression. +fn parse_map_select_expression( + _x: &substrait::expression::mask_expression::map_select::MapKeyExpression, + y: &mut context::Context, + _key_type: &Arc, +) -> diagnostic::Result<()> { + // FIXME: in Rust vernacular, need an Fn(K) -> Option here. I suppose + // there is no structure for that yet? Or are these the regex-type things + // that are not yet specified? + diagnostic!( + y, + Error, + NotYetImplemented, + "map key remappings are not yet specified" + ); + describe!(y, Expression, "Map key remapping"); + Ok(()) +} + +/// Parse a map selection type. +fn parse_map_select_type( + x: &substrait::expression::mask_expression::map_select::Select, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + match x { + substrait::expression::mask_expression::map_select::Select::Key(x) => { + parse_map_select_key(x, y, root) + } + substrait::expression::mask_expression::map_select::Select::Expression(x) => { + parse_map_select_expression(x, y, root) + } + } +} + +/// Parse a map selection. +fn parse_map_select( + x: &substrait::expression::mask_expression::MapSelect, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + // Map selections can only be applied to maps. + if !root.is_unresolved() && !root.is_map() { + diagnostic!( + y, + Error, + TypeMismatch, + "map selection requires a map type, but got a {}", + root.class() + ); + } + + // Parse selection field. + if x.select.is_some() { + proto_required_field!( + x, + y, + select, + parse_map_select_type, + &root.unwrap_map_key().unwrap_or_default() + ); + } else { + comment!(y, "No select key specified: mapping is left unchanged."); + } + + // Set resulting data type. + y.set_data_type(root.clone()); + + // Handle child selection, if any, to recursively project the map value + // type. + if x.child.is_some() { + // Get the map types. + let value_type = root.unwrap_map().unwrap_or_default(); + let key_type = root.unwrap_map_key().unwrap_or_default(); + + // Apply selection logic recursively. + let value_type = proto_boxed_required_field!(x, y, child, parse_select, &value_type) + .0 + .data_type(); + + // Create the new type. + y.set_data_type(data_type::DataType::new_map( + key_type, + value_type, + root.nullable(), + )); + + // Describe node. + describe!(y, Expression, "Map selection and sub-selection"); + } else { + describe!(y, Expression, "Map selection"); + } + + Ok(()) +} + +/// Parse a selection type. +fn parse_select_type( + x: &substrait::expression::mask_expression::select::Type, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + match x { + substrait::expression::mask_expression::select::Type::Struct(x) => { + parse_struct_select(x, y, root) + } + substrait::expression::mask_expression::select::Type::List(x) => { + parse_list_select(x.as_ref(), y, root) + } + substrait::expression::mask_expression::select::Type::Map(x) => { + parse_map_select(x.as_ref(), y, root) + } + } +} + +fn parse_select( + x: &substrait::expression::mask_expression::Select, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result<()> { + let data_type = proto_required_field!(x, y, r#type, parse_select_type, root) + .0 + .data_type(); + y.set_data_type(data_type); + Ok(()) +} + +/// Parses the maintain_singular_struct field of a mask expression. 
is_singular +/// must specify whether the data type is actually a singular struct, while +/// struct_required must specify whether the context of the mask expression +/// requires a struct type. Returns whether the data type is a singular struct +/// and should be unwrapped. +fn parse_maintain_singular_struct( + x: &bool, + y: &mut context::Context, + is_singular: bool, + struct_required: bool, +) -> diagnostic::Result { + let maintain = *x; + match (is_singular, maintain, struct_required) { + (true, true, _) => { + // Okay: maintain struct. + summary!( + y, + "Mask expression yields a singular struct, which is \ + maintained as-is." + ); + Ok(false) + } + (true, false, true) => { + // Error: request to remove struct, but context requires a struct. + summary!( + y, + "Mask expression yields a singular struct, which would be \ + reduced to its element type, but its context does not allow \ + this." + ); + diagnostic!( + y, + Error, + TypeStructRequired, + "context requires a struct type and type is a singular \ + struct, maintain_singular_struct must be set" + ); + Ok(false) + } + (true, false, false) => { + // Okay: remove singular struct wrapper. + summary!( + y, + "Mask expression yields a singular struct, which is reduced \ + to its element type." + ); + Ok(true) + } + (false, true, _) => { + // Okay: not a singular struct, so there is nothing to strip. + summary!( + y, + "Data type of mask expression is not a singular struct, so \ + there is nothing to strip or maintain. The explicit true is \ + redundant." + ); + Ok(false) + } + (false, false, _) => { + // Okay: not a singular struct, so there is nothing to strip. + summary!( + y, + "Data type of mask expression is not a singular struct, so \ + there is nothing to strip or maintain." + ); + Ok(false) + } + } +} + +/// Parse a mask expression; that is, a field selection that can output a +/// nested structure. root specifies the data type being indexed, while +/// struct_required must specify whether the context of the mask expression +/// requires a struct type. +pub fn parse_mask_expression( + x: &substrait::expression::MaskExpression, + y: &mut context::Context, + root: &Arc, + struct_required: bool, +) -> diagnostic::Result<()> { + // Parse the struct selection and get its data type. + let data_type = proto_required_field!(x, y, select, parse_struct_select, root) + .0 + .data_type(); + + // Determine if the data type is a singular struct (i.e. a struct with only + // one item) and its element type if so. + let singular_type = data_type.unwrap_singular_struct().map(|data_type| { + if root.nullable() { + data_type.make_nullable() + } else { + data_type + } + }); + + // Handle the maintain_singular_struct field. + let unwrap = proto_primitive_field!( + x, + y, + maintain_singular_struct, + parse_maintain_singular_struct, + singular_type.is_some(), + struct_required + ) + .1 + .unwrap_or_default(); + + // Set the data type. + y.set_data_type(if unwrap { + singular_type.unwrap() + } else { + data_type + }); + + // Describe node. + describe!( + y, + Expression, + "References fields into a new nested structure" + ); + Ok(()) +} diff --git a/rs/src/parse/expressions/references/mod.rs b/rs/src/parse/expressions/references/mod.rs new file mode 100644 index 00000000..15ef94d9 --- /dev/null +++ b/rs/src/parse/expressions/references/mod.rs @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validating references. 
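+//!
+//! Reference paths are assembled back to front while recursing out of the
+//! reference tree, which is why [`ReferencePath`] stores its segments in
+//! reverse order. A minimal sketch (the segment strings are hypothetical):
+//!
+//! ```ignore
+//! let path = ReferencePath::new()
+//!     .prefix(String::from(".[2]"))    // innermost segment is pushed first...
+//!     .prefix(String::from(".field")); // ...then prefixed with outer segments
+//! // Displays in source order as ".field.[2]".
+//! println!("{path}");
+//! ```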
+ +use crate::input::proto::substrait; +use crate::output::comment; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::expressions; +use crate::string_util; +use crate::string_util::Describe; +use std::sync::Arc; + +pub mod mask; +pub mod scalar; + +/// Description of the root of a reference. +#[derive(Clone)] +enum Root { + Unresolved, + Expression(expressions::Expression), + Schema(usize), +} + +impl From for Root { + fn from(e: expressions::Expression) -> Self { + Root::Expression(e) + } +} + +impl Default for Root { + fn default() -> Self { + Root::Unresolved + } +} + +/// Description of a reference path. +#[derive(Clone)] +pub struct ReferencePath { + // *Reversed* list of segments. + segments: Vec, +} + +impl Default for ReferencePath { + fn default() -> Self { + Self { + segments: vec![String::from(".?")], + } + } +} + +impl ReferencePath { + fn new() -> Self { + Self { segments: vec![] } + } + + fn prefix(mut self, s: String) -> Self { + self.segments.push(s); + self + } + + /// Returns the length of the complete path string. + pub fn len(&self) -> usize { + self.segments.iter().map(String::len).sum() + } +} + +impl Describe for ReferencePath { + fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + let lens = self.segments.iter().map(String::len).collect::>(); + let (n_left, n_right) = limit.split_ns(&lens); + for i in 0..n_left { + write!(f, "{}", self.segments[self.segments.len() - i - 1])?; + } + if let Some(n_right) = n_right { + write!(f, "..")?; + for i in self.segments.len() - n_right..self.segments.len() { + write!(f, "{}", self.segments[self.segments.len() - i - 1])?; + } + } + Ok(()) + } +} + +impl std::fmt::Display for ReferencePath { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + +/// Description of a reference. +#[derive(Clone)] +pub struct Reference { + root: Root, + path: ReferencePath, +} + +impl Default for Reference { + fn default() -> Self { + Self { + root: Root::Schema(0), + path: ReferencePath::default(), + } + } +} + +impl Describe for Reference { + fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + let (path_limit, root_limit) = limit.split(self.path.len()); + match &self.root { + Root::Unresolved => write!(f, "?")?, + Root::Expression(e) => { + write!(f, "(")?; + e.describe(f, root_limit)?; + write!(f, ")")?; + } + Root::Schema(0) => write!(f, "<>")?, + Root::Schema(n) => write!(f, "<{n}>")?, + } + self.path.describe(f, path_limit) + } +} + +impl std::fmt::Display for Reference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + +/// Parse a struct field index into its data type. +fn parse_struct_field_index( + x: &i32, + _y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result> { + let index = *x; + if index < 0 { + return Err(cause!( + IllegalValue, + "struct indices cannot be less than zero" + )); + } + let index: usize = index.try_into().unwrap(); + if root.is_struct() { + let size = root.parameters().len(); + root.type_parameter(index) + .ok_or_else(|| cause!(IllegalValue, "struct index out of range (size = {size})")) + } else { + Ok(Arc::default()) + } +} + +/// Parse a reference root. 
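+/// The root is the entity being indexed: the schema of the current query,
+/// the schema of an enclosing query, or the value of another expression.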
+fn parse_root_type( + x: &substrait::expression::field_reference::RootType, + y: &mut context::Context, +) -> diagnostic::Result { + match x { + substrait::expression::field_reference::RootType::Expression(x) => { + expressions::parse_expression(x.as_ref(), y).map(Root::from) + } + substrait::expression::field_reference::RootType::RootReference(_) => { + describe!(y, Misc, "Reference to field of current query"); + y.set_data_type(y.schema(0)?); + Ok(Root::Schema(0)) + } + substrait::expression::field_reference::RootType::OuterReference(x) => { + describe!( + y, + Misc, + "Reference to field of {} outer query", + string_util::describe_nth(x.steps_out) + ); + proto_primitive_field!(x, y, steps_out, |x, y| { + if *x < 1 { + diagnostic!( + y, + Error, + IllegalValue, + "must be at least 1 (use RootReference instead)" + ); + } + Ok(()) + }); + let steps_out = x.steps_out as usize; + y.set_data_type(y.schema(steps_out)?); + Ok(Root::Schema(steps_out)) + } + } +} + +/// Parse a reference path. +fn parse_reference_type( + x: &substrait::expression::field_reference::ReferenceType, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result { + match x { + substrait::expression::field_reference::ReferenceType::DirectReference(x) => { + scalar::parse_reference_segment(x, y, root) + } + substrait::expression::field_reference::ReferenceType::MaskedReference(x) => { + mask::parse_mask_expression(x, y, root, false)?; + Ok(ReferencePath::new().prefix(String::from(".mask(..)"))) + } + } +} + +/// Parse a field reference. Returns a description of the nested reference. +pub fn parse_field_reference( + x: &substrait::expression::FieldReference, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse the root of the reference. + let (root_node, root) = proto_required_field!(x, y, root_type, parse_root_type); + let root = root.unwrap_or_default(); + + // Parse the reference type. + let (path_node, path) = proto_required_field!( + x, + y, + reference_type, + parse_reference_type, + &root_node.data_type() + ); + let path = path.unwrap_or_default(); + + // Set the data type. + y.set_data_type(path_node.data_type()); + + // Describe node. + let reference = Reference { root, path }; + describe!(y, Expression, "Selects {}", &reference); + summary!(y, "Full reference path: {:#}", &reference); + if let Root::Schema(depth) = &reference.root { + let depth = *depth; + y.push_summary(comment::Comment::new().nl()); + if depth == 0 { + summary!( + y, + "Here, <> is used to refer to the row currently being processed." + ); + } else { + summary!( + y, + "Here, <{depth}> is used to refer to the row being processed \ + by the {} outer query.", + string_util::describe_nth(depth as u32) + ); + } + } + Ok(reference) +} diff --git a/rs/src/parse/expressions/references/scalar.rs b/rs/src/parse/expressions/references/scalar.rs new file mode 100644 index 00000000..7c2cfdda --- /dev/null +++ b/rs/src/parse/expressions/references/scalar.rs @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module for parsing/validation scalar references. + +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::expressions::literals; +use crate::parse::expressions::references; +use crate::parse::types; +use crate::string_util; +use std::sync::Arc; + +/// Parse a struct field reference. Returns a description of the nested +/// reference. 
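+/// If the struct being indexed is nullable, the resolved field type is made
+/// nullable as well.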
+fn parse_struct_field( + x: &substrait::expression::reference_segment::StructField, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result { + // Struct selections can only be applied to structs. + if !root.is_unresolved() && !root.is_struct() { + diagnostic!( + y, + Error, + TypeMismatch, + "struct selection requires a struct type, but got a {}", + root.class() + ); + } + + // Create description. + let description = format!(".{}", x.field); + + // Determine result data type. + let data_type = proto_primitive_field!(x, y, field, super::parse_struct_field_index, root) + .1 + .unwrap_or_default(); + + // If the struct is nullable, the field must also be nullable. + let data_type = if root.nullable() { + data_type.make_nullable() + } else { + data_type + }; + + // Set resulting data type. + y.set_data_type(data_type.clone()); + + // Handle child selection, if any, to recursively select elements from + // the struct field. + let reference = if x.child.is_some() { + let (node, result) = + proto_boxed_required_field!(x, y, child, parse_reference_segment, &data_type); + + // Update data type. + y.set_data_type(node.data_type()); + + // Generate reference. + result.unwrap_or_default().prefix(description) + } else { + references::ReferencePath::new().prefix(description) + }; + + // Describe node. + describe!(y, Expression, "Selects {}", &reference); + summary!(y, "Full reference path: {:#}", &reference); + Ok(reference) +} + +/// Parse a list element reference. Returns a description of the nested +/// reference. +fn parse_list_element( + x: &substrait::expression::reference_segment::ListElement, + y: &mut context::Context, + root: &Arc, +) -> diagnostic::Result { + // Struct selections can only be applied to lists. + if !root.is_unresolved() && !root.is_list() { + diagnostic!( + y, + Error, + TypeMismatch, + "list selection requires a list type, but got a {}", + root.class() + ); + } + + // Handle the list index field. + proto_primitive_field!(x, y, offset, |x, y| { + describe!( + y, + Misc, + "Selects {} list element", + string_util::describe_index(*x) + ); + Ok(()) + }); + + // Create description. + let description = format!(".[{}]", x.offset); + + // Determine result data type. + let data_type = root.unwrap_list().unwrap_or_default(); + + // If the list is nullable, the selection must also be nullable. + let data_type = if root.nullable() { + data_type.make_nullable() + } else { + data_type + }; + + // FIXME: what is the runtime behavior for index out of range, throw or + // yield null? In the latter case, the return type would always need to + // be nullable. + + // Set resulting data type. + y.set_data_type(data_type.clone()); + + // Handle child selection, if any, to recursively select elements from + // the list element. + let reference = if x.child.is_some() { + let (node, result) = + proto_boxed_required_field!(x, y, child, parse_reference_segment, &data_type); + + // Update data type. + y.set_data_type(node.data_type()); + + // Generate reference. + result.unwrap_or_default().prefix(description) + } else { + references::ReferencePath::new().prefix(description) + }; + + // Describe node. + describe!(y, Expression, "Selects {}", &reference); + summary!(y, "Full reference path: {:#}", &reference); + Ok(reference) +} + +/// Parse a map key reference. Returns a description of the nested +/// reference. 
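+/// The key is given as a literal, whose type must match the key type of the
+/// map being indexed.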
+fn parse_map_key(
+    x: &substrait::expression::reference_segment::MapKey,
+    y: &mut context::Context,
+    root: &Arc<data_type::DataType>,
+) -> diagnostic::Result<references::ReferencePath> {
+    // Map selections can only be applied to maps.
+    if !root.is_unresolved() && !root.is_map() {
+        diagnostic!(
+            y,
+            Error,
+            TypeMismatch,
+            "map selection requires a map type, but got a {}",
+            root.class()
+        );
+    }
+
+    // Handle the map key primitive.
+    let key = proto_required_field!(x, y, map_key, literals::parse_literal)
+        .1
+        .unwrap_or_default();
+
+    // Check the key type.
+    types::assert_equal(
+        y,
+        key.data_type(),
+        &root.unwrap_map_key().unwrap_or_default(),
+        "map key type mismatch",
+    );
+
+    // Create description.
+    let description = format!(".[{}]", key);
+
+    // Determine result data type.
+    let data_type = root.unwrap_map().unwrap_or_default();
+
+    // If the map is nullable, the selection must also be nullable.
+    let data_type = if root.nullable() {
+        data_type.make_nullable()
+    } else {
+        data_type
+    };
+
+    // FIXME: what is the runtime behavior for a key that is not present in
+    // the map, throw or yield null? In the latter case, the return type
+    // would always need to be nullable.
+
+    // Set resulting data type.
+    y.set_data_type(data_type.clone());
+
+    // Handle child selection, if any, to recursively select elements from
+    // the map value.
+    let reference = if x.child.is_some() {
+        let (node, result) =
+            proto_boxed_required_field!(x, y, child, parse_reference_segment, &data_type);
+
+        // Update data type.
+        y.set_data_type(node.data_type());
+
+        // Generate reference.
+        result.unwrap_or_default().prefix(description)
+    } else {
+        references::ReferencePath::new().prefix(description)
+    };
+
+    // Describe node.
+    describe!(y, Expression, "Selects {}", &reference);
+    summary!(y, "Full reference path: {:#}", &reference);
+    Ok(reference)
+}
+
+/// Parse a reference segment type. Returns a description of the nested
+/// reference.
+fn parse_reference_type(
+    x: &substrait::expression::reference_segment::ReferenceType,
+    y: &mut context::Context,
+    root: &Arc<data_type::DataType>,
+) -> diagnostic::Result<references::ReferencePath> {
+    match x {
+        substrait::expression::reference_segment::ReferenceType::StructField(x) => {
+            parse_struct_field(x, y, root)
+        }
+        substrait::expression::reference_segment::ReferenceType::ListElement(x) => {
+            parse_list_element(x, y, root)
+        }
+        substrait::expression::reference_segment::ReferenceType::MapKey(x) => {
+            parse_map_key(x, y, root)
+        }
+    }
+}
+
+/// Parse a reference segment, i.e. a scalar reference into some nested
+/// structure of type root. Returns a description of the nested reference.
+pub fn parse_reference_segment(
+    x: &substrait::expression::ReferenceSegment,
+    y: &mut context::Context,
+    root: &Arc<data_type::DataType>,
+) -> diagnostic::Result<references::ReferencePath> {
+    // Parse the selection.
+    let (node, result) = proto_required_field!(x, y, reference_type, parse_reference_type, root);
+
+    // Set the data type.
+    y.set_data_type(node.data_type());
+
+    // Describe node.
+    let reference = result.unwrap_or_default();
+    describe!(y, Expression, "Selects {}", &reference);
+    summary!(y, "Full reference path: {:#}", &reference);
+    Ok(reference)
+}
diff --git a/rs/src/parse/expressions/subqueries.rs b/rs/src/parse/expressions/subqueries.rs
new file mode 100644
index 00000000..f0f47cd9
--- /dev/null
+++ b/rs/src/parse/expressions/subqueries.rs
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for parsing/validating subqueries.
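+//!
+//! For intuition only, the four subquery kinds handled here correspond
+//! roughly to the following SQL forms (SQL shown as a sketch, not as part
+//! of Substrait):
+//!
+//! ```text
+//! Scalar:        (SELECT x FROM t)
+//! InPredicate:   (a, b) IN (SELECT x, y FROM t)
+//! SetPredicate:  EXISTS (SELECT ...), UNIQUE (SELECT ...)
+//! SetComparison: a = ANY (SELECT x FROM t), a < ALL (SELECT x FROM t)
+//! ```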
+ +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::expressions; +use crate::parse::relations; +use crate::parse::types; +use std::sync::Arc; + +/// Parse a scalar subquery. +fn parse_scalar( + x: &substrait::expression::subquery::Scalar, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse the relation and get its schema. + let schema = y.enter_relation_root(|y| { + proto_boxed_required_field!(x, y, input, relations::parse_rel) + .0 + .data_type() + }); + + // Scalar subqueries must return one row and one column. We can't check the + // row count statically, but we can check the schema. + let return_type = if let Some(return_type) = schema.unwrap_singular_struct() { + return_type + } else { + if !schema.is_unresolved() { + diagnostic!( + y, + Error, + ExpressionIllegalSubquery, + "subquery must return a single column" + ); + } + Arc::default() + }; + + // FIXME: what is the behavior when the query doesn't yield one row? Should + // the returned data type be made nullable? + + // Describe node. + y.set_data_type(return_type); + summary!( + y, + "Executes the contained subquery for each row. The query is expected \ + to return a single row and column, the value of which is returned by \ + the expression." + ); + let expression = expressions::Expression::BigFunction(String::from("scalar_subquery")); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a containment subquery. +fn parse_in_predicate( + x: &substrait::expression::subquery::InPredicate, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse the needles. + let needle_types = proto_required_repeated_field!(x, y, needles, expressions::parse_expression) + .0 + .iter() + .map(|x| x.data_type()) + .collect::>(); + + // Parse the relation and get its schema. + let schema = y.enter_relation_root(|y| { + proto_boxed_required_field!(x, y, haystack, relations::parse_rel) + .0 + .data_type() + }); + + // Match data types of needles and haystack. + if let Some(field_types) = schema.unwrap_struct() { + if needle_types.len() != field_types.len() { + diagnostic!( + y, + Error, + TypeMismatch, + "column count mismatch between needle and haystack" + ); + } else { + for (index, (field_type, needle_type)) in + field_types.iter().zip(needle_types.iter()).enumerate() + { + types::assert_equal( + y, + field_type, + needle_type, + format!( + "haystack field type does not match needle type for column {}", + index + 1 + ), + ); + } + } + } else { + assert!(schema.is_unresolved()); + } + + // Describe node. + y.set_data_type(data_type::DataType::new_predicate(false)); + summary!( + y, + "Executes the contained subquery for each row. Returns true \ + if and only if the needle expressions match the fields of at \ + least one of the rows returned by the subquery." + ); + let expression = expressions::Expression::BigFunction(String::from("in_subquery")); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a set predicate subquery. +fn parse_set_predicate( + x: &substrait::expression::subquery::SetPredicate, + y: &mut context::Context, +) -> diagnostic::Result { + use substrait::expression::subquery::set_predicate::PredicateOp; + + // Parse the relation. + y.enter_relation_root(|y| proto_boxed_required_field!(x, y, tuples, relations::parse_rel)); + + // Parse the operation type. 
+ let operation = proto_required_enum_field!(x, y, predicate_op, PredicateOp) + .1 + .unwrap_or_default(); + + // Describe node. + y.set_data_type(data_type::DataType::new_predicate(false)); + let expression = match operation { + PredicateOp::Unspecified => { + expressions::Expression::BigFunction(String::from("invalid_subquery")) + } + PredicateOp::Exists => { + summary!( + y, + "Executes the contained subquery for each row. Returns true \ + if and only if at least one row is returned by the subquery." + ); + expressions::Expression::BigFunction(String::from("subquery_exists")) + } + PredicateOp::Unique => { + summary!( + y, + "Executes the contained subquery for each row. Returns true \ + if and only if no duplicate rows are returned." + ); + expressions::Expression::BigFunction(String::from("subquery_unique")) + } + }; + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a set comparison subquery. +fn parse_set_comparison( + x: &substrait::expression::subquery::SetComparison, + y: &mut context::Context, +) -> diagnostic::Result { + use substrait::expression::subquery::set_comparison::ComparisonOp; + use substrait::expression::subquery::set_comparison::ReductionOp; + + // Parse the left-hand side. + let (n, e) = proto_boxed_required_field!(x, y, left, expressions::parse_expression); + let left_type = n.data_type(); + let left_expression = e.unwrap_or_default(); + + // Parse the operation type. + let comparison_op = proto_required_enum_field!(x, y, comparison_op, ComparisonOp) + .1 + .unwrap_or_default(); + let reduction_op = proto_required_enum_field!(x, y, reduction_op, ReductionOp) + .1 + .unwrap_or_default(); + + // Parse the right-hand side. + let right_schema = y.enter_relation_root(|y| { + proto_boxed_required_field!(x, y, right, relations::parse_rel) + .0 + .data_type() + }); + + // Right-hand side must return a single column. + let right_type = if let Some(right_type) = right_schema.unwrap_singular_struct() { + right_type + } else { + if !right_schema.is_unresolved() { + diagnostic!( + y, + Error, + ExpressionIllegalSubquery, + "subquery must return a single column" + ); + } + Arc::default() + }; + + // Check that the data types match. + types::assert_equal( + y, + &right_type, + &left_type, + "subquery field type does not match expression type", + ); + + // Describe node. + y.set_data_type(data_type::DataType::new_predicate(false)); + let expression = expressions::Expression::BigFunction(format!( + "{}_{}_subquery", + match comparison_op { + ComparisonOp::Unspecified => "invalid", + ComparisonOp::Eq => "equal", + ComparisonOp::Ne => "not_equal", + ComparisonOp::Lt => "less_than", + ComparisonOp::Gt => "greater_than", + ComparisonOp::Le => "less_equal", + ComparisonOp::Ge => "greater_equal", + }, + match reduction_op { + ReductionOp::Unspecified => "invalid", + ReductionOp::Any => "any", + ReductionOp::All => "all", + }, + )); + summary!( + y, + "Executes the contained subquery for each row. 
Returns true if" + ); + summary!( + y, + "{}", + match reduction_op { + ReductionOp::Unspecified => "", + ReductionOp::Any => "any", + ReductionOp::All => "all", + } + ); + summary!( + y, + "rows returned are {}", + match comparison_op { + ComparisonOp::Unspecified => "", + ComparisonOp::Eq => "equal to", + ComparisonOp::Ne => "not equal to", + ComparisonOp::Lt => "less than", + ComparisonOp::Gt => "greater than", + ComparisonOp::Le => "less than or equal to", + ComparisonOp::Ge => "greater than or equal to", + } + ); + summary!(y, "{:#}.", left_expression); + describe!(y, Expression, "{}", expression); + Ok(expression) +} + +/// Parse a particular subquery type. +fn parse_subquery_type( + x: &substrait::expression::subquery::SubqueryType, + y: &mut context::Context, +) -> diagnostic::Result { + match x { + substrait::expression::subquery::SubqueryType::Scalar(x) => parse_scalar(x, y), + substrait::expression::subquery::SubqueryType::InPredicate(x) => parse_in_predicate(x, y), + substrait::expression::subquery::SubqueryType::SetPredicate(x) => parse_set_predicate(x, y), + substrait::expression::subquery::SubqueryType::SetComparison(x) => { + parse_set_comparison(x, y) + } + } +} + +/// Parse a subquery. +pub fn parse_subquery( + x: &substrait::expression::Subquery, + y: &mut context::Context, +) -> diagnostic::Result { + // Parse fields. + let (n, e) = proto_required_field!(x, y, subquery_type, parse_subquery_type); + let return_type = n.data_type(); + let expression = e.unwrap_or_default(); + + // Describe node. + y.set_data_type(return_type); + describe!(y, Expression, "{}", expression); + Ok(expression) +} diff --git a/rs/src/parse/extensions/advanced.rs b/rs/src/parse/extensions/advanced.rs new file mode 100644 index 00000000..4ac72265 --- /dev/null +++ b/rs/src/parse/extensions/advanced.rs @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for advanced extensions, i.e. +//! those based around protobuf Any values. + +use crate::input::proto::substrait; +use crate::output::diagnostic::Result; +use crate::parse::context; + +/// Parse a protobuf "any" type declaration. +#[allow(clippy::ptr_arg)] +fn parse_expected_type_url(x: &String, y: &mut context::Context) -> Result<()> { + if let Err(path) = y.define_proto_any_type(x) { + diagnostic!( + y, + Info, + RedundantProtoAnyDeclaration, + "message type {x} redeclared" + ); + link!(y, path, "Previous declaration was here."); + } + Ok(()) +} + +/// Parse a protobuf "any" message that consumers may ignore. +pub fn parse_hint_any(x: &prost_types::Any, y: &mut context::Context) -> Result<()> { + let (allowed, path) = y.resolve_proto_any(x); + if allowed { + diagnostic!( + y, + Info, + ProtoAny, + "explicitly allowed hint of type {}", + x.type_url + ); + } else { + diagnostic!( + y, + Info, + ProtoAny, + "ignoring unknown hint of type {}", + x.type_url + ); + } + if let Some(path) = path { + link!(y, path, "Type URL declaration is here."); + } + Ok(()) +} + +/// Parse a protobuf "any" message that consumers are not allowed to ignore. 
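+/// Unlike hints, an unrecognized enhancement downgrades the plan's validity:
+/// only consumers that recognize the type URL can execute the plan, hence
+/// the warning below.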
+pub fn parse_functional_any(x: &prost_types::Any, y: &mut context::Context) -> Result<()> {
+    let (allowed, path) = y.resolve_proto_any(x);
+    if allowed {
+        diagnostic!(
+            y,
+            Info,
+            ProtoAny,
+            "explicitly allowed enhancement of type {}",
+            x.type_url
+        );
+    } else {
+        diagnostic!(
+            y,
+            Warning,
+            ProtoAny,
+            "unknown enhancement of type {}; plan is only valid \
+            for consumers recognizing this enhancement",
+            x.type_url
+        );
+    }
+    if let Some(path) = path {
+        link!(y, path, "Type URL declaration is here.");
+    }
+    Ok(())
+}
+
+/// Parse an advanced extension message (based on protobuf "any" messages).
+/// Returns whether an enhancement was specified.
+pub fn parse_advanced_extension(
+    x: &substrait::extensions::AdvancedExtension,
+    y: &mut context::Context,
+) -> Result<bool> {
+    proto_field!(x, y, optimization, parse_hint_any);
+    Ok(proto_field!(x, y, enhancement, parse_functional_any)
+        .0
+        .is_some())
+}
+
+/// Parses the advanced extension information in a plan.
+pub fn parse_plan(x: &substrait::Plan, y: &mut context::Context) {
+    proto_repeated_field!(x, y, expected_type_urls, parse_expected_type_url);
+    proto_field!(x, y, advanced_extensions, parse_advanced_extension);
+}
+
+/// Generate Info diagnostics for any extension definitions that weren't used.
+pub fn check_unused_definitions(y: &mut context::Context) {
+    for (uri, _, path) in y
+        .proto_any_types()
+        .iter_unused()
+        .collect::<Vec<_>>()
+        .into_iter()
+    {
+        diagnostic!(
+            y,
+            Info,
+            RedundantProtoAnyDeclaration,
+            "message type {uri} is not present in the plan"
+        );
+        link!(y, path, "Declaration was here.");
+    }
+}
diff --git a/rs/src/parse/extensions/mod.rs b/rs/src/parse/extensions/mod.rs
new file mode 100644
index 00000000..82f0a82e
--- /dev/null
+++ b/rs/src/parse/extensions/mod.rs
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions relating to extensions.
+
+use crate::input::proto::substrait;
+use crate::parse::context;
+
+pub mod advanced;
+pub mod simple;
+
+/// Parses the extension information in a plan.
+pub fn parse_plan(x: &substrait::Plan, y: &mut context::Context) {
+    advanced::parse_plan(x, y);
+    simple::parse_plan(x, y);
+}
+
+/// Generate Info diagnostics for any extension definitions that weren't used.
+pub fn check_unused_definitions(y: &mut context::Context) {
+    advanced::check_unused_definitions(y);
+    simple::check_unused_definitions(y);
+}
diff --git a/rs/src/parse/extensions/simple/function_decls.rs b/rs/src/parse/extensions/simple/function_decls.rs
new file mode 100644
index 00000000..c8176f15
--- /dev/null
+++ b/rs/src/parse/extensions/simple/function_decls.rs
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for parsing YAML function
+//! declarations.
+
+use crate::input::yaml;
+use crate::output::diagnostic::Result;
+use crate::parse::context;
+
+/// Parse a scalar function declaration.
+pub fn parse_scalar_function(_x: &yaml::Value, _y: &mut context::Context) -> Result<()> {
+    // TODO
+    Ok(())
+}
+
+/// Parse an aggregate function declaration.
+pub fn parse_aggregate_function(_x: &yaml::Value, _y: &mut context::Context) -> Result<()> {
+    // TODO
+    Ok(())
+}
diff --git a/rs/src/parse/extensions/simple/mod.rs b/rs/src/parse/extensions/simple/mod.rs
new file mode 100644
index 00000000..f5dc8881
--- /dev/null
+++ b/rs/src/parse/extensions/simple/mod.rs
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for simple extensions, i.e.
+//! those based around YAML files. + +use crate::input::proto::substrait; +use crate::output::diagnostic::Result; +use crate::output::extension; +use crate::parse::context; +use std::sync::Arc; + +mod function_decls; +mod type_decls; +mod type_variation_decls; +mod yaml; + +/// Parse a user-defined name. Note that names are matched case-insensitively +/// because we return the name as lowercase. +#[allow(clippy::ptr_arg)] +pub fn parse_name(x: &String, _y: &mut context::Context) -> Result { + // FIXME: nothing seems to say anything about the validity of names for + // things, but this seems rather important to define. + if x.is_empty() { + Err(cause!(IllegalValue, "names cannot be empty")) + } else { + Ok(x.to_lowercase()) + } +} + +/// "Parse" an anchor. This just reports an error if the anchor is 0. +fn parse_anchor(x: &u32, _y: &mut context::Context) -> Result { + if *x == 0 { + Err(cause!( + IllegalValue, + "anchor 0 is reserved to disambiguate unspecified optional references" + )) + } else { + Ok(*x) + } +} + +/// Parse a mapping from a URI anchor to a YAML extension. +fn parse_simple_extension_yaml_uri_mapping( + x: &substrait::extensions::SimpleExtensionUri, + y: &mut context::Context, +) -> Result<()> { + // Parse the fields. + let anchor = proto_primitive_field!(x, y, extension_uri_anchor, parse_anchor).1; + let yaml_data = proto_primitive_field!(x, y, uri, yaml::parse_uri) + .1 + .unwrap(); + + // If the specified anchor is valid, insert a mapping for it. + if let Some(anchor) = anchor { + if let Err((prev_data, prev_path)) = y.define_extension_uri(anchor, yaml_data) { + diagnostic!( + y, + Error, + IllegalValue, + "anchor {anchor} is already in use for URI {}", + prev_data.uri() + ); + link!(y, prev_path, "Previous definition was here."); + } + } + + Ok(()) +} + +/// Parse an URI reference and resolve it. +fn parse_uri_reference(x: &u32, y: &mut context::Context) -> Result> { + match y.extension_uris().resolve(x).cloned() { + Some((yaml_data, path)) => { + describe!(y, Misc, "{}", yaml_data.uri()); + link!(y, path, "URI anchor is defined here"); + Ok(yaml_data) + } + None => { + describe!(y, Misc, "Unresolved URI"); + Err(cause!(LinkMissingAnchor, "URI anchor {x} does not exist")) + } + } +} + +/// Adds a description to a resolved function/type/variation reference node. +fn describe_reference(y: &mut context::Context, reference: &Arc>) { + describe!(y, Misc, "{}", reference); +} + +/// Parse a type variation reference and resolve it. +pub fn parse_type_variation_reference( + x: &u32, + y: &mut context::Context, +) -> Result>> { + match y.tvars().resolve(x).cloned() { + Some((variation, path)) => { + describe_reference(y, &variation); + link!(y, path, "Type variation anchor is defined here"); + Ok(variation) + } + None => { + describe!(y, Misc, "Unresolved type variation"); + Err(cause!( + LinkMissingAnchor, + "Type variation anchor {x} does not exist" + )) + } + } +} + +/// Parse a type reference and resolve it. +pub fn parse_type_reference( + x: &u32, + y: &mut context::Context, +) -> Result>> { + match y.types().resolve(x).cloned() { + Some((data_type, path)) => { + describe_reference(y, &data_type); + link!(y, path, "Type anchor is defined here"); + Ok(data_type) + } + None => { + describe!(y, Misc, "Unresolved type"); + Err(cause!(LinkMissingAnchor, "Type anchor {x} does not exist")) + } + } +} + +/// Parse a function reference and resolve it. 
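+/// As with the type and type variation variants above, an unresolvable
+/// anchor yields a `LinkMissingAnchor` error rather than a panic.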
+pub fn parse_function_reference( + x: &u32, + y: &mut context::Context, +) -> Result>> { + match y.fns().resolve(x).cloned() { + Some((function, path)) => { + describe_reference(y, &function); + link!(y, path, "Function anchor is defined here"); + Ok(function) + } + None => { + describe!(y, Misc, "Unresolved function"); + Err(cause!( + LinkMissingAnchor, + "Function anchor {x} does not exist" + )) + } + } +} + +/// Parse a mapping from a function/type/variation anchor to an extension. +fn parse_extension_mapping_data( + x: &substrait::extensions::simple_extension_declaration::MappingType, + y: &mut context::Context, +) -> Result<()> { + match x { + substrait::extensions::simple_extension_declaration::MappingType::ExtensionType(x) => { + + // Parse the fields. + let yaml_info = proto_primitive_field!(x, y, extension_uri_reference, parse_uri_reference).1; + let anchor = proto_primitive_field!(x, y, type_anchor, parse_anchor).1; + let name = proto_primitive_field!(x, y, name, parse_name).1; + + // If we successfully resolved the URI reference to a URI, resolved + // that URI, and managed to parse the YAML it pointed to, try to + // resolve the data type in it. + let data_type = yaml_info.as_ref().and_then(|yaml_info| { + yaml_info.data().and_then(|data| { + name.as_ref().and_then(|name| { + let data_type = data.types.get(name).cloned(); + if data_type.is_none() { + // TODO: Error, LinkMissingTypeName + diagnostic!(y, Warning, NotYetImplemented, "failed to resolve data type {name:?} in {yaml_info}"); + } + data_type + }) + }) + }); + + // Construct a reference for this data type. + let reference = Arc::new(extension::Reference { + name: extension::NamedReference::new(name, Some(y.path_buf())), + uri: yaml_info.as_ref().map(|x| x.uri().clone()).unwrap_or_default(), + definition: data_type + }); + + // If the specified anchor is valid, insert a mapping for it. + if let Some(anchor) = anchor { + if let Err((prev_data, prev_path)) = y.define_type(anchor, reference) { + diagnostic!( + y, + Error, + IllegalValue, + "anchor {anchor} is already in use for data type {prev_data}" + ); + link!(y, prev_path, "Previous definition was here."); + } + } + + } + substrait::extensions::simple_extension_declaration::MappingType::ExtensionTypeVariation(x) => { + + // Parse the fields. + let yaml_info = proto_primitive_field!(x, y, extension_uri_reference, parse_uri_reference).1; + let anchor = proto_primitive_field!(x, y, type_variation_anchor, parse_anchor).1; + let name = proto_primitive_field!(x, y, name, parse_name).1; + + // If we successfully resolved the URI reference to a URI, resolved + // that URI, and managed to parse the YAML it pointed to, try to + // resolve the type variation in it. + let type_variation = yaml_info.as_ref().and_then(|yaml_info| { + yaml_info.data().and_then(|data| { + name.as_ref().and_then(|name| { + let type_variation = data.type_variations.get(name).cloned(); + if type_variation.is_none() { + // TODO: Error, LinkMissingTypeVariationName + diagnostic!(y, Warning, NotYetImplemented, "failed to resolve type variation {name:?} in {yaml_info}"); + } + type_variation + }) + }) + }); + + // Construct a reference for this type variation. + let reference = Arc::new(extension::Reference { + name: extension::NamedReference::new(name, Some(y.path_buf())), + uri: yaml_info.as_ref().map(|x| x.uri().clone()).unwrap_or_default(), + definition: type_variation + }); + + // If the specified anchor is valid, insert a mapping for it. 
+ if let Some(anchor) = anchor { + if let Err((prev_data, prev_path)) = y.define_tvar(anchor, reference) { + diagnostic!( + y, + Error, + IllegalValue, + "anchor {anchor} is already in use for type variation {prev_data}" + ); + link!(y, prev_path, "Previous definition was here."); + } + } + + } + substrait::extensions::simple_extension_declaration::MappingType::ExtensionFunction(x) => { + + // Parse the fields. + let yaml_info = proto_primitive_field!(x, y, extension_uri_reference, parse_uri_reference).1; + let anchor = proto_primitive_field!(x, y, function_anchor, parse_anchor).1; + let name = proto_primitive_field!(x, y, name).1; + + // If we successfully resolved the URI reference to a URI, resolved + // that URI, and managed to parse the YAML it pointed to, try to + // resolve the data type in it. + let function = yaml_info.as_ref().and_then(|yaml_info| { + yaml_info.data().and_then(|data| { + name.as_ref().and_then(|name| { + let function = data.functions.get(name).cloned(); + if function.is_none() { + // TODO: Error, LinkMissingFunctionName + diagnostic!(y, Warning, NotYetImplemented, "failed to resolve function {name:?} in {yaml_info}"); + } + function + }) + }) + }); + + // Construct a reference for this data type. + let reference = Arc::new(extension::Reference { + name: extension::NamedReference::new(name, Some(y.path_buf())), + uri: yaml_info.as_ref().map(|x| x.uri().clone()).unwrap_or_default(), + definition: function + }); + + // If the specified anchor is valid, insert a mapping for it. + if let Some(anchor) = anchor { + if let Err((prev_data, prev_path)) = y.define_fn(anchor, reference) { + diagnostic!( + y, + Error, + IllegalValue, + "anchor {anchor} is already in use for function {prev_data}" + ); + link!(y, prev_path, "Previous definition was here."); + } + } + + } + }; + Ok(()) +} + +/// Parse a mapping from a function/type/variation anchor to an extension. +fn parse_extension_mapping( + x: &substrait::extensions::SimpleExtensionDeclaration, + y: &mut context::Context, +) -> Result<()> { + proto_required_field!(x, y, mapping_type, parse_extension_mapping_data); + Ok(()) +} + +/// Parses the simple extension information in a plan. +pub fn parse_plan(x: &substrait::Plan, y: &mut context::Context) { + proto_repeated_field!( + x, + y, + extension_uris, + parse_simple_extension_yaml_uri_mapping + ); + proto_repeated_field!(x, y, extensions, parse_extension_mapping); +} + +/// Generate Info diagnostics for any extension definitions that weren't used. +pub fn check_unused_definitions(y: &mut context::Context) { + // List unused function declarations. + for (anchor, info, path) in y.fns().iter_unused().collect::>().into_iter() { + diagnostic!( + y, + Info, + RedundantFunctionDeclaration, + "anchor {anchor} for function {info} is not present in the plan" + ); + link!(y, path, "Declaration was here."); + } + + // List unused type declarations. + for (anchor, info, path) in y.types().iter_unused().collect::>().into_iter() { + diagnostic!( + y, + Info, + RedundantTypeDeclaration, + "anchor {anchor} for type {info} is not present in the plan" + ); + link!(y, path, "Declaration was here."); + } + + // List unused type variation declarations. 
+ for (anchor, info, path) in y.tvars().iter_unused().collect::>().into_iter() { + diagnostic!( + y, + Info, + RedundantTypeVariationDeclaration, + "anchor {anchor} for type variation {info} is not present in the plan" + ); + link!(y, path, "Declaration was here."); + } +} diff --git a/rs/src/parse/extensions/simple/type_decls.rs b/rs/src/parse/extensions/simple/type_decls.rs new file mode 100644 index 00000000..9f493fbe --- /dev/null +++ b/rs/src/parse/extensions/simple/type_decls.rs @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for parsing YAML type +//! declarations. + +use crate::input::yaml; +use crate::output::diagnostic::Result; +use crate::parse::context; + +/// Parse a type declaration. +pub fn parse_type(_x: &yaml::Value, _y: &mut context::Context) -> Result<()> { + // TODO + Ok(()) +} diff --git a/rs/src/parse/extensions/simple/type_variation_decls.rs b/rs/src/parse/extensions/simple/type_variation_decls.rs new file mode 100644 index 00000000..eecb13d5 --- /dev/null +++ b/rs/src/parse/extensions/simple/type_variation_decls.rs @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for parsing YAML type variation +//! declarations. + +use crate::input::yaml; +use crate::output::diagnostic::Result; +use crate::parse::context; + +/// Parse a type variation declaration. +pub fn parse_type_variation(_x: &yaml::Value, _y: &mut context::Context) -> Result<()> { + // TODO + Ok(()) +} diff --git a/rs/src/parse/extensions/simple/yaml.rs b/rs/src/parse/extensions/simple/yaml.rs new file mode 100644 index 00000000..344ab6c3 --- /dev/null +++ b/rs/src/parse/extensions/simple/yaml.rs @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for parsing YAML extension +//! files. + +use crate::input::yaml; +use crate::output::diagnostic::Result; +use crate::output::extension; +use crate::output::path; +use crate::parse::context; +use crate::parse::extensions::simple::function_decls; +use crate::parse::extensions::simple::type_decls; +use crate::parse::extensions::simple::type_variation_decls; +use crate::parse::traversal; +use crate::string_util; +use std::sync::Arc; + +/// Toplevel parse function for a simple extension YAML file. +fn parse_root(x: &yaml::Value, y: &mut context::Context) -> Result<()> { + yaml_repeated_field!(x, y, "types", type_decls::parse_type)?; + yaml_repeated_field!( + x, + y, + "type_variations", + type_variation_decls::parse_type_variation + )?; + yaml_repeated_field!( + x, + y, + "scalar_functions", + function_decls::parse_scalar_function + )?; + yaml_repeated_field!( + x, + y, + "aggregate_functions", + function_decls::parse_aggregate_function + )?; + Ok(()) +} + +/// Parse a YAML extension URI string. +pub fn parse_uri>( + x: &S, + y: &mut context::Context, +) -> Result> { + // Check URI syntax. + let x = x.as_ref(); + if let Err(e) = string_util::check_uri(x) { + diagnostic!(y, Error, e); + } + + // The schema for YAML extension files. 
+    static SCHEMA: once_cell::sync::Lazy<jsonschema::JSONSchema> =
+        once_cell::sync::Lazy::new(|| {
+            jsonschema::JSONSchema::compile(
+                &yaml::yaml_to_json(
+                    yaml_rust::YamlLoader::load_from_str(include_str!(
+                        "../../../resources/text/simple_extensions_schema.yaml"
+                    ))
+                    .unwrap()
+                    .pop()
+                    .unwrap(),
+                    &path::Path::default(),
+                )
+                .unwrap(),
+            )
+            .unwrap()
+        });
+
+    Ok(traversal::parse_yaml(x, y, Some(&SCHEMA), parse_root))
+}
diff --git a/rs/src/parse/mod.rs b/rs/src/parse/mod.rs
new file mode 100644
index 00000000..d35e493b
--- /dev/null
+++ b/rs/src/parse/mod.rs
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Parsing/validation module.
+//!
+//! Roughly speaking, this module takes a Substrait plan represented using the
+//! types provided by the [`input`](crate::input) module, and transforms it to
+//! an equivalent plan represented using the types provided by the
+//! [`output`](crate::output) module. In doing so, it parses and validates the
+//! plan.
+//!
+//! # Traversal
+//!
+//! Most of the boilerplate code for tree traversal is handled by the
+//! [`traversal`] module. What remains are "parse functions" of the form
+//! `(x: &T, y: &mut Context) -> Result<R>`, where:
+//!
+//!  - `x` is a reference to the JSON/YAML value or the prost wrapper for
+//!    the protobuf message that is to be parsed and validated;
+//!  - `y` is the parse context ([`context::Context`], see next section); and
+//!  - `R` is any desired return type.
+//!
+//! The body of the parse function can use a wide variety of function-like
+//! macros from [`traversal`] to traverse the children of `x` in the
+//! appropriate order and with the appropriate parse functions. The macros
+//! return a tuple of a reference to the created
+//! [`Node`](crate::output::tree::Node) and the `R` returned by the parse
+//! function (depending on the macro, these may be wrapped in [`Option`]s or
+//! [`Vec`]s). Note that any children not traversed by the parse function will
+//! automatically be traversed by [`traversal`] (along with a warning
+//! diagnostic that these children were not validated), and that traversing a
+//! child twice is illegal (this will panic).
+//!
+//! # Parser context
+//!
+//! The mutable [`context::Context`] reference that is passed into every parse
+//! function and is needed for every traversal macro stores all contextual
+//! information needed for parsing, except for the input. Any and all results
+//! of the parse process need to eventually end up in here, and as such it has
+//! quite a few functions defined on it. It also has a reference to the
+//! configuration structure; it's kind of the odd one out here, since the
+//! configuration is more of an input than output or state. It's simply
+//! convenient to pass it along with the context object to save on some typing
+//! when defining parse functions.
+//!
+//! Besides macros strictly intended for traversal, the [`traversal`] module
+//! also defines some convenience macros for pushing things other than child
+//! nodes into the context, particularly for things that regularly involve
+//! [format!].
+//!
+//! ## Diagnostics
+//!
+//! Rather than just passing `Result`s around, diagnostics are used to
+//! communicate whether a plan is valid or not. This solves two problems:
+//!
+//!  - distinguishing between messages signalling provable invalidity
+//!    (errors), messages signalling inability to determine validity
+//!    (warnings), and messages that are just intended to provide extra
+//!    information to the user;
+//!  - returning as many diagnostics as possible, rather than just stopping
+//!    at the first sight of trouble.
+//!
+//! Diagnostics can be pushed into the parser context using the [`diagnostic!`]
+//! and [`ediagnostic!`] macros. The latter allows third-party `Err` types to
+//! be pushed as the message, the former uses a [format!] syntax. However,
+//! sometimes it is also very useful to just use the `?` operator for
+//! something. Therefore, parse functions also return
+//! [`diagnostic::Result`](crate::output::diagnostic::Result). This result
+//! is taken care of by the traversal macros; when `Err`, the diagnostic cause
+//! is simply pushed as an error. This also suppresses the usual "unknown
+//! field" warning emitted when a parse function failed to traverse all its
+//! children; after all, it probably exited early.
+//!
+//! More information about all the information recorded in a diagnostic can be
+//! found in the docs for the [diagnostic](crate::output::diagnostic) module.
+//!
+//! Beyond diagnostics, it's also possible to push comments into the context.
+//! This can be done using the [`comment!`] and [`link!`] macros, or, for more
+//! control, by pushing a [`Comment`](crate::output::comment::Comment) object
+//! directly.
+//!
+//! ## Data types
+//!
+//! Data type information gets some special treatment, because it is so
+//! important for validation. It's also very useful to have when debugging a
+//! tree. It's considered so important that each
+//! [`Node`](crate::output::tree::Node) has a place where it can store its
+//! "return type". What this type actually represents depends on the type of
+//! node:
+//!
+//!  - type nodes: the represented type;
+//!  - expression nodes: the returned type;
+//!  - relation nodes: the schema (automatically set by
+//!    [`set_schema()`](context::Context::set_schema())).
+//!
+//! The data type can be set using the
+//! [`set_data_type()`](context::Context::set_data_type()) method. Note that
+//! all of the parsers for the above node types should call
+//! [`set_data_type()`](context::Context::set_data_type()) at
+//! least once, even if they're unable to determine what the actual type is;
+//! in the latter case they can just push an unresolved type (for example
+//! using `Default`; additional information can be attached using
+//! [`new_unresolved()`](crate::output::data_type::DataType::new_unresolved())).
+//!
+//! [`set_data_type()`](context::Context::set_data_type()) may be called more
+//! than once for a single node. The data type of the node will simply be the
+//! last one that was set when parsing for that node completes. However, each
+//! call also records the data type as a special type of child of the node,
+//! making the complete history of
+//! [`set_data_type()`](context::Context::set_data_type()) calls visible in the
+//! resulting parse tree.
+//!
+//! ## Schemas
+//!
+//! Perhaps even more important than data types in general are schemas; in
+//! general, in order to be able to determine the data type returned by an
+//! expression, contextual information about the schema(s) of the data
+//! stream(s) being operated on needs to be known. Moreover, the context in
+//! which an expression is evaluated may contain more than one schema when
+//! subqueries get involved.
+//!
+//! This information is tracked in the schema stack. The stack can be
+//! manipulated using the following functions.
+//!
+//!  - The root node of a relation tree must be parsed within the context
+//!    created by
+//!    [`enter_relation_root()`](context::Context::enter_relation_root()). This
+//!    method ensures that a schema is pushed onto the stack prior to traversal
+//!    of the relation tree, and popped after traversal completes. Initially,
+//!    the schema is set to an unresolved type, but the actual type should not
+//!    matter at this stage, because it semantically doesn't exist until the
+//!    first leaf in the relation tree is parsed.
+//!  - All relations call [`clear_schema()`](context::Context::clear_schema())
+//!    prior to any relation-specific logic (this is done by the RelType parse
+//!    function), because semantically, no schema exists prior to parsing a
+//!    relation.
+//!  - [`set_schema()`](context::Context::set_schema()) sets or updates the
+//!    current schema. It must be called every time the data stream is
+//!    functionally updated, and just after the data stream is first created
+//!    by leaf relations. Relations that combine data streams should call it
+//!    just after traversal of their data sources completes (otherwise the
+//!    active schema will be whatever the schema of the most recently parsed
+//!    data source turned out to be). Doing so will also push the data type
+//!    corresponding to the schema to the node, such that the final tree
+//!    contains a type node for every semantic change of the data stream for
+//!    debugging/documentation purposes.
+//!
+//! The current schema information can be retrieved using
+//! [`schema()`](context::Context::schema()). Its integer argument specifies
+//! how many subqueries to break out of; 0 is used to refer to the schema of
+//! the current (sub)query, 1 is its parent query, 2 is its grandparent, and
+//! so on.
+//!
+//! ## How the parser context works
+//!
+//! A context object contains the following things:
+//!
+//!  - [`output: &mut tree::Node`](crate::output::tree::Node), a mutable
+//!    reference to the node in the output tree that we're writing to. Note
+//!    that the [`traversal`] macros create a
+//!    [`Node`](crate::output::tree::Node) already populated with the default
+//!    [`NodeType`](crate::output::tree::NodeType) before calling the parse
+//!    function, including a copy of the primitive data element for leaf nodes,
+//!    and almost everything else can be added using the [`traversal`] macros,
+//!    so you shouldn't normally have to mutate this. Exceptions exist, however,
+//!    for example when an integer primitive needs to be upgraded to an anchor
+//!    reference.
+//!  - [`state: &mut context::State`](context::State), a mutable reference to a
+//!    global state structure for the parser. This includes, for instance,
+//!    lookup tables for things previously defined in the plan, such as
+//!    function declarations. The state object is initially constructed by
+//!    [`traversal`] using [`Default`], and is then just recursively passed to
+//!    every parse function.
+//!  - [`breadcrumb: &mut context::Breadcrumb`](context::Breadcrumb). This
+//!    fulfills a similar purpose to `state`, but using a stack-like structure:
+//!    for every child node, a new [`Breadcrumb`](context::Breadcrumb) is
+//!    pushed onto the stack. Note that only the top of the stack is mutable.
+//!    This is mostly used for keeping track of the current
+//!    [`Path`](crate::output::path::Path) and internally by the [`traversal`]
+//!    module; the parse functions can and should just use local variables when
+//!    they need to store something this way.
+//!  - [`config: &config::Config`](config::Config), a reference to the
+//!    configuration structure that the validator was called with.
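+//!
+//! As an illustrative sketch only (the message type is hypothetical, and the
+//! exact macro signatures are simplified), a typical parse function looks
+//! roughly like this:
+//!
+//! ```ignore
+//! fn parse_example(x: &substrait::Example, y: &mut context::Context) -> Result<()> {
+//!     // Traverse and validate a primitive child field.
+//!     proto_primitive_field!(x, y, name);
+//!
+//!     // Push a diagnostic when something is provably wrong.
+//!     if x.name.is_empty() {
+//!         diagnostic!(y, Error, IllegalValue, "name must not be empty");
+//!     }
+//!     Ok(())
+//! }
+//! ```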
+
+#[macro_use]
+pub mod traversal;
+
+#[macro_use]
+pub mod context;
+
+mod expressions;
+mod extensions;
+mod plan;
+mod relations;
+mod sorts;
+mod types;
+
+use crate::input::config;
+use crate::input::proto;
+use crate::output::parse_result;
+
+/// Validates the given substrait.Plan message and returns the parse tree.
+pub fn parse<B: prost::bytes::Buf>(
+    buffer: B,
+    config: &config::Config,
+) -> parse_result::ParseResult {
+    traversal::parse_proto::<proto::substrait::Plan, _, _>(
+        buffer,
+        "plan",
+        plan::parse_plan,
+        &mut context::State::default(),
+        config,
+    )
+}
diff --git a/rs/src/parse/plan.rs b/rs/src/parse/plan.rs
new file mode 100644
index 00000000..1fe93790
--- /dev/null
+++ b/rs/src/parse/plan.rs
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing toplevel parse/validation functions for plans.
+
+use crate::input::proto::substrait;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::extensions;
+use crate::parse::relations;
+use std::sync::Arc;
+
+// Parse a relation root, i.e. a toplevel relation that includes field name
+// information.
+fn parse_rel_root(x: &substrait::RelRoot, y: &mut context::Context) -> diagnostic::Result<()> {
+    // Parse the fields.
+    let schema = proto_required_field!(x, y, input, relations::parse_rel)
+        .0
+        .data_type();
+    proto_repeated_field!(x, y, names);
+
+    // Relate the names to the schema.
+    let schema = schema
+        .apply_field_names(&x.names)
+        .map_err(|x| diagnostic!(y, Error, x))
+        .unwrap_or_default();
+    y.set_schema(schema);
+
+    // Describe the node.
+    describe!(y, Misc, "Named relation root");
+    summary!(y, "Attaches names to result schema");
+    Ok(())
+}
+
+// Parse a relation type.
+fn parse_rel_type(
+    x: &substrait::plan_rel::RelType,
+    y: &mut context::Context,
+) -> diagnostic::Result<Arc<data_type::DataType>> {
+    match x {
+        substrait::plan_rel::RelType::Rel(x) => {
+            relations::parse_rel(x, y)?;
+            Ok(y.data_type().strip_field_names())
+        }
+        substrait::plan_rel::RelType::Root(x) => {
+            parse_rel_root(x, y)?;
+            Ok(y.data_type())
+        }
+    }
+}
+
+/// Parse a PlanRel node.
+fn parse_plan_rel(x: &substrait::PlanRel, y: &mut context::Context) -> diagnostic::Result<()> {
+    let data_type = y.enter_relation_root(|y| {
+        proto_required_field!(x, y, rel_type, parse_rel_type)
+            .1
+            .unwrap_or_default()
+    });
+
+    // Describe the node.
+    y.set_data_type(data_type);
+    describe!(y, Misc, "Relation root");
+    Ok(())
+}
+
+/// Toplevel parse function for a plan.
+pub fn parse_plan(x: &substrait::Plan, y: &mut context::Context) -> diagnostic::Result<()> {
+    // Handle extensions first, because we'll need their declarations to
+    // correctly interpret the relations.
+    extensions::parse_plan(x, y);
+
+    // Handle the relations.
+    let num_relations = proto_repeated_field!(x, y, relations, parse_plan_rel)
+        .0
+        .len();
+    if num_relations == 0 {
+        diagnostic!(
+            y,
+            Error,
+            RelationRootMissing,
+            "a plan must have at least one relation"
+        );
+    }
+
+    // Generate an Info diagnostic for every extension definition that wasn't
+    // used at any point, and can thus be safely removed.
+    extensions::check_unused_definitions(y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/aggregate.rs b/rs/src/parse/relations/aggregate.rs
new file mode 100644
index 00000000..15ed4d63
--- /dev/null
+++ b/rs/src/parse/relations/aggregate.rs
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for aggregate relations.
+//!
+//! The aggregate operation groups input data on one or more sets of grouping
+//! keys, calculating each measure for each combination of grouping key.
+//!
+//! See
+
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::output::comment;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+use crate::parse::expressions::functions;
+
+/// Type of output field.
+enum FieldType {
+    /// A field passed straight on from the input, but uniquified.
+    GroupedField,
+
+    /// Like GroupedField, but grouping sets exist that this field is not a
+    /// part of. Null will be returned for such rows.
+    NullableGroupedField,
+
+    /// An aggregate function applied to the input rows that were combined for
+    /// the current output row.
+    Measure,
+
+    /// The index of the grouping set that the result corresponds to.
+    GroupingSetIndex,
+}
+
+/// A grouping or aggregate expression returned by the aggregate relation.
+struct Field {
+    /// Description of the grouping or aggregate expression.
+    expression: expressions::Expression,
+
+    /// Data type returned by the expression.
+    data_type: Arc<data_type::DataType>,
+
+    /// The type of field.
+    field_type: FieldType,
+}
+
+/// Parse a measure.
+fn parse_measure(
+    x: &substrait::aggregate_rel::Measure,
+    y: &mut context::Context,
+) -> diagnostic::Result<expressions::Expression> {
+    // Parse the aggregate function.
+    let (n, e) = proto_required_field!(x, y, measure, functions::parse_aggregate_function);
+    let data_type = n.data_type();
+    let expression = e.unwrap_or_default();
+    y.set_data_type(data_type);
+
+    // Parse the filter and describe the node.
+    if x.filter.is_some() {
+        let filter = proto_required_field!(x, y, filter, expressions::parse_predicate)
+            .1
+            .unwrap_or_default();
+        summary!(
+            y,
+            "Applies aggregate function {expression:#} to all rows for \
+             which {filter:#} returns true."
+        );
+        let filtered_expression =
+            expressions::Expression::Function(String::from("filter"), vec![filter, expression]);
+        describe!(
+            y,
+            Expression,
+            "Filtered aggregate function: {filtered_expression}"
+        );
+        Ok(filtered_expression)
+    } else {
+        summary!(y, "Applies aggregate function {expression:#} to all rows.");
+        describe!(y, Expression, "Aggregate function: {expression}");
+        Ok(expression)
+    }
+}
+
+/// Parse aggregate relation.
+pub fn parse_aggregate_rel(
+    x: &substrait::AggregateRel,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    // Parse input.
+    let in_type = handle_rel_input!(x, y);
+
+    // Set schema context for the grouping and measure expressions.
+    y.set_schema(in_type);
+
+    // Parse grouping sets.
+    let mut grouping_set_expressions: Vec<substrait::Expression> = vec![];
+    let mut fields = vec![];
+    let mut sets = vec![];
+    proto_repeated_field!(x, y, groupings, |x, y| {
+        sets.push(vec![]);
+        proto_repeated_field!(x, y, grouping_expressions, |x, y| {
+            let result = expressions::parse_expression(x, y);
+
+            // See if we parsed this expression before. If not, add it to the
+            // field list. Return the index in the field list.
+            let index = grouping_set_expressions
+                .iter()
+                .enumerate()
+                .find(|(_, e)| e == &x)
+                .map(|(i, _)| i)
+                .unwrap_or_else(|| {
+                    // Create new field.
+                    grouping_set_expressions.push(x.clone());
+                    fields.push(Field {
+                        expression: result.as_ref().cloned().unwrap_or_default(),
+                        data_type: y.data_type(),
+                        field_type: FieldType::NullableGroupedField,
+                    });
+
+                    fields.len() - 1
+                });
+
+            // Add index of uniquified field to grouping set.
+            sets.last_mut().unwrap().push(index);
+
+            result
+        });
+        match x.grouping_expressions.len() {
+            0 => summary!(y, "A grouping set that aggregates all rows."),
+            1 => summary!(
+                y,
+                "A grouping set that aggregates all rows for which \
+                 the expression yields the same value."
+            ),
+            x => summary!(
+                y,
+                "A grouping set that aggregates all rows for which \
+                 the {x} expressions yield the same tuple of values."
+            ),
+        }
+        Ok(())
+    });
+    drop(grouping_set_expressions);
+    let sets = sets;
+
+    // Each field that is part of all sets will never be made nullable by the
+    // aggregate relation, so its type does not need to be made nullable.
+    let mut set_iter = sets.iter();
+    if let Some(first_set) = set_iter.next() {
+        let mut fields_in_all_sets = first_set.iter().cloned().collect::<HashSet<_>>();
+        for set in set_iter {
+            fields_in_all_sets = &fields_in_all_sets & &set.iter().cloned().collect::<HashSet<_>>();
+        }
+        for index in fields_in_all_sets {
+            fields[index].field_type = FieldType::GroupedField;
+        }
+    }
+
+    // Parse measures.
+    proto_repeated_field!(x, y, measures, |x, y| {
+        let result = parse_measure(x, y);
+        fields.push(Field {
+            expression: result.as_ref().cloned().unwrap_or_default(),
+            data_type: y.data_type(),
+            field_type: FieldType::Measure,
+        });
+        result
+    });
+
+    // The relation is invalid if no fields result from it.
+    if fields.is_empty() {
+        diagnostic!(
+            y,
+            Error,
+            RelationInvalid,
+            "aggregate relations must have at least one grouping expression or measure"
+        );
+    }
+
+    // Add the column for the grouping set index.
+    // FIXME: this field makes no sense for aggregate relations that only have
+    // measures. It's also disputable whether it should exist when there is
+    // only one grouping set.
+    fields.push(Field {
+        expression: expressions::Expression::Function(String::from("group_index"), vec![]),
+        data_type: data_type::DataType::new_integer(false),
+        field_type: FieldType::GroupingSetIndex,
+    });
+    let fields = fields;
+
+    // Derive schema.
+    y.set_schema(data_type::DataType::new_struct(
+        fields.iter().map(|x| {
+            if matches!(x.field_type, FieldType::NullableGroupedField) {
+                x.data_type.make_nullable()
+            } else {
+                x.data_type.clone()
+            }
+        }),
+        false,
+    ));
+
+    // Describe the relation.
+    if x.groupings.is_empty() {
+        describe!(y, Relation, "Aggregate");
+        summary!(
+            y,
+            "This relation computes {} aggregate function(s) over all rows, \
+             returning a single row.",
+            x.measures.len()
+        );
+    } else if x.measures.is_empty() {
+        describe!(y, Relation, "Group");
+        summary!(
+            y,
+            "This relation groups rows from the input by the result of some \
+             expression(s)."
+        );
+    } else {
+        describe!(y, Relation, "Group & aggregate");
+        summary!(
+            y,
+            "This relation groups rows from the input by the result of some \
+             expression(s), and also computes {} aggregate function(s) over \
+             each group.",
+            x.measures.len()
+        );
+    }
+    let mut comment = comment::Comment::new()
+        .plain("The significance of the returned field(s) is:")
+        .lo();
+    for (index, field) in fields.iter().enumerate() {
+        comment = comment.li().plain(match field.field_type {
+            FieldType::GroupedField => format!(
+                "Field {index}: value of grouping expression {:#}.",
+                field.expression
+            ),
+            FieldType::NullableGroupedField => format!(
+                "Field {index}: value of grouping expression {:#} if it is \
+                 part of the grouping set being returned, null otherwise.",
+                field.expression
+            ),
+            FieldType::Measure => {
+                if x.groupings.is_empty() {
+                    format!(
+                        "Field {index}: result of aggregate function {:#} \
+                         applied to all input rows.",
+                        field.expression
+                    )
+                } else {
+                    format!(
+                        "Field {index}: result of aggregate function {:#} \
+                         applied to the rows from the current group.",
+                        field.expression
+                    )
+                }
+            }
+            FieldType::GroupingSetIndex => {
+                if x.groupings.is_empty() {
+                    format!(
+                        "Field {index}: undefined value, reserved for grouping \
+                         set index."
+                    )
+                } else if x.groupings.len() == 1 {
+                    format!(
+                        "Field {index}: always zero, representing the index of the \
+                         matched grouping set (of which there is only one here)."
+                    )
+                } else {
+                    format!(
+                        "Field {index}: integer between 0 and {} inclusive, \
+                         representing the index of the matched grouping set.",
+                        x.groupings.len() - 1
+                    )
+                }
+            }
+        });
+    }
+    y.push_summary(comment.lc());
+
+    // Handle the common field.
+    handle_rel_common!(x, y);
+
+    // Handle the advanced extension field.
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/common.rs b/rs/src/parse/relations/common.rs
new file mode 100644
index 00000000..fe36b00c
--- /dev/null
+++ b/rs/src/parse/relations/common.rs
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for parsing logic common to all relation types.
+
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+
+/// Parse a stats node.
+fn parse_stats(
+    x: &substrait::rel_common::hint::Stats,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    proto_primitive_field!(x, y, row_count, |x, y| {
+        if *x < 0.0 {
+            diagnostic!(
+                y,
+                Error,
+                IllegalValueInHint,
+                "negative row counts are nonsensical"
+            );
+        }
+        Ok(())
+    });
+    proto_primitive_field!(x, y, record_size, |x, y| {
+        if *x < 0.0 {
+            diagnostic!(
+                y,
+                Error,
+                IllegalValueInHint,
+                "negative record sizes are nonsensical"
+            );
+        }
+        Ok(())
+    });
+    proto_field!(
+        x,
+        y,
+        advanced_extension,
+        crate::parse::extensions::advanced::parse_advanced_extension
+    );
+    Ok(())
+}
+
+/// Parse a constraints node.
+fn parse_runtime_constraint(
+    x: &substrait::rel_common::hint::RuntimeConstraint,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    proto_field!(
+        x,
+        y,
+        advanced_extension,
+        crate::parse::extensions::advanced::parse_advanced_extension
+    );
+    Ok(())
+}
+
+/// Parse a hint node.
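+/// Hints carry optional statistics and runtime constraints. They are
+/// validated here, but they do not affect the derived schema in any way.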
+fn parse_hint(x: &substrait::rel_common::Hint, y: &mut context::Context) -> diagnostic::Result<()> {
+    proto_field!(x, y, stats, parse_stats);
+    proto_field!(x, y, constraint, parse_runtime_constraint);
+    proto_field!(
+        x,
+        y,
+        advanced_extension,
+        crate::parse::extensions::advanced::parse_advanced_extension
+    );
+    Ok(())
+}
+
+/// Parse emit mapping. Takes the data type of the schema thus far as argument.
+fn parse_emit_mapping(
+    x: &i32,
+    _: &mut context::Context,
+    data_type: Arc<data_type::DataType>,
+) -> diagnostic::Result<Arc<data_type::DataType>> {
+    let x: usize = (*x)
+        .try_into()
+        .map_err(|_| cause!(TypeInvalidSwizzle, "index cannot be negative"))?;
+    data_type
+        .index_struct(x)
+        .ok_or_else(|| cause!(TypeInvalidSwizzle, "index out of range"))
+}
+
+/// Parse emit kind. Takes the data type of the schema thus far as argument.
+fn parse_emit_kind(
+    x: &substrait::rel_common::EmitKind,
+    y: &mut context::Context,
+    data_type: Arc<data_type::DataType>,
+) -> diagnostic::Result<Arc<data_type::DataType>> {
+    match x {
+        substrait::rel_common::EmitKind::Direct(_) => Ok(data_type),
+        substrait::rel_common::EmitKind::Emit(x) => {
+            let fields = proto_repeated_field!(
+                x,
+                y,
+                output_mapping,
+                parse_emit_mapping,
+                |_, _, _, _, _| (),
+                data_type.clone()
+            )
+            .1
+            .into_iter()
+            .map(|x| x.unwrap_or_default())
+            .collect::<Vec<_>>();
+            Ok(data_type::DataType::new_struct(fields, false))
+        }
+    }
+}
+
+/// Parse RelCommon node. This should be processed after the rest of the
+/// relation has been processed, as it can transmute the data type.
+pub fn parse_rel_common(
+    x: &substrait::RelCommon,
+    y: &mut context::Context,
+    data_type: Arc<data_type::DataType>,
+) -> diagnostic::Result<Arc<data_type::DataType>> {
+    // Handle hint.
+    proto_field!(x, y, hint, parse_hint);
+
+    // Handle advanced extension.
+    let data_type = if proto_field!(
+        x,
+        y,
+        advanced_extension,
+        crate::parse::extensions::advanced::parse_advanced_extension
+    )
+    .1
+    .unwrap_or_default()
+    {
+        data_type::DataType::new_unresolved()
+    } else {
+        data_type
+    };
+
+    // Parse emit kind.
+    let data_type = proto_field!(x, y, emit_kind, parse_emit_kind, data_type.clone())
+        .1
+        .unwrap_or(data_type);
+
+    Ok(data_type)
+}
+
+/// Handle the common field for a relation. This should be processed after the
+/// rest of the relation has been processed, as it can transmute the data type.
+macro_rules! handle_rel_common {
+    ($input:expr, $context:expr) => {
+        let data_type = $context.data_type();
+
+        // Call the parser.
+        let result = proto_field!(
+            $input,
+            $context,
+            common,
+            crate::parse::relations::common::parse_rel_common,
+            data_type
+        )
+        .1;
+
+        // If common was populated and its parser succeeded (it should always
+        // do that), update the type information.
+        if let Some(data_type) = result {
+            $context.set_schema(data_type);
+        }
+    };
+}
+
+/// Handle the advanced extension field for a builtin relation.
+macro_rules! handle_advanced_extension {
+    ($input:expr, $context:expr) => {
+        if proto_field!(
+            $input,
+            $context,
+            advanced_extension,
+            crate::parse::extensions::advanced::parse_advanced_extension
+        )
+        .1
+        .unwrap_or_default()
+        {
+            $context.set_schema(std::sync::Arc::default());
+        }
+    };
+}
+
+/// Shorthand for handling the input field of a relation. Returns the data
+/// type corresponding to the schema returned by the relation.
+macro_rules! handle_rel_input {
handle_rel_input { + ($input:expr, $context:expr) => { + handle_rel_input!($input, $context, input) + }; + ($input:expr, $context:expr, $field:ident) => { + proto_boxed_required_field!($input, $context, $field, crate::parse::relations::parse_rel) + .0 + .data_type() + }; +} + +/// Shorthand for handling the input fields of a relation that takes a flexible +/// amount of inputs. Returns an iterator to references to the data types +/// corresponding to the schemas returned by the relations. Each data type can +/// be None if schema type deduction failed. +macro_rules! handle_rel_inputs { + ($input:expr, $context:expr) => { + handle_rel_inputs!($input, $context, inputs) + }; + ($input:expr, $context:expr, $field:ident) => { + proto_repeated_field!($input, $context, $field, crate::parse::relations::parse_rel) + .0 + .iter() + .map(|x| x.data_type()) + }; +} diff --git a/rs/src/parse/relations/cross.rs b/rs/src/parse/relations/cross.rs new file mode 100644 index 00000000..cbbdfaac --- /dev/null +++ b/rs/src/parse/relations/cross.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for cross relations. +//! +//! The cross product operation will combine two separate inputs into a single +//! output. It pairs every record from the left input with every record of the +//! right input. +//! +//! See + +use std::sync::Arc; + +use crate::input::proto::substrait; +use crate::output::data_type; +use crate::output::diagnostic; +use crate::parse::context; + +/// Parse cross relation. +pub fn parse_cross_rel( + x: &substrait::CrossRel, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse input. + let left = handle_rel_input!(x, y, left); + let right = handle_rel_input!(x, y, right); + + // Derive schema. + if let (Some(mut fields), Some(additional_fields)) = + (left.unwrap_struct(), right.unwrap_struct()) + { + fields.extend(additional_fields.into_iter()); + let schema = data_type::DataType::new_struct(fields, false); + y.set_schema(schema); + } else { + y.set_schema(Arc::default()); + } + + // Describe the relation. + describe!(y, Relation, "Cross product"); + summary!( + y, + "This relation computes the cross product of its two input datasets." + ); + + // Handle the common field. + handle_rel_common!(x, y); + + // Handle the advanced extension field. + handle_advanced_extension!(x, y); + + Ok(()) +} diff --git a/rs/src/parse/relations/extension.rs b/rs/src/parse/relations/extension.rs new file mode 100644 index 00000000..bb22f849 --- /dev/null +++ b/rs/src/parse/relations/extension.rs @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for relational algebra +//! extensions. + +use std::sync::Arc; + +use crate::input::proto::substrait; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::extensions; + +/// Parse one to one extension. +pub fn parse_extension_single_rel( + x: &substrait::ExtensionSingleRel, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse input. + let _in_type = handle_rel_input!(x, y); + + // Set schema to an unresolved type. + y.set_schema(Arc::default()); + + // Parse the extension data. + proto_required_field!(x, y, detail, extensions::advanced::parse_functional_any); + + // Describe the relation. + if let Some(x) = &x.detail { + describe!(y, Relation, "{} extension", x.type_url); + } else { + describe!(y, Relation, "Unknown extension"); + } + + // Handle the common field. 
+ handle_rel_common!(x, y); + + Ok(()) +} + +/// Parse many to one extension. +pub fn parse_extension_multi_rel( + x: &substrait::ExtensionMultiRel, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse inputs. + let _in_types: Vec<_> = handle_rel_inputs!(x, y).collect(); + + // Set schema to an unresolved type. + y.set_schema(Arc::default()); + + // Parse the extension data. + proto_required_field!(x, y, detail, extensions::advanced::parse_functional_any); + + // Describe the relation. + if let Some(x) = &x.detail { + describe!(y, Relation, "{} extension", x.type_url); + } else { + describe!(y, Relation, "Unknown extension"); + } + + // Handle the common field. + handle_rel_common!(x, y); + + Ok(()) +} + +/// Parse input extension. +pub fn parse_extension_leaf_rel( + x: &substrait::ExtensionLeafRel, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Set schema to an unresolved type. + y.set_schema(Arc::default()); + + // Parse the extension data. + proto_required_field!(x, y, detail, extensions::advanced::parse_functional_any); + + // Describe the relation. + if let Some(x) = &x.detail { + describe!(y, Relation, "{} extension", x.type_url); + } else { + describe!(y, Relation, "Unknown extension"); + } + + // Handle the common field. + handle_rel_common!(x, y); + + Ok(()) +} diff --git a/rs/src/parse/relations/fetch.rs b/rs/src/parse/relations/fetch.rs new file mode 100644 index 00000000..f9616cf4 --- /dev/null +++ b/rs/src/parse/relations/fetch.rs @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for fetch relations. +//! +//! The fetch operation eliminates records outside a desired window. Typically +//! corresponds to a fetch/offset SQL clause. +//! +//! See + +use crate::input::proto::substrait; +use crate::output::diagnostic; +use crate::parse::context; +use crate::string_util; + +/// Parse fetch relation. +pub fn parse_fetch_rel( + x: &substrait::FetchRel, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse input. + let in_type = handle_rel_input!(x, y); + + // Filters pass through their input schema unchanged. + y.set_schema(in_type); + + // Parse offset and count. + proto_primitive_field!(x, y, offset, |x, y| { + if *x < 0 { + diagnostic!(y, Error, IllegalValue, "offsets cannot be negative"); + } + Ok(()) + }); + proto_primitive_field!(x, y, count, |x, y| { + if *x < 0 { + diagnostic!(y, Error, IllegalValue, "count cannot be negative"); + } + Ok(()) + }); + + // Describe the relation. + if x.count == 1 { + describe!( + y, + Relation, + "Propagate only the {} row", + (x.offset + 1) + .try_into() + .map(string_util::describe_nth) + .unwrap_or_else(|_| String::from("?")) + ); + } else if x.count > 1 { + if x.offset > 1 { + describe!( + y, + Relation, + "Propagate only {} rows, starting from the {}", + x.count, + (x.offset + 1) + .try_into() + .map(string_util::describe_nth) + .unwrap_or_else(|_| String::from("?")) + ); + } else { + describe!(y, Relation, "Propagate only the first {} rows", x.count); + } + } else if x.offset == 0 { + describe!(y, Relation, "Fetch all rows"); + } else if x.offset == 1 { + describe!(y, Relation, "Discard the first row"); + } else if x.offset > 1 { + describe!(y, Relation, "Discard the first {} rows", x.offset); + } else { + describe!(y, Relation, "Invalid fetch relation"); + } + + // Handle the common field. + handle_rel_common!(x, y); + + // Handle the advanced extension field. 
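+    // If the advanced extension carries an enhancement, the schema can no
+    // longer be deduced, so it is reset to an unresolved type.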
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/filter.rs b/rs/src/parse/relations/filter.rs
new file mode 100644
index 00000000..d327ccd6
--- /dev/null
+++ b/rs/src/parse/relations/filter.rs
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for filter relations.
+//!
+//! The filter operator eliminates one or more records from the input data
+//! based on a boolean filter expression.
+//!
+//! See
+
+use crate::input::proto::substrait;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+
+/// Parse filter relation.
+pub fn parse_filter_rel(
+    x: &substrait::FilterRel,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    // Parse input.
+    let in_type = handle_rel_input!(x, y);
+
+    // Filters pass through their input schema unchanged.
+    y.set_schema(in_type);
+
+    // Check the filter predicate.
+    let (n, e) = proto_boxed_required_field!(x, y, condition, expressions::parse_predicate);
+    let predicate = e.unwrap_or_default();
+    let nullable = n.data_type().nullable();
+
+    // Describe the relation.
+    describe!(y, Relation, "Filter by {}", &predicate);
+    summary!(
+        y,
+        "This relation discards all rows for which {} yields false.",
+        &predicate
+    );
+    if nullable {
+        // FIXME: what's the behavior when a filter condition is nullable and
+        // yields null? Same applies for all other usages of parse_predicate().
+        summary!(y, "Behavior for a null condition is unspecified.");
+    }
+
+    // Handle the common field.
+    handle_rel_common!(x, y);
+
+    // Handle the advanced extension field.
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/join.rs b/rs/src/parse/relations/join.rs
new file mode 100644
index 00000000..a31772a4
--- /dev/null
+++ b/rs/src/parse/relations/join.rs
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for join relations.
+//!
+//! The join operation will combine two separate inputs into a single output,
+//! based on a join expression. A common subtype of joins is an equality join,
+//! where the join expression is constrained to a list of equality (or
+//! equality + null equality) conditions between the two inputs of the join.
+//!
+//! See
+
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::output::comment;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+
+/// Parse join relation.
+pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagnostic::Result<()> {
+    use substrait::join_rel::JoinType;
+
+    // Parse input.
+    let left = handle_rel_input!(x, y, left);
+    let right = handle_rel_input!(x, y, right);
+
+    // Derive schema with which the join expression is evaluated.
+    if let (Some(mut fields), Some(additional_fields)) =
+        (left.unwrap_struct(), right.unwrap_struct())
+    {
+        fields.extend(additional_fields.into_iter());
+        let schema = data_type::DataType::new_struct(fields, false);
+        y.set_schema(schema);
+    } else {
+        y.set_schema(Arc::default());
+    }
+
+    // Parse join expression.
+    let join_expression =
+        proto_boxed_required_field!(x, y, expression, expressions::parse_predicate)
+            .1
+            .unwrap_or_default();
+
+    // Parse join type.
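+    // Falls back to the default variant when the join type cannot be resolved.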
+ let join_type = proto_required_enum_field!(x, y, r#type, JoinType) + .1 + .unwrap_or_default(); + + // Determine whether the join can null the left and/or right side, and + // whether the right side is returned at all. + let (left_nullable, right_nullable) = match join_type { + JoinType::Unspecified => (false, Some(false)), + JoinType::Inner => (false, Some(false)), + JoinType::Outer => (true, Some(true)), + JoinType::Left => (false, Some(true)), + JoinType::Right => (true, Some(false)), + JoinType::Semi => (false, None), + JoinType::Anti => (false, None), + JoinType::Single => (false, Some(true)), + }; + + // Derive final schema. + if let (Some(left_fields), Some(right_fields)) = (left.unwrap_struct(), right.unwrap_struct()) { + let mut fields = Vec::with_capacity(left_fields.len() + right_fields.len()); + if left_nullable { + fields.extend(left_fields.into_iter().map(|x| x.make_nullable())) + } else { + fields.extend(left_fields.into_iter()) + } + if let Some(right_nullable) = right_nullable { + if right_nullable { + fields.extend(right_fields.into_iter().map(|x| x.make_nullable())) + } else { + fields.extend(right_fields.into_iter()) + } + } + let schema = data_type::DataType::new_struct(fields, false); + y.set_schema(schema); + } else { + y.set_schema(Arc::default()); + } + + // Handle optional post-join filter. + let filter_expression = + proto_boxed_field!(x, y, post_join_filter, expressions::parse_predicate).1; + + // Describe the relation. + let prefix = match (join_type, x.post_join_filter.is_some()) { + (JoinType::Unspecified, _) => "Unknown", + (JoinType::Inner, true) => "Filtered inner", + (JoinType::Inner, false) => "Inner", + (JoinType::Outer, true) => "Filtered outer", + (JoinType::Outer, false) => "Outer", + (JoinType::Left, true) => "Filtered left", + (JoinType::Left, false) => "Left", + (JoinType::Right, true) => "Filtered right", + (JoinType::Right, false) => "Right", + (JoinType::Semi, true) => "Filtered semi", + (JoinType::Semi, false) => "Semi", + (JoinType::Anti, true) => "Filtered anti", + (JoinType::Anti, false) => "Anti", + (JoinType::Single, true) => "Filtered single", + (JoinType::Single, false) => "Single", + }; + describe!(y, Relation, "{prefix} join by {join_expression}"); + summary!(y, "{prefix} join by {join_expression:#}."); + y.push_summary(comment::Comment::new().nl().plain(match join_type { + JoinType::Unspecified => "", + JoinType::Inner => concat!( + " Returns rows combining the row from the left and right ", + "input for each pair where the join expression yields true.", + ), + JoinType::Outer => concat!( + " Returns rows combining the row from the left and right ", + "input for each pair where the join expression yields true. ", + "If the join expression never yields true for any left or ", + "right row, this returns a row anyway, with the fields ", + "corresponding to the other input set to null.", + ), + JoinType::Left => concat!( + " Returns rows combining the row from the left and right ", + "input for each pair where the join expression yields true. ", + "If the join expression never yields true for a row from the ", + "left, this returns a row anyway, with the fields corresponding ", + "to the right input set to null.", + ), + JoinType::Right => concat!( + " Returns rows combining the row from the left and right ", + "input for each pair where the join expression yields true. 
", + "If the join expression never yields true for a row from the ", + "right, this returns a row anyway, with the fields corresponding ", + "to the left input set to null.", + ), + JoinType::Semi => concat!( + " Filters rows from the left input, propagating a row only if ", + "the join expression yields true for that row combined with ", + "any row from the right input.", + ), + JoinType::Anti => concat!( + " Filters rows from the left input, propagating a row only if ", + "the join expression does not yield true for that row combined ", + "with any row from the right input.", + ), + JoinType::Single => concat!( + " Returns a row for each row from the left input, concatenating ", + "it with the row from the right input for which the join ", + "expression yields true. If the expression never yields true for ", + "a left input, the fields corresponding to the right input are ", + "set to null. If the expression yields true for a left row and ", + "multiple right rows, this may return the first pair encountered ", + "or throw an error." + ), + })); + if let Some(filter_expression) = filter_expression { + y.push_summary( + comment::Comment::new() + .nl() + .plain(format!("The result is filtered by {filter_expression:#}.")), + ); + } + + // Handle the common field. + handle_rel_common!(x, y); + + // Handle the advanced extension field. + handle_advanced_extension!(x, y); + + Ok(()) +} diff --git a/rs/src/parse/relations/mod.rs b/rs/src/parse/relations/mod.rs new file mode 100644 index 00000000..f5b24452 --- /dev/null +++ b/rs/src/parse/relations/mod.rs @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for relational algebra. + +#[macro_use] +mod common; +mod aggregate; +mod cross; +mod extension; +mod fetch; +mod filter; +mod join; +mod project; +mod read; +mod set; +mod sort; + +use crate::input::proto::substrait; +use crate::input::traits::ProtoOneOf; +use crate::output::diagnostic; +use crate::parse::context; + +/// Parse a relation type. +fn parse_rel_type(x: &substrait::rel::RelType, y: &mut context::Context) -> diagnostic::Result<()> { + // Ensure that the top of the schema stack exists and it set to an + // unresolved type. + y.clear_schema(); + + // Set a basic description, to ensure that these nodes are always marked + // as relations. + describe!(y, Relation, "{} relation", x.proto_oneof_variant()); + + // NOTE: if you're here because you added a relation type and now CI is + // failing, you can just add "_ => Ok(())," to the end of this list. The + // validator will then automatically throw a "not yet implemented" warning + // if it finds that relation type in a plan. 
+    match x {
+        substrait::rel::RelType::Read(x) => read::parse_read_rel(x, y),
+        substrait::rel::RelType::Filter(x) => filter::parse_filter_rel(x, y),
+        substrait::rel::RelType::Fetch(x) => fetch::parse_fetch_rel(x, y),
+        substrait::rel::RelType::Aggregate(x) => aggregate::parse_aggregate_rel(x, y),
+        substrait::rel::RelType::Sort(x) => sort::parse_sort_rel(x, y),
+        substrait::rel::RelType::Join(x) => join::parse_join_rel(x, y),
+        substrait::rel::RelType::Project(x) => project::parse_project_rel(x, y),
+        substrait::rel::RelType::Set(x) => set::parse_set_rel(x, y),
+        substrait::rel::RelType::ExtensionSingle(x) => extension::parse_extension_single_rel(x, y),
+        substrait::rel::RelType::ExtensionMulti(x) => extension::parse_extension_multi_rel(x, y),
+        substrait::rel::RelType::ExtensionLeaf(x) => extension::parse_extension_leaf_rel(x, y),
+        substrait::rel::RelType::Cross(x) => cross::parse_cross_rel(x, y),
+        // _ => Ok(()),
+    }
+}
+
+/// Parse a relation node. Sets the current schema to the schema returned by
+/// the relation.
+pub fn parse_rel(x: &substrait::Rel, y: &mut context::Context) -> diagnostic::Result<()> {
+    let schema = proto_required_field!(x, y, rel_type, parse_rel_type)
+        .0
+        .data_type();
+    y.set_schema(schema);
+    Ok(())
+}
diff --git a/rs/src/parse/relations/project.rs b/rs/src/parse/relations/project.rs
new file mode 100644
index 00000000..ed3d3d84
--- /dev/null
+++ b/rs/src/parse/relations/project.rs
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for project relations.
+//!
+//! The project operation will produce one or more additional expressions based
+//! on the inputs of the dataset.
+//!
+//! See
+
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+
+/// Parse project relation.
+pub fn parse_project_rel(
+    x: &substrait::ProjectRel,
+    y: &mut context::Context,
+) -> diagnostic::Result<()> {
+    // Parse input.
+    let mut schema = handle_rel_input!(x, y);
+
+    // Start with the input schema.
+    y.set_schema(schema.clone());
+
+    // Parse the expressions that are to be appended to the schema.
+    let expressions = proto_required_repeated_field!(
+        x,
+        y,
+        expressions,
+        expressions::parse_expression,
+        |_x, y, _i, n, _r| {
+            // Update the schema.
+            if let Some(mut fields) = schema.unwrap_struct() {
+                fields.push(n.data_type());
+                schema = data_type::DataType::new_struct(fields, false);
+                y.set_schema(schema.clone());
+            } else {
+                y.set_schema(Arc::default());
+            }
+        }
+    )
+    .1;
+
+    // Describe the relation.
+    describe!(y, Relation, "Projection");
+    if expressions.len() > 1 {
+        summary!(
+            y,
+            "This relation generates {} new columns by projecting the existing columns using scalar expressions.",
+            expressions.len()
+        );
+    } else {
+        summary!(
+            y,
+            "This relation generates a new column by projecting the existing columns using a scalar expression."
+        );
+    }
+
+    // Handle the common field.
+    handle_rel_common!(x, y);
+
+    // Handle the advanced extension field.
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/read.rs b/rs/src/parse/relations/read.rs
new file mode 100644
index 00000000..df65de7e
--- /dev/null
+++ b/rs/src/parse/relations/read.rs
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for read relations.
+//!
+//! The read operator is an operator that produces one output.
+//! A simple example would be the reading of a Parquet file.
+//!
+//! See
+
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+use crate::parse::expressions::literals;
+use crate::parse::expressions::references::mask;
+use crate::parse::extensions;
+use crate::parse::types;
+use crate::string_util;
+
+/// Information about a data source.
+struct SourceInfo {
+    /// Short description of the data source, used in the brief of the read
+    /// relation.
+    pub name: String,
+
+    /// The schema of the data, if not context-sensitive.
+    pub data_type: Option<Arc<data_type::DataType>>,
+}
+
+/// Parse virtual table.
+fn parse_virtual_table(
+    x: &substrait::read_rel::VirtualTable,
+    y: &mut context::Context,
+) -> diagnostic::Result<SourceInfo> {
+    let mut data_type: Arc<data_type::DataType> = Arc::default();
+
+    // Parse rows, ensuring that they all have the same type.
+    proto_repeated_field!(x, y, values, |x, y| {
+        let result = literals::parse_struct(x, y, false);
+        data_type = types::assert_equal(
+            y,
+            &y.data_type(),
+            &data_type,
+            "virtual table rows must have the same type",
+        );
+        result
+    });
+
+    // Describe the node.
+    describe!(y, Misc, "Virtual table");
+    Ok(SourceInfo {
+        name: String::from("virtual table"),
+        data_type: Some(data_type),
+    })
+}
+
+/// Parse a file path. Returns whether this matches multiple files.
+fn parse_path_type(
+    x: &substrait::read_rel::local_files::file_or_files::PathType,
+    y: &mut context::Context,
+) -> diagnostic::Result<bool> {
+    // FIXME: I'm not sure these paths should even be URIs. These are supposed
+    // to be local files after all, so shouldn't they just be paths? But they
+    // really shouldn't be called URIs if they're not going to conform to the
+    // standard governing them, and if they're paths, there should still be
+    // some specification about what kind of paths they can be (POSIX? Windows
+    // with slashes? UNC? etc).
+    //
+    // Note that the diagnostics for this have their own code, so if a user
+    // disagrees with the syntax they can just downgrade these warnings to
+    // infos.
+    use substrait::read_rel::local_files::file_or_files::PathType;
+    match x {
+        PathType::UriPath(x) => {
+            if let Err(e) = string_util::check_uri(x) {
+                diagnostic!(y, Error, e);
+            }
+            Ok(false)
+        }
+        PathType::UriPathGlob(x) => {
+            if let Err(e) = string_util::check_uri_glob(x) {
+                diagnostic!(y, Error, e);
+            }
+            Ok(true)
+        }
+        PathType::UriFile(x) => {
+            if let Err(e) = string_util::check_uri(x) {
+                diagnostic!(y, Error, e);
+            }
+            Ok(false)
+        }
+        PathType::UriFolder(x) => {
+            if let Err(e) = string_util::check_uri(x) {
+                diagnostic!(y, Error, e);
+            }
+            Ok(true)
+        }
+    }
+}
+
+/// Parse file entry.
+fn parse_file_or_files(
+    x: &substrait::read_rel::local_files::FileOrFiles,
+    y: &mut context::Context,
+    extension_present: bool,
+) -> diagnostic::Result<()> {
+    // Parse path.
+    let multiple = proto_required_field!(x, y, path_type, parse_path_type)
+        .1
+        .unwrap_or_default();
+
+    // Parse read configuration.
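+    // The file format may only be left unspecified when an advanced extension
+    // enhancement is present to define the behavior instead.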
+    let format = proto_enum_field!(
+        x,
+        y,
+        format,
+        substrait::read_rel::local_files::file_or_files::FileFormat,
+        |x, y| {
+            if !extension_present
+                && matches!(
+                    x,
+                    substrait::read_rel::local_files::file_or_files::FileFormat::Unspecified
+                )
+            {
+                diagnostic!(
+                    y,
+                    Error,
+                    IllegalValue,
+                    "file format must be specified when no enhancement extension is present"
+                );
+            }
+            Ok(*x)
+        }
+    )
+    .1
+    .unwrap_or_default();
+    proto_primitive_field!(x, y, partition_index);
+    proto_primitive_field!(x, y, start);
+    proto_primitive_field!(x, y, length);
+
+    // Having nonzero file offsets makes no sense when this entry refers to
+    // multiple files.
+    if multiple && (x.start > 0 || x.length > 0) {
+        diagnostic!(
+            y,
+            Error,
+            IllegalValue,
+            "file offsets are not allowed in conjunction with multiple files"
+        );
+    }
+
+    // Describe the node.
+    if multiple {
+        describe!(y, Misc, "Multiple files");
+    } else {
+        describe!(y, Misc, "Single file");
+    }
+    summary!(y, "Read");
+    if x.partition_index != 0 {
+        summary!(y, "partition {}", x.partition_index);
+    }
+    summary!(y, "from");
+    if multiple {
+        summary!(y, "multiple");
+    } else {
+        if x.start > 0 {
+            if x.length > 0 {
+                summary!(y, "byte offset {} to {} of", x.start, x.start + x.length);
+            } else {
+                summary!(y, "byte offset {} to the end of", x.start);
+            }
+        } else if x.length > 0 {
+            summary!(y, "the first {} byte(s) of", x.length);
+        }
+        summary!(y, "a single");
+    }
+    match format {
+        substrait::read_rel::local_files::file_or_files::FileFormat::Unspecified => {}
+        substrait::read_rel::local_files::file_or_files::FileFormat::Parquet => {
+            summary!(y, "Parquet");
+        }
+    }
+    if multiple {
+        summary!(y, "files");
+    } else {
+        summary!(y, "file");
+    }
+
+    Ok(())
+}
+
+/// Parse local files.
+fn parse_local_files(
+    x: &substrait::read_rel::LocalFiles,
+    y: &mut context::Context,
+) -> diagnostic::Result<SourceInfo> {
+    // Parse fields.
+    let extension_present = x
+        .advanced_extension
+        .as_ref()
+        .and_then(|x| x.enhancement.as_ref())
+        .is_some();
+    proto_required_repeated_field!(
+        x,
+        y,
+        items,
+        parse_file_or_files,
+        |_, _, _, _, _| (),
+        extension_present
+    );
+    proto_field!(
+        x,
+        y,
+        advanced_extension,
+        extensions::advanced::parse_advanced_extension
+    );
+
+    // Describe the node.
+    describe!(y, Misc, "Table from file(s)");
+    Ok(SourceInfo {
+        name: String::from("local files"),
+        data_type: None,
+    })
+}
+
+/// Parse named table.
+fn parse_named_table(
+    x: &substrait::read_rel::NamedTable,
+    y: &mut context::Context,
+) -> diagnostic::Result<SourceInfo> {
+    // Parse fields.
+    proto_required_repeated_field!(x, y, names);
+    proto_field!(
+        x,
+        y,
+        advanced_extension,
+        extensions::advanced::parse_advanced_extension
+    );
+
+    // Determine and check consistency of the table name.
+    let name = if x.names.is_empty() {
+        String::from("?")
+    } else {
+        if x.names.len() > 1 {
+            // FIXME: what does this mean?
+            diagnostic!(
+                y,
+                Warning,
+                NotYetImplemented,
+                "named tables with multiple names"
+            );
+        }
+        string_util::as_ident_or_string(x.names.first().unwrap())
+    };
+
+    // Describe the node.
+    describe!(y, Misc, "Named table {}", name);
+    Ok(SourceInfo {
+        name,
+        data_type: None,
+    })
+}
+
+/// Parse extension table.
+fn parse_extension_table(
+    x: &substrait::read_rel::ExtensionTable,
+    y: &mut context::Context,
+) -> diagnostic::Result<SourceInfo> {
+    proto_required_field!(x, y, detail, extensions::advanced::parse_functional_any);
+
+    // Describe the node.
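+    // Fall back to a generic description when the detail message is missing.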
+    describe!(
+        y,
+        Misc,
+        "{} extension",
+        x.detail
+            .as_ref()
+            .map(|x| x.type_url.clone())
+            .unwrap_or_else(|| String::from("Unknown"))
+    );
+    Ok(SourceInfo {
+        name: x
+            .detail
+            .as_ref()
+            .map(|x| x.type_url.to_string())
+            .unwrap_or_else(|| String::from("extension")),
+        data_type: None,
+    })
+}
+
+/// Parse read type.
+fn parse_read_type(
+    x: &substrait::read_rel::ReadType,
+    y: &mut context::Context,
+) -> diagnostic::Result<SourceInfo> {
+    match x {
+        substrait::read_rel::ReadType::VirtualTable(x) => parse_virtual_table(x, y),
+        substrait::read_rel::ReadType::LocalFiles(x) => parse_local_files(x, y),
+        substrait::read_rel::ReadType::NamedTable(x) => parse_named_table(x, y),
+        substrait::read_rel::ReadType::ExtensionTable(x) => parse_extension_table(x, y),
+    }
+}
+
+/// Parse read relation.
+pub fn parse_read_rel(x: &substrait::ReadRel, y: &mut context::Context) -> diagnostic::Result<()> {
+    // Handle read type field.
+    let source = proto_required_field!(x, y, read_type, parse_read_type)
+        .1
+        .unwrap_or(SourceInfo {
+            name: String::from("unknown source"),
+            data_type: None,
+        });
+
+    // Handle schema field.
+    let schema = proto_required_field!(x, y, base_schema, types::parse_named_struct)
+        .0
+        .data_type
+        .clone();
+
+    // If both data_type and schema are known, verify that they are the same.
+    let mut schema = match (source.data_type, schema) {
+        (Some(data_type), Some(schema)) => {
+            types::assert_equal(y, &schema, &data_type, "data differs from schema")
+        }
+        (Some(data_type), None) => data_type,
+        (None, Some(schema)) => schema,
+        (None, None) => Arc::default(),
+    };
+
+    // The outer struct of a schema should not be nullable.
+    if !schema.is_unresolved() && schema.nullable() {
+        diagnostic!(
+            y,
+            Error,
+            TypeMismatchedNullability,
+            "the outer struct representing a schema must not be nullable"
+        );
+    }
+
+    // Set the schema to the merged data type.
+    y.set_schema(schema.clone());
+
+    // Handle filter.
+    proto_boxed_field!(x, y, filter, expressions::parse_predicate);
+
+    // Handle projection.
+    if x.projection.is_some() {
+        schema =
+            proto_required_field!(x, y, projection, mask::parse_mask_expression, &schema, true)
+                .0
+                .data_type();
+        y.set_schema(schema.clone());
+    }
+
+    // Describe the relation.
+    match (x.filter.is_some(), x.projection.is_some()) {
+        (false, false) => describe!(y, Relation, "Read from {}", source.name),
+        (false, true) => describe!(y, Relation, "Partial read from {}", source.name),
+        (true, false) => describe!(y, Relation, "Filtered read from {}", source.name),
+        (true, true) => describe!(y, Relation, "Filtered partial read from {}", source.name),
+    }
+
+    // Handle the common field.
+    handle_rel_common!(x, y);
+
+    // Handle the advanced extension field.
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/relations/set.rs b/rs/src/parse/relations/set.rs
new file mode 100644
index 00000000..e5a8239a
--- /dev/null
+++ b/rs/src/parse/relations/set.rs
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for set relations.
+//!
+//! The set operation encompasses several set-level operations that support
+//! combining datasets, possibly excluding records based on various types of
+//! record-level matching.
+//!
+//!
See + +use std::sync::Arc; + +use crate::input::proto::substrait; +use crate::output::diagnostic; +use crate::parse::context; +use crate::parse::types; + +enum Operation { + Invalid, + Subtract, + SubtractByUnion, + SubtractByIntersection, + Intersect, + IntersectWithUnion, + Union, + Merge, +} + +/// Parse set relation. +pub fn parse_set_rel(x: &substrait::SetRel, y: &mut context::Context) -> diagnostic::Result<()> { + use substrait::set_rel::SetOp; + + // Parse inputs. + let in_types: Vec<_> = handle_rel_inputs!(x, y).collect(); + + // Check inputs and derive schema. + if in_types.len() < 2 { + diagnostic!( + y, + Error, + RelationMissing, + "set operations require at least two input relations" + ); + } + let mut schema = Arc::default(); + for in_type in in_types.iter() { + schema = types::assert_equal( + y, + &in_type.strip_field_names(), + &schema, + "all set inputs must have matching schemas", + ); + } + y.set_schema(schema); + + // Check set operation. + let op = proto_required_enum_field!(x, y, op, SetOp) + .1 + .unwrap_or_default(); + let op = match (op, in_types.len() > 2) { + (SetOp::Unspecified, _) => Operation::Invalid, + (SetOp::MinusPrimary, true) => Operation::SubtractByUnion, + (SetOp::MinusPrimary, false) => Operation::Subtract, + (SetOp::MinusMultiset, true) => Operation::SubtractByIntersection, + (SetOp::MinusMultiset, false) => Operation::Subtract, + (SetOp::IntersectionPrimary, true) => Operation::IntersectWithUnion, + (SetOp::IntersectionPrimary, false) => Operation::Intersect, + (SetOp::IntersectionMultiset, _) => Operation::Intersect, + (SetOp::UnionDistinct, _) => Operation::Union, + (SetOp::UnionAll, _) => Operation::Merge, + }; + + // Describe the relation. + match op { + Operation::Invalid => { + describe!(y, Relation, "Invalid set operation"); + } + Operation::Subtract => { + describe!(y, Relation, "Set subtraction"); + summary!( + y, + "Yields all rows from the first dataset that do not exist \ + in the second dataset." + ); + } + Operation::SubtractByUnion => { + describe!(y, Relation, "Set subtract by union"); + summary!( + y, + "Yields all rows from the first dataset that do not exist \ + in any of the other datasets." + ); + } + Operation::SubtractByIntersection => { + describe!(y, Relation, "Set subtract by intersection"); + summary!( + y, + "Yields all rows from the first dataset that do not exist in \ + all of the other datasets." + ); + } + Operation::Intersect => { + describe!(y, Relation, "Set intersection"); + summary!( + y, + "Yields all rows from the first dataset that exist in all \ + datasets." + ); + } + Operation::IntersectWithUnion => { + describe!(y, Relation, "Set intersect with union"); + summary!( + y, + "Yields all rows from the first dataset that exist in any of \ + the other datasets." + ); + } + Operation::Union => { + describe!(y, Relation, "Set union"); + summary!( + y, + "Yields all rows that exist in any dataset, removing duplicates." + ); + } + Operation::Merge => { + describe!(y, Relation, "Merge"); + summary!(y, "Yields all rows from all incoming datasets."); + } + }; + + // Handle the common field. + handle_rel_common!(x, y); + + // Handle the advanced extension field. + handle_advanced_extension!(x, y); + + Ok(()) +} diff --git a/rs/src/parse/relations/sort.rs b/rs/src/parse/relations/sort.rs new file mode 100644 index 00000000..a4ccba29 --- /dev/null +++ b/rs/src/parse/relations/sort.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for sort relations. +//! +//! 
The sort operator reorders a dataset based on one or more identified
+//! sort fields as well as a sorting function.
+//!
+//! See the sort operation section of the Substrait specification.
+
+use crate::input::proto::substrait;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::sorts;
+
+/// Parse sort relation.
+pub fn parse_sort_rel(x: &substrait::SortRel, y: &mut context::Context) -> diagnostic::Result<()> {
+    // Parse input.
+    let in_type = handle_rel_input!(x, y);
+
+    // Sorts pass through their input schema unchanged.
+    y.set_schema(in_type);
+
+    // Check the sorts.
+    let keys = proto_required_repeated_field!(x, y, sorts, sorts::parse_sort_field).1;
+
+    // Describe the relation.
+    describe!(
+        y,
+        Relation,
+        "Order by {}",
+        keys.first().cloned().flatten().unwrap_or_default()
+    );
+    if x.sorts.len() > 1 {
+        summary!(
+            y,
+            "This relation reorders or coalesces a dataset based on {} keys. \
+             For sorts, the first key has greatest priority; only if the first \
+             key is equivalent for two rows will the next key be checked.",
+            x.sorts.len()
+        );
+    } else {
+        summary!(
+            y,
+            "This relation reorders or coalesces a dataset based on the value of {}.",
+            keys.first().cloned().flatten().unwrap_or_default()
+        );
+    }
+
+    // Handle the common field.
+    handle_rel_common!(x, y);
+
+    // Handle the advanced extension field.
+    handle_advanced_extension!(x, y);
+
+    Ok(())
+}
diff --git a/rs/src/parse/sorts.rs b/rs/src/parse/sorts.rs
new file mode 100644
index 00000000..9db76984
--- /dev/null
+++ b/rs/src/parse/sorts.rs
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module providing parse/validation functions for sort fields.
+
+use std::sync::Arc;
+
+use crate::input::proto::substrait;
+use crate::input::traits::ProtoEnum;
+use crate::output::comment;
+use crate::output::data_type;
+use crate::output::diagnostic;
+use crate::parse::context;
+use crate::parse::expressions;
+use crate::parse::expressions::functions;
+use crate::parse::extensions;
+
+/// Parse a sort direction.
+fn parse_sort_direction(x: &i32, y: &mut context::Context) -> diagnostic::Result<&'static str> {
+    use substrait::sort_field::SortDirection;
+    match SortDirection::proto_enum_from_i32(*x) {
+        None => {
+            diagnostic!(
+                y,
+                Error,
+                IllegalValue,
+                "unknown value {x} for {}",
+                SortDirection::proto_enum_type()
+            );
+            Ok("Invalid sort by")
+        }
+        Some(SortDirection::Unspecified) => {
+            diagnostic!(y, Error, ProtoMissingField, "direction");
+            Ok("Invalid sort by")
+        }
+        Some(SortDirection::AscNullsFirst) => {
+            describe!(y, Misc, "Sort ascending, nulls first");
+            Ok("Ascending sort by")
+        }
+        Some(SortDirection::AscNullsLast) => {
+            describe!(y, Misc, "Sort ascending, nulls last");
+            Ok("Ascending sort by")
+        }
+        Some(SortDirection::DescNullsFirst) => {
+            describe!(y, Misc, "Sort descending, nulls first");
+            Ok("Descending sort by")
+        }
+        Some(SortDirection::DescNullsLast) => {
+            describe!(y, Misc, "Sort descending, nulls last");
+            Ok("Descending sort by")
+        }
+        Some(SortDirection::Clustered) => {
+            describe!(y, Misc, "Coalesce equal values");
+            summary!(
+                y,
+                "Equal values are grouped together, but no ordering is defined between clusters."
+            );
+            Ok("Coalesce")
+        }
+    }
+}
+
+/// Parse a function reference that should resolve to a comparison function
+/// (i.e. one usable for sorts) for the given type.
+fn parse_comparison_function_reference(
+    x: &u32,
+    y: &mut context::Context,
+    data_type: &Arc<data_type::DataType>,
+) -> diagnostic::Result<&'static str> {
+    // Resolve the reference as normal.
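+    // (That is, through the same simple-extension lookup used for ordinary
+    // function calls; only the checks below are specific to sort usage.)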
+    let function = extensions::simple::parse_function_reference(x, y)?;
+
+    // Check the function.
+    if let Some(function) = &function.definition {
+        let return_type =
+            functions::check_function(y, function, &[], &[data_type.clone(), data_type.clone()]);
+        if !matches!(
+            return_type.class(),
+            data_type::Class::Simple(data_type::Simple::Boolean)
+                | data_type::Class::Simple(data_type::Simple::I8)
+                | data_type::Class::Simple(data_type::Simple::I16)
+                | data_type::Class::Simple(data_type::Simple::I32)
+                | data_type::Class::Simple(data_type::Simple::I64)
+                | data_type::Class::Unresolved
+        ) {
+            diagnostic!(
+                y,
+                Error,
+                TypeMismatch,
+                "comparison functions must yield booleans (a < b) or integers (a ?= b), but found {}",
+                return_type
+            );
+        }
+    } else {
+        diagnostic!(
+            y,
+            Warning,
+            ExpressionFunctionDefinitionUnavailable,
+            "cannot check validity of comparison function"
+        );
+    }
+
+    // Describe how the function is to be interpreted.
+    y.push_summary(
+        comment::Comment::new()
+            .plain("Comparison function for sorting. Taking two elements as input,")
+            .plain("it must determine the correct sort order. Comparison functions")
+            .plain("may return booleans or integers, interpreted as follows:")
+            .lo()
+            .plain("f(a, b) => true or negative: a sorts before b;")
+            .li()
+            .plain("f(a, b) => false or positive: b sorts before a;")
+            .li()
+            .plain("f(a, b) => 0 or null: a and b have no defined sort order.")
+            .lc()
+            .plain("This corresponds to f: a < b or f: a ?= b."),
+    );
+
+    Ok("Custom sort")
+}
+
+/// Parse a sort kind, applicable to elements of the given data type.
+fn parse_sort_kind(
+    x: &substrait::sort_field::SortKind,
+    y: &mut context::Context,
+    data_type: &Arc<data_type::DataType>,
+) -> diagnostic::Result<&'static str> {
+    match x {
+        substrait::sort_field::SortKind::Direction(x) => parse_sort_direction(x, y),
+        substrait::sort_field::SortKind::ComparisonFunctionReference(x) => {
+            parse_comparison_function_reference(x, y, data_type)
+        }
+    }
+}
+
+/// Parse a sort field.
+pub fn parse_sort_field(
+    x: &substrait::SortField,
+    y: &mut context::Context,
+) -> diagnostic::Result<expressions::Expression> {
+    // Parse fields.
+    let (n, e) = proto_required_field!(x, y, expr, expressions::parse_expression);
+    let expression = e.unwrap_or_default();
+    let method = proto_required_field!(x, y, sort_kind, parse_sort_kind, &n.data_type())
+        .1
+        .unwrap_or("Invalid sort by");
+
+    // Describe node.
+    describe!(y, Misc, "{method} {expression}");
+    summary!(y, "{method} {expression:#}.");
+    Ok(expression)
+}
diff --git a/rs/src/parse/traversal.rs b/rs/src/parse/traversal.rs
new file mode 100644
index 00000000..c61e4038
--- /dev/null
+++ b/rs/src/parse/traversal.rs
@@ -0,0 +1,1251 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Module for the boilerplate code involved with traversing an input
+//! protobuf/YAML tree to form the output [tree](tree::Node).
+//!
+//! Refer to the documentation for [`parse`](mod@crate::parse) for more
+//! information.
+
+// TODO: remove once validation code is finished.
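+//
+// As a rough usage sketch (with hypothetical message, field, and parser
+// names; the real entry points live in the parse submodules), a typical
+// parse function built on the macros below looks like:
+//
+//     fn parse_foo(x: &substrait::Foo, y: &mut context::Context)
+//         -> diagnostic::Result<()> {
+//         // Pushes a child node for x.bar, runs parse_bar on it, and emits
+//         // a ProtoMissingField diagnostic if the field is unset.
+//         proto_required_field!(x, y, bar, parse_bar);
+//         Ok(())
+//     }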
+#![allow(dead_code)]
+#![allow(unused_macros)]
+
+use crate::input::config;
+use crate::input::traits::InputNode;
+use crate::input::traits::ProtoEnum;
+use crate::input::yaml;
+use crate::output::diagnostic;
+use crate::output::extension;
+use crate::output::parse_result;
+use crate::output::path;
+use crate::output::primitive_data;
+use crate::output::tree;
+use crate::parse::context;
+use std::sync::Arc;
+
+//=============================================================================
+// Type definitions
+//=============================================================================
+
+// Return value for parse macros for optional fields. The first element refers
+// to the node for the field, if the field was present. The second is the
+// return value of the supplied parse function, if it was called and didn't
+// fail.
+type OptionalResult<TR> = (Option<Arc<tree::Node>>, Option<TR>);
+
+// Return value for parse macros for required fields. The first element refers
+// to the node for the field; if the required field wasn't actually specified,
+// a dummy node would have been made, so this is not an Option. The second is
+// the return value of the supplied parse function, if it was called and didn't
+// fail, just like for OptionalResult.
+type RequiredResult<TR> = (Arc<tree::Node>, Option<TR>);
+
+// Return value for parse macros for repeated fields. Same as RequiredResult,
+// but with each tuple entry wrapped in a vector. Both vectors will have equal
+// length.
+type RepeatedResult<TR> = (Vec<Arc<tree::Node>>, Vec<Option<TR>>);
+
+//=============================================================================
+// Macros for pushing annotations
+//=============================================================================
+
+/// Convenience/shorthand macro for pushing diagnostic messages to a node.
+macro_rules! diagnostic {
+    ($context:expr, $level:ident, $class:ident, $($args:expr),*) => {
+        diagnostic!($context, $level, cause!($class, $($args),*))
+    };
+    ($context:expr, $level:ident, $cause:expr) => {
+        crate::parse::traversal::push_diagnostic($context, crate::output::diagnostic::Level::$level, $cause)
+    };
+    ($context:expr, $diag:expr) => {
+        $context.push_diagnostic($diag)
+    };
+}
+macro_rules! ediagnostic {
+    ($context:expr, $level:ident, $class:ident, $err:expr) => {
+        diagnostic!($context, $level, ecause!($class, $err))
+    };
+}
+
+/// Pushes a diagnostic message to the node information list.
+pub fn push_diagnostic(
+    context: &mut context::Context,
+    level: diagnostic::Level,
+    cause: diagnostic::Cause,
+) {
+    context.push_diagnostic(diagnostic::RawDiagnostic {
+        cause,
+        level,
+        path: context.path_buf(),
+    });
+}
+
+/// Convenience/shorthand macro for pushing formatted comments to a node.
+macro_rules! comment {
+    ($context:expr, $($fmts:expr),*) => {
+        $context.push_comment(format!($($fmts),*))
+    };
+}
+
+/// Convenience/shorthand macro for pushing formatted comments that link to
+/// some path to a node.
+macro_rules! link {
+    ($context:expr, $path:expr, $($fmts:expr),*) => {
+        $context.push_comment(crate::output::comment::Comment::new().link(format!($($fmts),*), $path))
+    };
+}
+
+/// Convenience/shorthand macro for setting descriptive information for a node.
+macro_rules! describe {
+    ($context:expr, $class:ident, $($fmts:expr),*) => {
+        $context.set_description(crate::output::tree::Class::$class, Some(format!($($fmts),*)))
+    };
+}
+
+/// Convenience/shorthand macro for appending plain text to the summary of a
+/// node.
+macro_rules! summary {
+    ($context:expr, $($fmts:expr),*) => {
+        $context.push_summary(format!($($fmts),*))
+    };
+}
+
+//=============================================================================
+// Generic code for field handling
+//=============================================================================
+
+/// Parses a child node and pushes it into the provided parent context.
+fn push_child<TF, TR, FP>(
+    context: &mut context::Context,
+    child: &TF,
+    path_element: path::PathElement,
+    unknown_subtree: bool,
+    parser: FP,
+) -> RequiredResult<TR>
+where
+    TF: InputNode,
+    FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result<TR>,
+{
+    // Create the node for the child.
+    let mut field_output = child.data_to_node();
+
+    // Create the context for calling the parse function for the child.
+    let mut field_context = context.child(&mut field_output, path_element.clone());
+
+    // Call the provided parser function.
+    let result = parser(child, &mut field_context)
+        .map_err(|cause| {
+            diagnostic!(&mut field_context, Error, cause);
+        })
+        .ok();
+
+    // Handle any fields not handled by the provided parse function. Only
+    // generate a warning diagnostic for unhandled children if the parse
+    // function succeeded and we're not already in an unknown subtree.
+    handle_unknown_children(
+        child,
+        &mut field_context,
+        result.is_some() && !unknown_subtree,
+    );
+
+    // Push and return the completed node.
+    let field_output = Arc::new(field_output);
+    context.push(tree::NodeData::Child(tree::Child {
+        path_element,
+        node: field_output.clone(),
+        recognized: !unknown_subtree,
+    }));
+
+    (field_output, result)
+}
+
+/// Handle all children that haven't already been handled. If with_diagnostic
+/// is set, this also generates a diagnostic message if there were
+/// populated/non-default unhandled fields.
+fn handle_unknown_children<T: InputNode>(
+    input: &T,
+    context: &mut context::Context,
+    with_diagnostic: bool,
+) {
+    if input.parse_unknown(context) && with_diagnostic {
+        let mut fields = vec![];
+        for data in context.node_data().iter() {
+            if let tree::NodeData::Child(child) = data {
+                if !child.recognized {
+                    fields.push(child.path_element.to_string_without_dot());
+                }
+            }
+        }
+        if !fields.is_empty() {
+            let fields: String =
+                itertools::Itertools::intersperse(fields.into_iter(), ", ".to_string()).collect();
+            diagnostic!(
+                context,
+                Warning,
+                NotYetImplemented,
+                "the following child nodes were not recognized by the validator: {fields}"
+            );
+        }
+    }
+}
+
+//=============================================================================
+// Protobuf optional field handling
+//=============================================================================
+
+/// Convenience/shorthand macro for parsing optional protobuf fields.
+macro_rules! proto_field {
+    ($input:expr, $context:expr, $field:ident) => {
+        proto_field!($input, $context, $field, |_, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $field:ident, $parser:expr) => {
+        crate::parse::traversal::push_proto_field(
+            $context,
+            &$input.$field.as_ref(),
+            crate::input::proto::cook_ident(stringify!($field)),
+            false,
+            $parser,
+        )
+    };
+    ($input:expr, $context:expr, $field:ident, $parser:expr, $($args:expr),*) => {
+        proto_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*))
+    };
+}
+
+/// Convenience/shorthand macro for parsing optional protobuf fields that were
+/// wrapped in a Box by prost.
+macro_rules!
proto_boxed_field { + ($input:expr, $context:expr, $field:ident) => { + proto_boxed_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + crate::parse::traversal::push_proto_field( + $context, + &$input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $($args:expr),*) => { + proto_boxed_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Parse and push a protobuf optional field. +pub fn push_proto_field( + context: &mut context::Context, + field: &Option>, + field_name: &'static str, + unknown_subtree: bool, + parser: FP, +) -> OptionalResult +where + TF: InputNode, + FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result, +{ + if !context.set_field_parsed(field_name) { + panic!("field {field_name} was parsed multiple times"); + } + + if let Some(field_input) = field { + let path_element = if let Some(variant) = field_input.oneof_variant() { + path::PathElement::Variant(field_name.to_string(), variant.to_string()) + } else { + path::PathElement::Field(field_name.to_string()) + }; + let (field_output, result) = push_child( + context, + field_input.deref(), + path_element, + unknown_subtree, + parser, + ); + (Some(field_output), result) + } else { + (None, None) + } +} + +//============================================================================= +// Protobuf required and primitive field handling +//============================================================================= + +/// Convenience/shorthand macro for parsing required protobuf fields. +macro_rules! proto_required_field { + ($input:expr, $context:expr, $field:ident) => { + proto_required_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + crate::parse::traversal::push_proto_required_field( + $context, + &$input.$field.as_ref(), + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $($args:expr),*) => { + proto_required_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Convenience/shorthand macro for parsing required protobuf fields that were +/// wrapped in a Box by prost. +macro_rules! proto_boxed_required_field { + ($input:expr, $context:expr, $field:ident) => { + proto_boxed_required_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + crate::parse::traversal::push_proto_required_field( + $context, + &$input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $($args:expr),*) => { + proto_boxed_required_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Convenience/shorthand macro for parsing primitive protobuf fields. +macro_rules! 
proto_primitive_field { + ($input:expr, $context:expr, $field:ident) => { + proto_primitive_field!($input, $context, $field, |x, _| Ok(x.to_owned())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + crate::parse::traversal::push_proto_required_field( + $context, + &Some(&$input.$field), + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $($args:expr),*) => { + proto_primitive_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Parse and push a required field of some message type. If the field is +/// not populated, a MissingField diagnostic is pushed automatically, and +/// an empty node is returned as an error recovery placeholder. +pub fn push_proto_required_field( + context: &mut context::Context, + field: &Option>, + field_name: &'static str, + unknown_subtree: bool, + parser: FP, +) -> RequiredResult +where + TF: InputNode, + FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result, +{ + if let (Some(node), result) = + push_proto_field(context, field, field_name, unknown_subtree, parser) + { + (node, result) + } else { + ediagnostic!(context, Error, ProtoMissingField, field_name); + (Arc::new(TF::type_to_node()), None) + } +} + +/// Convenience/shorthand macro for parsing enumeration protobuf fields. +macro_rules! proto_enum_field { + ($input:expr, $context:expr, $field:ident, $typ:ty) => { + proto_enum_field!($input, $context, $field, $typ, |x, _| Ok(x.to_owned())) + }; + ($input:expr, $context:expr, $field:ident, $typ:ty, $parser:expr) => { + crate::parse::traversal::push_proto_enum_field::<$typ, _, _>( + $context, + $input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $typ:ty, $parser:expr, $($args:expr),*) => { + proto_enum_field!($input, $context, $field, $typ, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Parse and push an enumeration field of some message type. The i32 in the +/// struct generated by prost is automatically converted to the enum; if the +/// value is out of range, an error is generated. +pub fn push_proto_enum_field( + context: &mut context::Context, + field: i32, + field_name: &'static str, + unknown_subtree: bool, + parser: FP, +) -> RequiredResult +where + TF: ProtoEnum, + FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result, +{ + if let Some(field) = TF::proto_enum_from_i32(field) { + push_proto_required_field(context, &Some(&field), field_name, unknown_subtree, parser) + } else { + ( + push_proto_required_field( + context, + &Some(&field), + field_name, + unknown_subtree, + |x, y| { + diagnostic!( + y, + Error, + IllegalValue, + "unknown value {x} for {}", + TF::proto_enum_type() + ); + Ok(()) + }, + ) + .0, + None, + ) + } +} + +/// Convenience/shorthand macro for parsing enumeration protobuf fields of +/// which the value must be specified. +macro_rules! 
proto_required_enum_field { + ($input:expr, $context:expr, $field:ident, $typ:ty) => { + proto_required_enum_field!($input, $context, $field, $typ, |x, _| Ok(x.to_owned())) + }; + ($input:expr, $context:expr, $field:ident, $typ:ty, $parser:expr) => { + crate::parse::traversal::push_proto_required_enum_field::<$typ, _, _>( + $context, + $input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + ) + }; + ($input:expr, $context:expr, $field:ident, $typ:ty, $parser:expr, $($args:expr),*) => { + proto_required_enum_field!($input, $context, $field, $typ, |x, y| $parser(x, y, $($args),*)) + }; +} + +/// Parse and push an enumeration field of some message type. The i32 in the +/// struct generated by prost is automatically converted to the enum; if the +/// value is out of range, an error is generated. +pub fn push_proto_required_enum_field( + context: &mut context::Context, + field: i32, + field_name: &'static str, + unknown_subtree: bool, + parser: FP, +) -> RequiredResult +where + TF: ProtoEnum, + FP: FnOnce(&TF, &mut context::Context) -> diagnostic::Result, +{ + push_proto_enum_field(context, field, field_name, unknown_subtree, |x, y| { + if field == 0 { + diagnostic!( + y, + Error, + IllegalValue, + "this enum may not be left unspecified" + ); + } + parser(x, y) + }) +} + +//============================================================================= +// Protobuf repeated field handling +//============================================================================= + +/// Convenience/shorthand macro for parsing repeated protobuf fields. +macro_rules! proto_repeated_field { + ($input:expr, $context:expr, $field:ident) => { + proto_repeated_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + proto_repeated_field!($input, $context, $field, $parser, |_, _, _, _, _| ()) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $validator:expr) => { + crate::parse::traversal::push_proto_repeated_field( + $context, + &$input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + $validator, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $validator:expr, $($args:expr),*) => { + proto_repeated_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*), $validator) + }; +} + +/// Parse and push a repeated field of some message type. +pub fn push_proto_repeated_field( + context: &mut context::Context, + field: &[TF], + field_name: &'static str, + unknown_subtree: bool, + mut parser: FP, + mut validator: FV, +) -> RepeatedResult +where + TF: InputNode, + FP: FnMut(&TF, &mut context::Context) -> diagnostic::Result, + FV: FnMut(&TF, &mut context::Context, usize, &Arc, Option<&TR>), +{ + if !context.set_field_parsed(field_name) { + panic!("field {field_name} was parsed multiple times"); + } + + field + .iter() + .enumerate() + .map(|(index, child)| { + let (node, result) = push_child( + context, + child, + path::PathElement::Repeated(field_name.to_string(), index), + unknown_subtree, + &mut parser, + ); + validator(child, context, index, &node, result.as_ref()); + (node, result) + }) + .unzip() +} + +/// Convenience/shorthand macro for parsing repeated protobuf fields for which +/// at least one element must exist. +macro_rules! 
proto_required_repeated_field { + ($input:expr, $context:expr, $field:ident) => { + proto_required_repeated_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr) => { + proto_required_repeated_field!($input, $context, $field, $parser, |_, _, _, _, _| ()) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $validator:expr) => { + crate::parse::traversal::push_proto_required_repeated_field( + $context, + &$input.$field, + crate::input::proto::cook_ident(stringify!($field)), + false, + $parser, + $validator, + ) + }; + ($input:expr, $context:expr, $field:ident, $parser:expr, $validator:expr, $($args:expr),*) => { + proto_required_repeated_field!($input, $context, $field, |x, y| $parser(x, y, $($args),*), $validator) + }; +} + +/// Parse and push a repeated field of some message type, and check that at +/// least one element exists. +pub fn push_proto_required_repeated_field( + context: &mut context::Context, + field: &[TF], + field_name: &'static str, + unknown_subtree: bool, + parser: FP, + validator: FV, +) -> RepeatedResult +where + TF: InputNode, + FP: FnMut(&TF, &mut context::Context) -> diagnostic::Result, + FV: FnMut(&TF, &mut context::Context, usize, &Arc, Option<&TR>), +{ + if field.is_empty() { + ediagnostic!(context, Error, ProtoMissingField, field_name); + } + push_proto_repeated_field( + context, + field, + field_name, + unknown_subtree, + parser, + validator, + ) +} + +//============================================================================= +// Protobuf root message handling +//============================================================================= + +/// Parses a serialized protobuf message using the given root parse function, +/// initial state, and configuration. +pub fn parse_proto( + buffer: B, + root_name: &'static str, + root_parser: F, + state: &mut context::State, + config: &config::Config, +) -> parse_result::ParseResult +where + T: prost::Message + InputNode + Default, + F: FnOnce(&T, &mut context::Context) -> diagnostic::Result<()>, + B: prost::bytes::Buf, +{ + match T::decode(buffer) { + Err(err) => { + // Create a minimal root node with just the decode error + // diagnostic. + let mut root = T::type_to_node(); + + // Create a root context for it. + let mut context = context::Context::new(root_name, &mut root, state, config); + + // Push the diagnostic using the context. + context.push_diagnostic(diagnostic::RawDiagnostic { + cause: ecause!(ProtoParseFailed, err), + level: diagnostic::Level::Error, + path: path::PathBuf { + root: root_name, + elements: vec![], + }, + }); + + parse_result::ParseResult { root } + } + Ok(input) => { + // Create the root node. + let mut root = input.data_to_node(); + + // Create the root context. + let mut context = context::Context::new(root_name, &mut root, state, config); + + // Call the provided parser function. + let success = root_parser(&input, &mut context) + .map_err(|cause| { + diagnostic!(&mut context, Error, cause); + }) + .is_ok(); + + // Handle any fields not handled by the provided parse function. + // Only generate a warning diagnostic for unhandled children if the + // parse function succeeded. 
+            handle_unknown_children(&input, &mut context, success);
+
+            parse_result::ParseResult { root }
+        }
+    }
+}
+
+//=============================================================================
+// YAML object handling
+//=============================================================================
+
+/// Convenience/shorthand macro for parsing optional YAML fields.
+macro_rules! yaml_field {
+    ($input:expr, $context:expr, $field:expr) => {
+        yaml_field!($input, $context, $field, |_, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $field:expr, $parser:expr) => {
+        crate::parse::traversal::push_yaml_field($input, $context, $field, false, $parser)
+    };
+}
+
+/// Parse and push an optional YAML field.
+pub fn push_yaml_field<TR, TS, FP>(
+    input: &yaml::Value,
+    context: &mut context::Context,
+    field_name: TS,
+    unknown_subtree: bool,
+    parser: FP,
+) -> diagnostic::Result<OptionalResult<TR>>
+where
+    TS: AsRef<str>,
+    FP: FnOnce(&yaml::Value, &mut context::Context) -> diagnostic::Result<TR>,
+{
+    if let serde_json::Value::Object(input) = input {
+        let field_name = field_name.as_ref();
+        if !context.set_field_parsed(field_name) {
+            panic!("field {field_name} was parsed multiple times");
+        }
+
+        if let Some(child) = input.get(field_name) {
+            let (field_output, result) = push_child(
+                context,
+                child,
+                path::PathElement::Field(field_name.to_string()),
+                unknown_subtree,
+                parser,
+            );
+            Ok((Some(field_output), result))
+        } else {
+            Ok((None, None))
+        }
+    } else {
+        Err(cause!(YamlInvalidType, "object expected"))
+    }
+}
+
+/// Convenience/shorthand macro for parsing required YAML fields.
+macro_rules! yaml_required_field {
+    ($input:expr, $context:expr, $field:expr) => {
+        yaml_required_field!($input, $context, $field, |_, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $field:expr, $parser:expr) => {
+        crate::parse::traversal::push_yaml_required_field($input, $context, $field, false, $parser)
+    };
+}
+
+/// Parse and push a required field of a YAML object. If the field does not
+/// exist, a MissingField diagnostic is pushed automatically, and an empty node
+/// is returned as an error recovery placeholder.
+pub fn push_yaml_required_field<TR, TS, FP>(
+    input: &yaml::Value,
+    context: &mut context::Context,
+    field_name: TS,
+    unknown_subtree: bool,
+    parser: FP,
+) -> diagnostic::Result<RequiredResult<TR>>
+where
+    TS: AsRef<str>,
+    FP: FnOnce(&yaml::Value, &mut context::Context) -> diagnostic::Result<TR>,
+{
+    let field_name = field_name.as_ref();
+    if let (Some(node), result) =
+        push_yaml_field(input, context, field_name, unknown_subtree, parser)?
+    {
+        Ok((node, result))
+    } else {
+        ediagnostic!(context, Error, YamlMissingKey, field_name);
+        Ok((
+            Arc::new(tree::NodeType::YamlPrimitive(primitive_data::PrimitiveData::Null).into()),
+            None,
+        ))
+    }
+}
+
+//=============================================================================
+// YAML array handling
+//=============================================================================
+
+/// Convenience/shorthand macro for parsing a YAML array that may be empty.
+macro_rules! yaml_array {
+    ($input:expr, $context:expr) => {
+        yaml_array!($input, $context, |_, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $parser:expr) => {
+        yaml_array!($input, $context, $parser, 0)
+    };
+    ($input:expr, $context:expr, $parser:expr, $min_size:expr) => {
+        crate::parse::traversal::push_yaml_array($input, $context, $min_size, false, $parser)
+    };
+}
+
+/// Convenience/shorthand macro for parsing a YAML array that must have at
+/// least one value.
+macro_rules!
yaml_required_array { + ($input:expr, $context:expr) => { + yaml_required_array!($input, $context, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $parser:expr) => { + yaml_array!($input, $context, $parser, 1) + }; +} + +/// Parse and push an optional YAML array element. +pub fn push_yaml_element( + input: &yaml::Array, + context: &mut context::Context, + index: usize, + unknown_subtree: bool, + parser: FP, +) -> OptionalResult +where + FP: FnOnce(&yaml::Value, &mut context::Context) -> diagnostic::Result, +{ + if !context.set_field_parsed(index) { + panic!("element {index} was parsed multiple times"); + } + + if let Some(child) = input.get(index) { + let (field_output, result) = push_child( + context, + child, + path::PathElement::Index(index), + unknown_subtree, + parser, + ); + (Some(field_output), result) + } else { + (None, None) + } +} + +/// Parse and push a required element of a YAML array. If the element does not +/// exist, a MissingElement diagnostic is pushed automatically, and an empty node +/// is returned as an error recovery placeholder. +pub fn push_yaml_required_element( + input: &yaml::Array, + context: &mut context::Context, + index: usize, + unknown_subtree: bool, + parser: FP, +) -> RequiredResult +where + FP: FnOnce(&yaml::Value, &mut context::Context) -> diagnostic::Result, +{ + if let (Some(node), result) = push_yaml_element(input, context, index, unknown_subtree, parser) + { + (node, result) + } else { + diagnostic!(context, Error, YamlMissingElement, "index {index}"); + ( + Arc::new(tree::NodeType::YamlPrimitive(primitive_data::PrimitiveData::Null).into()), + None, + ) + } +} + +/// Parse and push a complete YAML array. If a required element does not exist, +/// a MissingElement diagnostic is pushed automatically, and an empty node is +/// returned as an error recovery placeholder. +pub fn push_yaml_array( + input: &yaml::Value, + context: &mut context::Context, + min_size: usize, + unknown_subtree: bool, + mut parser: FP, +) -> diagnostic::Result> +where + FP: FnMut(&yaml::Value, &mut context::Context) -> diagnostic::Result, +{ + if let serde_json::Value::Array(input) = input { + let size = std::cmp::max(min_size, input.len()); + Ok((0..size) + .into_iter() + .map(|index| { + push_yaml_required_element(input, context, index, unknown_subtree, &mut parser) + }) + .unzip()) + } else { + Err(cause!(YamlInvalidType, "array expected")) + } +} + +/// Shorthand for fields that must be arrays if specified. +macro_rules! yaml_repeated_field { + ($input:expr, $context:expr, $field:expr) => { + yaml_repeated_field!($input, $context, $field, |_, _| Ok(())) + }; + ($input:expr, $context:expr, $field:expr, $parser:expr) => { + yaml_repeated_field!($input, $context, $field, $parser, 0) + }; + ($input:expr, $context:expr, $field:expr, $parser:expr, $min_size:expr) => { + crate::parse::traversal::push_yaml_repeated_field( + $input, $context, $field, false, $min_size, false, $parser, + ) + }; +} + +/// Shorthand for fields that must be arrays. +macro_rules! 
yaml_required_repeated_field {
+    ($input:expr, $context:expr, $field:expr) => {
+        yaml_required_repeated_field!($input, $context, $field, |_, _| Ok(()))
+    };
+    ($input:expr, $context:expr, $field:expr, $parser:expr) => {
+        yaml_required_repeated_field!($input, $context, $field, $parser, 1)
+    };
+    ($input:expr, $context:expr, $field:expr, $parser:expr, $min_size:expr) => {
+        crate::parse::traversal::push_yaml_repeated_field(
+            $input, $context, $field, true, $min_size, false, $parser,
+        )
+    };
+}
+
+/// Parse and push a complete YAML array field. If a required element does not
+/// exist, a MissingElement diagnostic is pushed automatically, and an empty
+/// node is returned as an error recovery placeholder.
+pub fn push_yaml_repeated_field<TR, FP>(
+    input: &yaml::Value,
+    context: &mut context::Context,
+    field_name: &'static str,
+    field_required: bool,
+    min_size: usize,
+    unknown_subtree: bool,
+    parser: FP,
+) -> diagnostic::Result<RepeatedResult<TR>>
+where
+    FP: FnMut(&yaml::Value, &mut context::Context) -> diagnostic::Result<TR>,
+{
+    Ok(if field_required {
+        push_yaml_required_field(input, context, field_name, unknown_subtree, |x, y| {
+            yaml_array!(x, y, parser, min_size)
+        })?
+        .1
+    } else {
+        push_yaml_field(input, context, field_name, unknown_subtree, |x, y| {
+            yaml_array!(x, y, parser, min_size)
+        })?
+        .1
+    }
+    .unwrap_or_else(|| (vec![], vec![])))
+}
+
+//=============================================================================
+// YAML primitive handling
+//=============================================================================
+
+/// Convenience/shorthand macro for parsing YAML primitive values.
+macro_rules! yaml_prim {
+    ($typ:ident) => {
+        |x, y| crate::parse::traversal::yaml_primitive_parsers::$typ(x, y, |x, _| Ok(x.to_owned()))
+    };
+    ($typ:ident, $parser:expr) => {
+        |x, y| crate::parse::traversal::yaml_primitive_parsers::$typ(x, y, $parser)
+    };
+}
+
+pub mod yaml_primitive_parsers {
+    use super::*;
+
+    /// Boolean primitive helper.
+    pub fn bool<TR, FP>(
+        x: &yaml::Value,
+        y: &mut context::Context,
+        parser: FP,
+    ) -> diagnostic::Result<TR>
+    where
+        FP: FnOnce(&bool, &mut context::Context) -> diagnostic::Result<TR>,
+    {
+        if let serde_json::Value::Bool(x) = x {
+            parser(x, y)
+        } else {
+            Err(cause!(YamlInvalidType, "boolean expected"))
+        }
+    }
+
+    /// Signed integer primitive helper.
+    pub fn i64<TR, FP>(
+        x: &yaml::Value,
+        y: &mut context::Context,
+        parser: FP,
+    ) -> diagnostic::Result<TR>
+    where
+        FP: FnOnce(&i64, &mut context::Context) -> diagnostic::Result<TR>,
+    {
+        if let serde_json::Value::Number(x) = x {
+            if let Some(x) = x.as_i64() {
+                return parser(&x, y);
+            }
+        }
+        Err(cause!(YamlInvalidType, "signed integer expected"))
+    }
+
+    /// Unsigned integer primitive helper.
+    pub fn u64<TR, FP>(
+        x: &yaml::Value,
+        y: &mut context::Context,
+        parser: FP,
+    ) -> diagnostic::Result<TR>
+    where
+        FP: FnOnce(&u64, &mut context::Context) -> diagnostic::Result<TR>,
+    {
+        if let serde_json::Value::Number(x) = x {
+            if let Some(x) = x.as_u64() {
+                return parser(&x, y);
+            }
+        }
+        Err(cause!(YamlInvalidType, "unsigned integer expected"))
+    }
+
+    /// Float primitive helper.
+    pub fn f64<TR, FP>(
+        x: &yaml::Value,
+        y: &mut context::Context,
+        parser: FP,
+    ) -> diagnostic::Result<TR>
+    where
+        FP: FnOnce(&f64, &mut context::Context) -> diagnostic::Result<TR>,
+    {
+        if let serde_json::Value::Number(x) = x {
+            if let Some(x) = x.as_f64() {
+                return parser(&x, y);
+            }
+        }
+        Err(cause!(YamlInvalidType, "floating point number expected"))
+    }
+
+    /// String primitive helper.
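+    ///
+    /// For example (hypothetical field name), an optional string field of a
+    /// YAML object can be parsed with:
+    ///
+    /// ```ignore
+    /// yaml_field!(x, y, "name", yaml_prim!(str))
+    /// ```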
+ pub fn str( + x: &yaml::Value, + y: &mut context::Context, + parser: FP, + ) -> diagnostic::Result + where + FP: FnOnce(&str, &mut context::Context) -> diagnostic::Result, + { + if let serde_json::Value::String(x) = x { + parser(x, y) + } else { + Err(cause!(YamlInvalidType, "string expected")) + } + } +} + +//============================================================================= +// YAML root handling +//============================================================================= + +/// Attempts to resolve a URI. +fn resolve_uri( + uri: &str, + context: &mut context::Context, +) -> diagnostic::Result { + // Apply yaml_uri_overrides configuration. + let remapped_uri = context + .config + .uri_overrides + .iter() + .find_map(|(pattern, mapping)| { + if pattern.matches(uri) { + Some(mapping.as_ref().map(|x| &x[..])) + } else { + None + } + }); + let is_remapped = remapped_uri.is_some(); + let remapped_uri = remapped_uri.unwrap_or(Some(uri)); + + let remapped_uri = if let Some(remapped_uri) = remapped_uri { + remapped_uri.to_owned() + } else { + return Err(cause!( + YamlResolutionDisabled, + "YAML resolution for {uri} was disabled" + )); + }; + if is_remapped { + diagnostic!(context, Info, Yaml, "URI was remapped to {remapped_uri}"); + } + + // If a custom download function is specified, use it to resolve. + if let Some(ref resolver) = context.config.uri_resolver { + return resolver(&remapped_uri) + .map_err(|x| ecause!(YamlResolutionFailed, x.as_ref().to_string())); + } + + // Parse as a URL. + let url = match url::Url::parse(&remapped_uri) { + Ok(url) => url, + Err(e) => { + return Err(if is_remapped { + cause!( + YamlResolutionFailed, + "configured URI remapping ({remapped_uri}) did not parse as URL: {e}" + ) + } else { + cause!( + YamlResolutionFailed, + "failed to parse {remapped_uri} as URL: {e}" + ) + }); + } + }; + + // Reject anything that isn't file://-based. + if url.scheme() != "file" { + return Err(if is_remapped { + cause!( + YamlResolutionFailed, + "configured URI remapping ({remapped_uri}) does not use file:// scheme" + ) + } else { + cause!(YamlResolutionFailed, "URI does not use file:// scheme") + }); + } + + // Convert to path. + let path = match url.to_file_path() { + Ok(path) => path, + Err(_) => { + return Err(if is_remapped { + cause!( + YamlResolutionFailed, + "configured URI remapping ({remapped_uri}) could not be converted to file path" + ) + } else { + cause!( + YamlResolutionFailed, + "URI could not be converted to file path" + ) + }); + } + }; + + // Read the file. + std::fs::read(path) + .map_err(|e| { + if is_remapped { + cause!( + YamlResolutionFailed, + "failed to file remapping for URI ({remapped_uri}): {e}" + ) + } else { + ecause!(YamlResolutionFailed, e) + } + }) + .map(|d| -> config::BinaryData { Box::new(d) }) +} + +/// Resolves a URI to a YAML file, parses the YAML syntax, and optionally +/// validates it using the given JSON schema. +fn load_yaml( + uri: &str, + context: &mut context::Context, + schema: Option<&jsonschema::JSONSchema>, +) -> Option { + // Try to resolve the YAML file. Note that failure to resolve is a warning, + // not an error; it means the plan isn't valid in the current environment, + // but it might still be valid in another one, in particular for consumers + // that don't need to be able to resolve the YAML files to use the plan. + let binary_data = match resolve_uri(uri, context) { + Err(e) => { + diagnostic!(context, Warning, e); + return None; + } + Ok(x) => x, + }; + + // Parse as UTF-8. 
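+    // (The resolver hands back raw bytes; anything that is not valid UTF-8
+    // is rejected here with a parse-failure diagnostic rather than being
+    // transcoded.)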
+ let string_data = match std::str::from_utf8(binary_data.as_ref().as_ref()) { + Err(e) => { + ediagnostic!(context, Error, YamlParseFailed, e); + return None; + } + Ok(x) => x, + }; + + // Parse as YAML. + let yaml_data = match yaml_rust::YamlLoader::load_from_str(string_data) { + Err(e) => { + ediagnostic!(context, Error, YamlParseFailed, e); + return None; + } + Ok(x) => { + if x.len() > 1 { + diagnostic!( + context, + Warning, + YamlParseFailed, + "YAML file contains multiple documents; ignoring all but the first" + ); + } + match x.into_iter().next() { + None => { + diagnostic!( + context, + Error, + YamlParseFailed, + "YAML file contains zero documents" + ); + return None; + } + Some(x) => x, + } + } + }; + + // Convert to JSON DOM. + let json_data = match yaml::yaml_to_json(yaml_data, context.path()) { + Err(e) => { + diagnostic!(context, e); + return None; + } + Ok(x) => x, + }; + + // Validate with schema. + if let Some(schema) = schema { + if let Err(es) = schema.validate(&json_data) { + for e in es { + ediagnostic!(context, Error, YamlSchemaValidationFailed, e); + } + return None; + } + } + + Some(json_data) +} + +/// Attempt to load and parse a YAML file using the given root parse function, +/// initial state, and configuration. +pub fn parse_yaml( + uri: TS, + context: &mut context::Context, + schema: Option<&jsonschema::JSONSchema>, + parser: FP, +) -> Arc +where + TS: AsRef, + FP: Fn(&yaml::Value, &mut context::Context) -> diagnostic::Result<()>, +{ + let uri = uri.as_ref(); + let uri_reference = extension::NamedReference::new(Some(uri), context.parent_path_buf()); + + // Resolve the YAML file. + let yaml_info = Arc::new(if let Some(root_input) = load_yaml(uri, context, schema) { + // Create an empty YamlData object. + *context.yaml_data_opt() = Some(extension::YamlData::new(uri_reference)); + + // Create the node for the YAML data root. + let mut root_output = root_input.data_to_node(); + + // Create the path element for referring to the YAML data root. + let path_element = path::PathElement::Field("data".to_string()); + + // Create the context for the YAML data root. + let mut root_context = context.child(&mut root_output, path_element.clone()); + + // Create a PathBuf for the root node. + let root_path = root_context.path_buf(); + + // Call the provided root parser. + let success = parser(&root_input, &mut root_context) + .map_err(|cause| { + diagnostic!(&mut root_context, Error, cause); + }) + .is_ok(); + + // Handle any fields not handled by the provided parse function. + handle_unknown_children(&root_input, &mut root_context, success); + + // Push and return the completed node. + let root_output = Arc::new(root_output); + context.push(tree::NodeData::Child(tree::Child { + path_element, + node: root_output.clone(), + recognized: true, + })); + + // Take the constructed YAML data object from the context. + let mut yaml_data = context.yaml_data_opt().take().unwrap(); + + // Configure the reference to the root node in the YamlData object. + yaml_data.data.path = root_path; + yaml_data.data.node = root_output; + + // Wrap the completed YAML data object in an Arc. + let yaml_data = Arc::new(yaml_data); + + // The node type will have been set as if this is a normal string + // primitive. We want extra information though, namely the contents of + // the YAML file. So we change the node type. + context.replace_node_type(tree::NodeType::YamlReference(yaml_data.clone())); + + // Construct the YAML information object. 
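+        // (Resolved carries the parsed YAML data so later references can
+        // traverse it; the Unresolved variant below only records the URI.)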
+ extension::YamlInfo::Resolved(yaml_data) + } else { + extension::YamlInfo::Unresolved(uri_reference) + }); + + yaml_info +} diff --git a/rs/src/parse/types.rs b/rs/src/parse/types.rs new file mode 100644 index 00000000..3ebb5194 --- /dev/null +++ b/rs/src/parse/types.rs @@ -0,0 +1,1156 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Module providing parse/validation functions for types. + +use std::sync::Arc; + +use crate::input::proto::substrait; +use crate::output::comment; +use crate::output::data_type; +use crate::output::data_type::ParameterInfo; +use crate::output::diagnostic; +use crate::output::extension; +use crate::parse::context; +use crate::parse::extensions; +use crate::string_util; + +/// Parses a required nullability enum. +fn parse_required_nullability( + x: &substrait::r#type::Nullability, + _: &mut context::Context, +) -> diagnostic::Result { + match x { + substrait::r#type::Nullability::Nullable => Ok(true), + substrait::r#type::Nullability::Required => Ok(false), + substrait::r#type::Nullability::Unspecified => Err(cause!( + IllegalValue, + "nullability information is required in this context" + )), + } +} + +/// Parses an optional type variation reference. +fn parse_type_variation_reference( + x: &u32, + y: &mut context::Context, +) -> diagnostic::Result { + if *x == 0 { + Ok(None) + } else { + Some(extensions::simple::parse_type_variation_reference(x, y)).transpose() + } +} + +/// Parses an unsigned integer type parameter. +fn parse_integral_type_parameter( + x: &i32, + _: &mut context::Context, +) -> diagnostic::Result { + Ok(u64::try_from(*x) + .map_err(|_| cause!(IllegalValue, "integral type parameters cannot be negative"))? + .into()) +} + +/// Macro for simple types, since they're all the same. +macro_rules! parse_simple_type { + ($input:expr, $context:expr, $typ:ident) => {{ + // Parse fields. + let nullable = proto_enum_field!( + $input, + $context, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + $input, + $context, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(nullable), Some(variation)) = (nullable, variation) { + data_type::DataType::new( + data_type::Class::Simple(data_type::Simple::$typ), + nullable, + variation, + vec![], + ) + .map_err(|e| diagnostic!($context, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + $context.set_data_type(data_type); + + Ok(()) + }}; +} + +/// Parses a boolean type. +pub fn parse_boolean( + x: &substrait::r#type::Boolean, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Boolean) +} + +/// Parses a i8 type. +pub fn parse_i8(x: &substrait::r#type::I8, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, I8) +} + +/// Parses a i16 type. +pub fn parse_i16(x: &substrait::r#type::I16, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, I16) +} + +/// Parses a i32 type. +pub fn parse_i32(x: &substrait::r#type::I32, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, I32) +} + +/// Parses a i64 type. +pub fn parse_i64(x: &substrait::r#type::I64, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, I64) +} + +/// Parses a fp32 type. 
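+///
+/// (Like the other simple-type parsers above, this is a thin wrapper around
+/// `parse_simple_type!`, which checks nullability and the type variation
+/// reference and attaches the resulting data type to the node.)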
+pub fn parse_fp32(x: &substrait::r#type::Fp32, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Fp32) +} + +/// Parses a fp64 type. +pub fn parse_fp64(x: &substrait::r#type::Fp64, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Fp64) +} + +/// Parses a string type. +pub fn parse_string( + x: &substrait::r#type::String, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, String) +} + +/// Parses a binary type. +pub fn parse_binary( + x: &substrait::r#type::Binary, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Binary) +} + +/// Parses a timestamp type. +pub fn parse_timestamp( + x: &substrait::r#type::Timestamp, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Timestamp) +} + +/// Parses a date type. +pub fn parse_date(x: &substrait::r#type::Date, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Date) +} + +/// Parses a time type. +pub fn parse_time(x: &substrait::r#type::Time, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Time) +} + +/// Parses a interval-year type. +pub fn parse_interval_year( + x: &substrait::r#type::IntervalYear, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, IntervalYear) +} + +/// Parses a interval-day type. +pub fn parse_interval_day( + x: &substrait::r#type::IntervalDay, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, IntervalDay) +} + +/// Parses a timestamp-tz type. +pub fn parse_timestamp_tz( + x: &substrait::r#type::TimestampTz, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_simple_type!(x, y, TimestampTz) +} + +/// Parses a UUID type. +pub fn parse_uuid(x: &substrait::r#type::Uuid, y: &mut context::Context) -> diagnostic::Result<()> { + parse_simple_type!(x, y, Uuid) +} + +/// Macro for compound types with just a length, since they're all the same. +macro_rules! parse_compound_type_with_length { + ($input:expr, $context:expr, $typ:ident) => {{ + // Parse fields. + let length = + proto_primitive_field!($input, $context, length, parse_integral_type_parameter).1; + let nullable = proto_enum_field!( + $input, + $context, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + $input, + $context, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(length), Some(nullable), Some(variation)) = + (length, nullable, variation) + { + data_type::DataType::new( + data_type::Class::Compound(data_type::Compound::$typ), + nullable, + variation, + vec![length], + ) + .map_err(|e| diagnostic!($context, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + $context.set_data_type(data_type); + + Ok(()) + }}; +} + +/// Parses a fixed-char type. +pub fn parse_fixed_char( + x: &substrait::r#type::FixedChar, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_compound_type_with_length!(x, y, FixedChar) +} + +/// Parses a varchar type. +pub fn parse_var_char( + x: &substrait::r#type::VarChar, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_compound_type_with_length!(x, y, VarChar) +} + +/// Parses a fixed-binary type. 
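+///
+/// (Uses `parse_compound_type_with_length!`, shared with fixed-char and
+/// varchar; the single length field becomes the type's only parameter.)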
+pub fn parse_fixed_binary( + x: &substrait::r#type::FixedBinary, + y: &mut context::Context, +) -> diagnostic::Result<()> { + parse_compound_type_with_length!(x, y, FixedBinary) +} + +/// Parses a decimal type. +pub fn parse_decimal( + x: &substrait::r#type::Decimal, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse fields. + let precision = proto_primitive_field!(x, y, precision, parse_integral_type_parameter).1; + let scale = proto_primitive_field!(x, y, scale, parse_integral_type_parameter).1; + let nullable = proto_enum_field!( + x, + y, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + x, + y, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(precision), Some(scale), Some(nullable), Some(variation)) = + (precision, scale, nullable, variation) + { + data_type::DataType::new( + data_type::Class::Compound(data_type::Compound::Decimal), + nullable, + variation, + vec![precision, scale], + ) + .map_err(|e| diagnostic!(y, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a struct type. +pub fn parse_struct( + x: &substrait::r#type::Struct, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse fields. + let types = proto_repeated_field!(x, y, types, parse_type) + .0 + .iter() + .map(|n| n.data_type.clone().unwrap_or_default().into()) + .collect(); + let nullable = proto_enum_field!( + x, + y, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + x, + y, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(nullable), Some(variation)) = (nullable, variation) { + data_type::DataType::new( + data_type::Class::Compound(data_type::Compound::Struct), + nullable, + variation, + types, + ) + .map_err(|e| diagnostic!(y, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a list type. +pub fn parse_list(x: &substrait::r#type::List, y: &mut context::Context) -> diagnostic::Result<()> { + // Parse fields. + let element_type = proto_boxed_required_field!(x, y, r#type, parse_type) + .0 + .data_type + .clone() + .unwrap_or_default(); + let nullable = proto_enum_field!( + x, + y, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + x, + y, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(nullable), Some(variation)) = (nullable, variation) { + data_type::DataType::new( + data_type::Class::Compound(data_type::Compound::List), + nullable, + variation, + vec![element_type.into()], + ) + .map_err(|e| diagnostic!(y, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a map type. +pub fn parse_map(x: &substrait::r#type::Map, y: &mut context::Context) -> diagnostic::Result<()> { + // Parse fields. 
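+    // (The key and value fields are recursive Type messages, which prost
+    // wraps in a Box; hence the boxed variant of the required-field macro.)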
+ let key_type = proto_boxed_required_field!(x, y, key, parse_type) + .0 + .data_type + .clone() + .unwrap_or_default(); + let value_type = proto_boxed_required_field!(x, y, value, parse_type) + .0 + .data_type + .clone() + .unwrap_or_default(); + let nullable = proto_enum_field!( + x, + y, + nullability, + substrait::r#type::Nullability, + parse_required_nullability + ) + .1; + let variation = proto_primitive_field!( + x, + y, + type_variation_reference, + parse_type_variation_reference + ) + .1; + + // Convert to internal type object. + let data_type = if let (Some(nullable), Some(variation)) = (nullable, variation) { + data_type::DataType::new( + data_type::Class::Compound(data_type::Compound::Map), + nullable, + variation, + vec![key_type.into(), value_type.into()], + ) + .map_err(|e| diagnostic!(y, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a user-defined type. +pub fn parse_user_defined(x: &u32, y: &mut context::Context) -> diagnostic::Result<()> { + // Parse fields. + let user_type = extensions::simple::parse_type_reference(x, y) + .map_err(|e| diagnostic!(y, Error, e)) + .ok(); + + // Convert to internal type object. + let data_type = if let Some(user_type) = user_type { + data_type::DataType::new( + data_type::Class::UserDefined(user_type), + false, + None, + vec![], + ) + .map_err(|e| diagnostic!(y, Error, e)) + .unwrap_or_default() + } else { + Arc::default() + }; + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a type kind. +pub fn parse_type_kind( + x: &substrait::r#type::Kind, + y: &mut context::Context, +) -> diagnostic::Result<()> { + match x { + substrait::r#type::Kind::Bool(x) => parse_boolean(x, y), + substrait::r#type::Kind::I8(x) => parse_i8(x, y), + substrait::r#type::Kind::I16(x) => parse_i16(x, y), + substrait::r#type::Kind::I32(x) => parse_i32(x, y), + substrait::r#type::Kind::I64(x) => parse_i64(x, y), + substrait::r#type::Kind::Fp32(x) => parse_fp32(x, y), + substrait::r#type::Kind::Fp64(x) => parse_fp64(x, y), + substrait::r#type::Kind::String(x) => parse_string(x, y), + substrait::r#type::Kind::Binary(x) => parse_binary(x, y), + substrait::r#type::Kind::Timestamp(x) => parse_timestamp(x, y), + substrait::r#type::Kind::Date(x) => parse_date(x, y), + substrait::r#type::Kind::Time(x) => parse_time(x, y), + substrait::r#type::Kind::IntervalYear(x) => parse_interval_year(x, y), + substrait::r#type::Kind::IntervalDay(x) => parse_interval_day(x, y), + substrait::r#type::Kind::TimestampTz(x) => parse_timestamp_tz(x, y), + substrait::r#type::Kind::Uuid(x) => parse_uuid(x, y), + substrait::r#type::Kind::FixedChar(x) => parse_fixed_char(x, y), + substrait::r#type::Kind::Varchar(x) => parse_var_char(x, y), + substrait::r#type::Kind::FixedBinary(x) => parse_fixed_binary(x, y), + substrait::r#type::Kind::Decimal(x) => parse_decimal(x, y), + substrait::r#type::Kind::Struct(x) => parse_struct(x, y), + substrait::r#type::Kind::List(x) => parse_list(x, y), + substrait::r#type::Kind::Map(x) => parse_map(x, y), + substrait::r#type::Kind::UserDefinedTypeReference(x) => parse_user_defined(x, y), + } +} + +fn describe_type(y: &mut context::Context, data_type: &Arc) { + let mut brief = match &data_type.class() { + data_type::Class::Simple(data_type::Simple::Boolean) => { + summary!(y, "Values of this type can be either true or false."); + String::from("boolean type") + } + data_type::Class::Simple(data_type::Simple::I8) => { 
+ summary!( + y, + "Implementations of this type must support all integers in \ + the range [-2^7, 2^7)." + ); + String::from("8-bit signed integer type") + } + data_type::Class::Simple(data_type::Simple::I16) => { + summary!( + y, + "Implementations of this type must support all integers in \ + the range [-2^15, 2^15)." + ); + String::from("16-bit signed integer type") + } + data_type::Class::Simple(data_type::Simple::I32) => { + summary!( + y, + "Implementations of this type must support all integers in \ + the range [-2^31, 2^31)." + ); + String::from("32-bit signed integer type") + } + data_type::Class::Simple(data_type::Simple::I64) => { + summary!( + y, + "Implementations of this type must support all integers in \ + the range [-2^63, 2^63)." + ); + String::from("64-bit signed integer type") + } + data_type::Class::Simple(data_type::Simple::Fp32) => { + summary!( + y, + "Implementations of this type must support a superset of the \ + values representable using IEEE 754 binary32." + ); + String::from("single-precision float type") + } + data_type::Class::Simple(data_type::Simple::Fp64) => { + summary!( + y, + "Implementations of this type must support a superset of the \ + values representable using IEEE 754 binary64." + ); + String::from("double-precision float type") + } + data_type::Class::Simple(data_type::Simple::String) => { + summary!( + y, + "Implementations of this type must support all strings \ + representable using UTF-8 encoding and up to 2^31-1 bytes of \ + storage." + ); + String::from("Unicode string type") + } + data_type::Class::Simple(data_type::Simple::Binary) => { + summary!( + y, + "Implementations of this type must support all byte strings \ + of up to 2^31-1 bytes in length." + ); + String::from("Binary string type") + } + data_type::Class::Simple(data_type::Simple::Timestamp) => { + summary!( + y, + "Implementations of this type must support all timestamps \ + within the range [1000-01-01 00:00:00.000000, \ + 9999-12-31 23:59:59.999999] with microsecond precision. \ + Timezone information is however not encoded, so contextual \ + information would be needed to map the timestamp to a fixed \ + point in time." + ); + String::from("Timezone-naive timestamp type") + } + data_type::Class::Simple(data_type::Simple::TimestampTz) => { + summary!( + y, + "Implementations of this type must support all timestamps \ + within the range [1000-01-01 00:00:00.000000 UTC, \ + 9999-12-31 23:59:59.999999 UTC] with microsecond precision." + ); + String::from("Timezone-aware timestamp type") + } + data_type::Class::Simple(data_type::Simple::Date) => { + summary!( + y, + "Implementations of this type must support all dates within \ + the range [1000-01-01, 9999-12-31]." + ); + String::from("Date type") + } + data_type::Class::Simple(data_type::Simple::Time) => { + summary!( + y, + "Implementations of this type must support all times of day \ + with microsecond precision, not counting leap seconds; that \ + is, any integer number of microseconds since the start of a \ + day in the range [0, 24*60*60*10^6]." + ); + String::from("Time-of-day type") + } + data_type::Class::Simple(data_type::Simple::IntervalYear) => { + // FIXME: the way this type is defined makes no sense; its + // definition conflicts with the analog representations of at least + // Arrow as specified on the website (assuming INTERVAL_MONTHS was + // intended), and intuitively does not make sense either. 
The way + // it's written, for example [10000y, -120000m] necessarily encodes + // a semantically different value [0y, 0m], rather than that these + // can just be aliases of each other. Wouldn't it be better to + // define it as needing to represent all integer numbers of months + // in the range [-120000, 120000]? If someone then really wants the + // current semantics, they can just use + // + // NSTRUCT + // + // with some additional constraints. However, an implementation + // that wants to encode this interval type as an integer number of + // years plus an integer number of months still complies with the + // [-120000, 120000] months requirement just fine. + // + // Renaming it to interval_month makes a lot more sense then too, + // i.e. a signed interval with at least month precision and + // +/- 10000 year range, and that's it. + summary!( + y, + "Implementations of this type must support a range of any \ + combination of years and months that total less than or equal \ + to 10000 years. Each component can be specified as positive or \ + negative." + ); + String::from("Year/month interval type") + } + data_type::Class::Simple(data_type::Simple::IntervalDay) => { + // FIXME: see note for IntervalYear, making this + // interval_microsecond, i.e. a signed interval with at least + // microsecond precision and +/- 10000 year range. + // + // Worth noting in addition that 2^63 nanoseconds is a lot more + // than 10000 years. It doesn't make much sense to me to use + // I64 limits (for a different precision to boot) when all the + // other limits are based around +/- 10000 years. + summary!( + y, + "Implementations of this type must support a range of any \ + combination of [-365*10000, 365*10000] days and \ + [ceil(-2^63/1000), floor(2^63/1000)] integer microseconds." + ); + String::from("Day/microsecond interval type") + } + data_type::Class::Simple(data_type::Simple::Uuid) => { + summary!( + y, + "Implementations of this type must support 2^128 different \ + values, typically represented using the following hex format: \ + c48ffa9e-64f4-44cb-ae47-152b4e60e77b." + ); + String::from("128-bit identifier type") + } + data_type::Class::Compound(data_type::Compound::FixedChar) => { + let length = data_type + .parameters() + .get(0) + .map(|x| x.to_string()) + .unwrap_or_else(|| String::from("?")); + summary!( + y, + "Implementations of this type must support all unicode \ + strings with exactly {length} characters (i.e. code points). \ + Values shorter than that must be right-padded with spaces." + ); + format!("Fixed-length ({length}) unicode string type") + } + data_type::Class::Compound(data_type::Compound::VarChar) => { + let length = data_type + .parameters() + .get(0) + .map(|x| x.to_string()) + .unwrap_or_else(|| String::from("?")); + summary!( + y, + "Implementations of this type must support all unicode \ + strings with 0 to {length} characters (i.e. code points)." + ); + format!("Variable-length ({length}) unicode string type") + } + data_type::Class::Compound(data_type::Compound::FixedBinary) => { + let length = data_type + .parameters() + .get(0) + .map(|x| x.to_string()) + .unwrap_or_else(|| String::from("?")); + summary!( + y, + "Implementations of this type must support all binary \ + strings of exactly {length} bytes in length. Values shorter \ + than that must be right-padded with zero bytes." 
+ ); + format!("Fixed-length ({length}) binary string type") + } + data_type::Class::Compound(data_type::Compound::Decimal) => { + let precision = data_type.int_parameter(0); + let scale = data_type.int_parameter(1); + let (p, i, s) = if let (Some(precision), Some(scale)) = (precision, scale) { + ( + precision.to_string(), + (precision - scale).to_string(), + scale.to_string(), + ) + } else { + (String::from("?"), String::from("?"), String::from("?")) + }; + summary!( + y, + "Implementations of this type must support all decimal \ + numbers with {i} integer digits and {s} fractional digits \ + (precision = {p}, scale = {s})." + ); + format!("Decimal number type with {i} integer and {s} fractional digits") + } + data_type::Class::Compound(data_type::Compound::Struct) + | data_type::Class::Compound(data_type::Compound::NamedStruct) => { + let n = data_type.parameters().len(); + if n == 1 { + summary!(y, "Structure with one field."); + String::from("Structure with one field") + } else { + summary!(y, "Structure with {n} fields."); + format!("Structure with {n} fields") + } + } + data_type::Class::Compound(data_type::Compound::List) => { + let e = data_type + .type_parameter(0) + .map(|t| t.to_string()) + .unwrap_or_else(|| String::from("?")); + summary!( + y, + "Implementations of this type must support all sequences of \ + 0 to 2^31-1 {e} elements." + ); + String::from("List type") + } + data_type::Class::Compound(data_type::Compound::Map) => { + // FIXME: the definition in the spec is technically a multimap, + // because it says nothing about key uniqueness, but that's + // probably not intentional (how would references work, then?). + // Also, unlike all the other types, there's no specified size + // limit here. Assuming the other size limits are 2^31-1 for + // Java compatibility, the same would need to apply here. + let k = data_type + .type_parameter(0) + .map(|t| t.to_string()) + .unwrap_or_else(|| String::from("?")); + let v = data_type + .type_parameter(1) + .map(|t| t.to_string()) + .unwrap_or_else(|| String::from("?")); + summary!( + y, + "Implementations of this type must support any mapping from \ + {k} keys to {v} values, consisting of up to 2^31-1 key-value \ + pairs. No key uniqueness check is required on insertion, but \ + resolving the mapping for a key for which multiple values are \ + defined is undefined behavior." + ); + String::from("Map type") + } + data_type::Class::UserDefined(u) => { + summary!(y, "Extension type {u}."); + if let Some(x) = &u.definition { + y.push_summary( + comment::Comment::new() + .plain("Internal structure corresponds to:") + .lo(), + ); + let mut first = true; + for (name, class) in &x.structure { + if first { + first = false; + } else { + y.push_summary(comment::Comment::new().li()); + } + summary!(y, "{}: {}", string_util::as_ident_or_string(name), class); + } + y.push_summary(comment::Comment::new().lc()); + } + format!("Extension type {}", u.name) + } + data_type::Class::Unresolved => { + summary!( + y, + "Failed to resolve information about this type due to \ + validation errors." + ); + String::from("Unresolved type") + } + }; + if data_type.nullable() { + brief += ", nullable"; + summary!( + y, + "Values of this type are optional, i.e. this type is nullable." + ); + } else { + summary!( + y, + "Values of this type are required, i.e. the type is not nullable." 
+ ); + } + let variation = if let Some(u) = data_type.variation() { + let mut variation = format!("This is the {u} variation of this type"); + if let Some(tv) = &u.definition { + if tv.function_behavior == extension::FunctionBehavior::Inherits { + variation += + ", which behaves the same as the base type w.r.t. overload resolution."; + } else { + variation += ", which behaves as a separate type w.r.t. overload resolution."; + } + } else { + variation += "."; + } + variation + } else { + String::from("This is the base variation of this type.") + }; + summary!(y, "{}", variation); + describe!(y, Type, "{}", brief); +} + +/// Parses a type. +pub fn parse_type(x: &substrait::Type, y: &mut context::Context) -> diagnostic::Result<()> { + // Parse fields. + let data_type = proto_required_field!(x, y, kind, parse_type_kind) + .0 + .data_type(); + + // Describe the data type. + describe_type(y, &data_type); + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Parses a named struct. +pub fn parse_named_struct( + x: &substrait::NamedStruct, + y: &mut context::Context, +) -> diagnostic::Result<()> { + // Parse fields. + proto_repeated_field!(x, y, names); + let node = proto_required_field!(x, y, r#struct, parse_struct).0; + + // Try to apply the names to the data type. + let data_type = match node.data_type().apply_field_names(&x.names) { + Err(e) => { + diagnostic!(y, Error, e); + node.data_type() + } + Ok(data_type) => data_type, + }; + + // Describe the data type. + describe_type(y, &data_type); + + // Attach the type to the node. + y.set_data_type(data_type); + + Ok(()) +} + +/// Asserts that two types are equal, and returns the combined type, pushing +/// diagnostics if there is a mismatch. Warnings are used for field name +/// mismatches, errors are used for any other difference. If either type is +/// unresolved at any point in the tree, the other is returned. If both are +/// unresolved, base is returned. +fn assert_equal_internal( + context: &mut context::Context, + other: &Arc, + promote_other: bool, + base: &Arc, + promote_base: bool, + message: &str, + path: &str, +) -> Arc { + if other.is_unresolved() { + base.clone() + } else if base.is_unresolved() { + other.clone() + } else { + // Match base types. + let base_types_match = match (other.class(), base.class()) { + ( + data_type::Class::Compound(data_type::Compound::Struct), + data_type::Class::Compound(data_type::Compound::NamedStruct), + ) => true, + ( + data_type::Class::Compound(data_type::Compound::NamedStruct), + data_type::Class::Compound(data_type::Compound::Struct), + ) => true, + (a, b) => a == b, + }; + if !base_types_match { + diagnostic!( + context, + Error, + TypeMismatch, + "{message}: {} vs. {}{path}", + other.class(), + base.class() + ); + + // No sense in comparing parameters if the base type is already + // different, so just return here. + return base.clone(); + } + + // Match nullability. + let nullable = match (other.nullable(), base.nullable()) { + (true, false) => { + if promote_base { + true + } else { + diagnostic!( + context, + Error, + TypeMismatchedNullability, + "{message}: nullable vs. required{path}" + ); + false + } + } + (false, true) => { + if !promote_other { + diagnostic!( + context, + Error, + TypeMismatchedNullability, + "{message}: required vs. nullable{path}" + ); + } + true + } + (_, x) => x, + }; + + // Match variations. 
+ match (other.variation(), base.variation()) { + (Some(other), Some(base)) => { + if base != other { + diagnostic!( + context, + Error, + TypeMismatchedVariation, + "{message}: variation {other} vs. {base}{path}" + ); + } + } + (Some(other), None) => diagnostic!( + context, + Error, + TypeMismatchedVariation, + "{message}: variation {other} vs. no variation{path}" + ), + (None, Some(base)) => diagnostic!( + context, + Error, + TypeMismatchedVariation, + "{message}: no variation vs. variation {base}{path}" + ), + (None, None) => {} + } + + // Match parameter count. + let other_len = other.parameters().len(); + let base_len = base.parameters().len(); + if other_len != base_len { + diagnostic!( + context, + Error, + TypeMismatch, + "{message}: {other_len} parameter(s) vs. {base_len} parameter(s){path}" + ); + return base.clone(); + } + + // Now match the parameters. We call ourselves recursively for each + // type parameter, using the combined type to form the new type + // parameter, such that information present in only one of the + // parameters ends up in the final parameter, regardless of which + // it is. + let parameters = other + .parameters() + .iter() + .zip(base.parameters().iter()) + .enumerate() + .map(|(index, (other_param, base_param))| { + let path_element = base_param + .get_name() + .or_else(|| other_param.get_name()) + .map(String::from) + .or_else(|| base.class().parameter_name(index)) + .unwrap_or_else(|| String::from("!")); + let path = if path.is_empty() { + format!(" on parameter path {path_element}") + } else { + format!("{path}.{path_element}") + }; + match (other_param, base_param) { + (data_type::Parameter::Type(other), data_type::Parameter::Type(base)) => { + data_type::Parameter::Type(assert_equal_internal( + context, + other, + promote_other, + base, + promote_base, + message, + &path, + )) + } + ( + data_type::Parameter::Type(other), + data_type::Parameter::NamedType(name, base), + ) => data_type::Parameter::NamedType( + name.clone(), + assert_equal_internal( + context, + other, + promote_other, + base, + promote_base, + message, + &path, + ), + ), + ( + data_type::Parameter::NamedType(name, other), + data_type::Parameter::Type(base), + ) => data_type::Parameter::NamedType( + name.clone(), + assert_equal_internal( + context, + other, + promote_other, + base, + promote_base, + message, + &path, + ), + ), + ( + data_type::Parameter::NamedType(other_name, other), + data_type::Parameter::NamedType(base_name, base), + ) => { + if other_name != base_name { + diagnostic!( + context, + Warning, + TypeMismatch, + "{message}: field name {} vs. {}{path}", + string_util::as_ident_or_string(&other_name), + string_util::as_ident_or_string(&base_name) + ); + } + data_type::Parameter::NamedType( + base_name.clone(), + assert_equal_internal( + context, + other, + promote_other, + base, + promote_base, + message, + &path, + ), + ) + } + (other, base) => { + if other != base { + diagnostic!( + context, + Error, + TypeMismatch, + "{message}: {other} vs. {base}{path}" + ); + } + base.clone() + } + } + }) + .collect(); + + // If either type is a named struct, the result should be a named + // struct, since we'll have taken the field names from the type that + // has them in the loop above. 
+        let class = match (other.class(), base.class()) {
+            (
+                data_type::Class::Compound(data_type::Compound::Struct),
+                data_type::Class::Compound(data_type::Compound::NamedStruct),
+            ) => data_type::Class::Compound(data_type::Compound::NamedStruct),
+            (
+                data_type::Class::Compound(data_type::Compound::NamedStruct),
+                data_type::Class::Compound(data_type::Compound::Struct),
+            ) => data_type::Class::Compound(data_type::Compound::NamedStruct),
+            (a, _) => a.clone(),
+        };
+
+        data_type::DataType::new(class, nullable, base.variation().clone(), parameters)
+            .expect("assert_equal() failed to correctly combine types")
+    }
+}
+
+/// Asserts that two types are equal, and returns the combined type, pushing
+/// diagnostics if there is a mismatch. Warnings are used for field name
+/// mismatches, errors are used for any other difference. If either type is
+/// unresolved at any point in the tree, the other is returned. If both are
+/// unresolved, base is returned.
+pub fn assert_equal<S: AsRef<str>>(
+    context: &mut context::Context,
+    other: &Arc<data_type::DataType>,
+    base: &Arc<data_type::DataType>,
+    message: S,
+) -> Arc<data_type::DataType> {
+    assert_equal_internal(context, other, false, base, false, message.as_ref(), "")
+}
+
+/// Like assert_equal(), but will first promote either input to try to make
+/// them match.
+pub fn promote_and_assert_equal<S: AsRef<str>>(
+    context: &mut context::Context,
+    other: &Arc<data_type::DataType>,
+    base: &Arc<data_type::DataType>,
+    message: S,
+) -> Arc<data_type::DataType> {
+    assert_equal_internal(context, other, true, base, true, message.as_ref(), "")
+}
diff --git a/rs/src/string_util.rs b/rs/src/string_util.rs
new file mode 100644
index 00000000..a42a8c8f
--- /dev/null
+++ b/rs/src/string_util.rs
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Some misc. string utility functions.
+
+use crate::output::diagnostic;
+
+/// Returns whether the given string is a valid identifier.
+pub fn is_identifier(s: &str) -> bool {
+    static IDENTIFIER_RE: once_cell::sync::Lazy<regex::Regex> =
+        once_cell::sync::Lazy::new(|| {
+            regex::Regex::new("^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap()
+        });
+    IDENTIFIER_RE.is_match(s)
+}
+
+/// Checks a URI for validity.
+pub fn check_uri(s: &str) -> diagnostic::Result<uriparse::URIReference> {
+    uriparse::URIReference::try_from(s).map_err(|e| ecause!(IllegalUri, e))
+}
+
+/// Checks a URI that may include glob syntax in its path for validity.
+pub fn check_uri_glob(s: &str) -> diagnostic::Result<()> {
+    // Parse as URI first, then obtain the path.
+    let uri = check_uri(s)?;
+    let path = uri.path().to_string();
+
+    // The glob characters `?`, `[`, and `]` are reserved in URIs, so they must
+    // be percent-encoded. So, in order to check the glob syntax, we must first
+    // percent-decode the string. Without loss of generality we use the lossy
+    // decode function, because we don't really care about characters other
+    // than `*?[]` for syntax-checking the glob.
+    let decoded_path = percent_encoding::percent_decode_str(&path).decode_utf8_lossy();
+
+    // Check the glob syntax.
+    glob::Pattern::new(&decoded_path).map_err(|e| ecause!(IllegalGlob, e))?;
+
+    Ok(())
+}
+
+/// Returns the given string as a quoted string.
+pub fn as_quoted_string<S: AsRef<str>>(s: S) -> String {
+    let s = s.as_ref();
+    let mut result = String::with_capacity(s.len() + 2);
+    result.push('"');
+    for c in s.chars() {
+        match c {
+            '\\' => result += "\\\\",
+            '"' => result += "\\\"",
+            c => result.push(c),
+        }
+    }
+    result.push('"');
+    result
+}
+
+/// Returns the given string as-is if it's a valid identifier (i.e. if it
+/// matches `[a-zA-Z_][a-zA-Z0-9_]*`), or returns it as an escaped string
+/// otherwise, using (only) \" and \\ as escape sequences.
+pub fn as_ident_or_string<S: AsRef<str>>(s: S) -> String {
+    let s = s.as_ref();
+    if is_identifier(s) {
+        s.to_string()
+    } else {
+        as_quoted_string(s)
+    }
+}
+
+/// Returns the given number as an English ordinal ("first", "second", ...),
+/// using the correct suffix for the number.
+pub fn describe_nth(index: u32) -> String {
+    // Overkill? Yes. Couldn't help myself.
+    match index {
+        0 => String::from("zeroth"),
+        1 => String::from("first"),
+        2 => String::from("second"),
+        3 => String::from("third"),
+        4 => String::from("fourth"),
+        5 => String::from("fifth"),
+        6 => String::from("sixth"),
+        7 => String::from("seventh"),
+        8 => String::from("eighth"),
+        9 => String::from("ninth"),
+        10 => String::from("tenth"),
+        11 => String::from("eleventh"),
+        12 => String::from("twelfth"),
+        13 => String::from("thirteenth"),
+        14 => String::from("fourteenth"),
+        15 => String::from("fifteenth"),
+        16 => String::from("sixteenth"),
+        17 => String::from("seventeenth"),
+        18 => String::from("eighteenth"),
+        19 => String::from("nineteenth"),
+        20 => String::from("twentieth"),
+        _ => match (index % 100, index % 10) {
+            // 111, 112, 113 and the like take "th", not "st"/"nd"/"rd".
+            (11..=13, _) => format!("{index}th"),
+            (_, 1) => format!("{index}st"),
+            (_, 2) => format!("{index}nd"),
+            (_, 3) => format!("{index}rd"),
+            _ => format!("{index}th"),
+        },
+    }
+}
+
+/// Describes an index.
+pub fn describe_index(index: i32) -> String {
+    match index {
+        i32::MIN..=-2 => format!("the {} to last", describe_nth(index.unsigned_abs())),
+        -1 => String::from("the last"),
+        0..=i32::MAX => format!("the {}", describe_nth(index as u32 + 1)),
+    }
+}
+
+/// Representation of an approximate character limit for printing descriptions.
+#[derive(Clone, Copy, Debug)]
+pub struct Limit {
+    limit: Option<usize>,
+}
+
+impl Default for Limit {
+    /// Creates a limit object for the default number of characters.
+    fn default() -> Self {
+        Self { limit: Some(100) }
+    }
+}
+
+impl Limit {
+    /// Creates a limit object for the given target number of characters.
+    pub fn new(limit: usize) -> Self {
+        Self { limit: Some(limit) }
+    }
+
+    /// Creates a limit object signifying a lack of a character limit (i.e.
+    /// print everything).
+    pub fn unlimited() -> Self {
+        Self { limit: None }
+    }
+
+    /// Returns the character limit in number of characters.
+    pub fn chars(&self) -> usize {
+        self.limit.unwrap_or(usize::MAX)
+    }
+
+    /// Splits this limit up into two limits. The first limit will use all
+    /// available characters up to min_amount, and the remainder will go to
+    /// the second.
+    pub fn split(self, min_amount: usize) -> (Self, Self) {
+        if let Some(limit) = self.limit {
+            if limit < min_amount {
+                (Self::new(limit), Self::new(0))
+            } else {
+                (Self::new(min_amount), Self::new(limit - min_amount))
+            }
+        } else {
+            (Self::unlimited(), Self::unlimited())
+        }
+    }
+
+    /// Heuristically divides the current limit up into a number of elements,
+    /// each allocated a number of characters, being at least min_element_size.
+    /// If enough characters are available to give that amount of characters to
+    /// each element, this returns (num_elements, None, element_limit); if not,
+    /// this returns (left, Some(right), min_element_limit), where left and
+    /// right define how many of the elements on the left/right side of the
+    /// sequence should be printed. In this case, left + right < num_elements.
+    pub fn split_n(
+        self,
+        num_elements: usize,
+        min_element_size: usize,
+    ) -> (usize, Option<usize>, Limit) {
+        if let Some(limit) = self.limit {
+            let n = limit.checked_div(min_element_size).unwrap_or(usize::MAX);
+            if n < num_elements {
+                // Apply heuristics for how many elements to print on either
+                // side. For some small values, this yields:
+                // - 0 -> ..
+                // - 1 -> a, ..
+                // - 2 -> a, .., z
+                // - 3 -> a, b, .., z
+                // - 4 -> a, b, c, .., z
+                // - 5 -> a, b, c, .., y, z
+                // - 10 -> a, b, c, d, e, f, g, .., x, y, z
+                // That is, roughly twice as many elements are printed on the
+                // left as on the right.
+                let n_right = (n + 1) / 3;
+                let n_left = n - n_right;
+                let limit = Self::new(limit.checked_div(n).unwrap_or(limit));
+                (n_left, Some(n_right), limit)
+            } else {
+                (
+                    num_elements,
+                    None,
+                    Self::new(limit.checked_div(num_elements).unwrap_or(limit)),
+                )
+            }
+        } else {
+            (num_elements, None, Self::unlimited())
+        }
+    }
+
+    /// Same as split_n(), but with the element size specified per element.
+    pub fn split_ns(self, elements: &[usize]) -> (usize, Option<usize>) {
+        if let Some(limit) = self.limit {
+            if elements.iter().cloned().sum::<usize>() > limit {
+                // Allocate roughly a third of the budget to elements on the
+                // right-hand side, taking elements for as long as they fit...
+                let mut remain = (limit + 1) / 3;
+                let mut total = 0;
+                let mut n_right = 0;
+                for size in elements.iter().rev() {
+                    let size = *size;
+                    if size <= remain {
+                        n_right += 1;
+                        remain -= size;
+                        total += size;
+                    } else {
+                        break;
+                    }
+                }
+                // ...and whatever budget is left to elements on the left-hand
+                // side.
+                let mut remain = limit - total;
+                let mut n_left = 0;
+                for size in elements.iter() {
+                    let size = *size;
+                    if size <= remain {
+                        n_left += 1;
+                        remain -= size;
+                    } else {
+                        break;
+                    }
+                }
+                return (n_left, Some(n_right));
+            }
+        }
+        (elements.len(), None)
+    }
+}
+
+/// Like Display, but with a heuristic character limit.
+pub trait Describe {
+    fn describe(&self, f: &mut std::fmt::Formatter<'_>, limit: Limit) -> std::fmt::Result;
+    fn display(&self) -> Describer<Self> {
+        Describer(self)
+    }
+}
+
+pub struct Describer<'a, T: Describe + ?Sized>(&'a T);
+
+impl<'a, T: Describe> std::fmt::Display for Describer<'a, T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.0.describe(
+            f,
+            if f.alternate() {
+                Limit::unlimited()
+            } else {
+                Limit::default()
+            },
+        )
+    }
+}
+
+/// Represent data as an identifier. If the identifier is too long, abbreviate
+/// it. limit specifies the rough resulting string length that is considered
+/// to be "too long."
+pub fn describe_identifier(
+    f: &mut std::fmt::Formatter<'_>,
+    data: &str,
+    limit: Limit,
+) -> std::fmt::Result {
+    if is_identifier(data) {
+        let (n_left, n_right, _) = limit.split_n(data.len(), 1);
+        if n_left > 0 || n_right.is_none() {
+            write!(f, "{}", &data[..n_left])?;
+        }
+        if let Some(n_right) = n_right {
+            write!(f, "..")?;
+            if n_right > 0 {
+                write!(f, "{}", &data[data.len() - n_right..])?;
+            }
+        }
+        Ok(())
+    } else {
+        describe_string(f, data, limit)
+    }
+}
+
+/// Represent data as a quoted string. If the string is too long, abbreviate
+/// it. limit specifies the rough resulting string length that is considered
+/// to be "too long."
+pub fn describe_string(
+    f: &mut std::fmt::Formatter<'_>,
+    data: &str,
+    limit: Limit,
+) -> std::fmt::Result {
+    let (n_left, n_right, _) = limit.split_n(data.len(), 1);
+    if n_left > 0 || n_right.is_none() {
+        write!(f, "{}", as_quoted_string(&data[..n_left]))?;
+    }
+    if let Some(n_right) = n_right {
+        write!(f, "..")?;
+        if n_right > 0 {
+            write!(f, "{}", as_quoted_string(&data[data.len() - n_right..]))?;
+        }
+    }
+    Ok(())
+}
+
+/// Represent data as a complete hexdump.
+fn describe_binary_all(f: &mut std::fmt::Formatter<'_>, data: &[u8]) -> std::fmt::Result {
+    let mut first = true;
+    for byte in data {
+        if first {
+            first = false;
+        } else {
+            write!(f, " ")?;
+        }
+        write!(f, "{byte:02X}")?;
+    }
+    Ok(())
+}
+
+/// Represent data as a hexdump. If the resulting dump is too long, abbreviate
+/// it.
+/// limit specifies the rough resulting string length that is considered
+/// to be "too long."
+pub fn describe_binary(
+    f: &mut std::fmt::Formatter<'_>,
+    data: &[u8],
+    limit: Limit,
+) -> std::fmt::Result {
+    let (n_left, n_right, _) = limit.split_n(data.len(), 3);
+    describe_binary_all(f, &data[..n_left])?;
+    if let Some(n_right) = n_right {
+        write!(f, "..")?;
+        describe_binary_all(f, &data[data.len() - n_right..])?;
+    }
+    Ok(())
+}
+
+/// Represent the given sequence completely.
+fn describe_sequence_all<T, F>(
+    f: &mut std::fmt::Formatter<'_>,
+    values: &[T],
+    offset: usize,
+    el_limit: Limit,
+    repr: &F,
+) -> std::fmt::Result
+where
+    F: Fn(&mut std::fmt::Formatter<'_>, &T, usize, Limit) -> std::fmt::Result,
+{
+    let mut first = true;
+    for (index, value) in values.iter().enumerate() {
+        if first {
+            first = false;
+        } else {
+            write!(f, ", ")?;
+        }
+        repr(f, value, index + offset, el_limit)?;
+    }
+    Ok(())
+}
+
+/// Represent the given sequence with heuristic length limits.
+pub fn describe_sequence<T, F>(
+    f: &mut std::fmt::Formatter<'_>,
+    values: &[T],
+    limit: Limit,
+    element_size: usize,
+    repr: F,
+) -> std::fmt::Result
+where
+    F: Fn(&mut std::fmt::Formatter<'_>, &T, usize, Limit) -> std::fmt::Result,
+{
+    let (n_left, n_right, el_limit) = limit.split_n(values.len(), element_size);
+    describe_sequence_all(f, &values[..n_left], 0, el_limit, &repr)?;
+    if let Some(n_right) = n_right {
+        if n_left > 0 {
+            write!(f, ", ")?;
+        }
+        write!(f, "..")?;
+        if n_right > 0 {
+            write!(f, ", ")?;
+        }
+        let offset = values.len() - n_right;
+        describe_sequence_all(f, &values[offset..], offset, el_limit, &repr)?;
+    }
+    Ok(())
+}
diff --git a/substrait b/substrait
new file mode 160000
index 00000000..88463636
--- /dev/null
+++ b/substrait
@@ -0,0 +1 @@
+Subproject commit 88463636b22a503adeddd9cb4da1295bbc5b15be
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 00000000..bf36bb59
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,4 @@
+substrait/
+*.test
+*.test.html
+*.test.*.yaml
diff --git a/tests/Cargo.toml b/tests/Cargo.toml
new file mode 100644
index 00000000..068c231e
--- /dev/null
+++ b/tests/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "test-runner"
+version = "0.0.1"
+edition = "2018"
+license = "Apache-2.0"
+default-run = "runner"
+
+[[bin]]
+name = "runner"
+path = "src/runner.rs"
+
+[[bin]]
+name = "find_protoc"
+path = "src/find_protoc.rs"
+
+[dependencies]
+substrait-validator = { path = "../rs", version = "0.0.1" }
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+walkdir = "2"
+glob = "0.3"
+prost-build = "0.9"
+rayon = "1.5"
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000..c29b0368
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,142 @@
+Validation tests
+================
+
+This folder is dedicated to testing the output of the validator, while the
+more local tests only exercise the API. The test runner is written such that
+there are no (long*) recompilation times every time a test changes: the tests
+are parsed by the test runner on-the-fly.
+
+*the Python-level parser isn't smart enough to do dependency checks, so it
+just reruns protoc and re-parses each test every time it's run. At present,
+this still happens in the blink of an eye, but as this slows down, it should
+be made smarter.
+
+Usage
+-----
+
+To run the tests, you will need:
+
+ - Rust/cargo, in order to compile the validator and the test runner; and
+ - Python 3.x, with `protobuf`, `pyyaml`, and `click` installed.
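+
+If needed, the Python dependencies can typically be installed with pip, for
+example like this (the exact invocation may differ per environment):
+
+    python3 -m pip install protobuf pyyaml click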
+
+After that, just run `python3 runner.py run`. This will "compile" the test
+descriptions in the tests folder to a format that is easier for the Rust code
+to understand (notably, the Rust bindings for protobuf can't read protobuf
+JSON serializations) and then run the test runner using cargo.
+
+A whole bunch of files are generated by this process:
+
+ - the `substrait` folder, containing the Python protobuf files for Substrait,
+   used by the compiler; and
+ - for each test:
+    - a `<name>.test` file, containing the intermediate format passed to
+      the Rust test runner;
+    - a `<name>.test.plan.yaml` file, containing the YAML representation of
+      the protobuf JSON serialization of the plan, as generated from the test
+      description;
+    - possibly some `<name>.test.*.yaml` files, representing the extension
+      YAML files as generated from the test description; and
+    - a `<name>.test.html` file, generated by the validator when the test
+      is run (useful when debugging and writing tests).
+
+You can remove all generated files by running `python3 runner.py clean`.
+
+Test descriptions
+-----------------
+
+Tests can currently only be described using YAML (TODO: also support JDOT when
+this stabilizes). Any `*.yaml` file found recursively in the `tests` folder
+(that isn't a generated file, so not `*.test.*.yaml`) is a test case.
+
+The expected input format is structured as follows.
+
+    {
+      "name": "<name>",
+      "diags"?: [
+        {
+          "code": <code>,
+          "min": "<level>",
+          "max": "<level>",
+        }*
+      ],
+      "plan": <plan>
+    }
+
+The `"name"` key specifies a friendly name for the test, which is printed by
+the runner when it is run and again at the end if it fails.
+
+The `"diags"` key allows diagnostic levels to be overridden. `<code>` is just
+the integer diagnostic code, and the error levels can be either `"e"` for
+error, `"w"` for warning, or `"i"` for info.
+
+The contents of the `"plan"` key, roughly speaking, correspond to the JSON
+serialization structure of the `substrait.Plan` protobuf message. However,
+additional keys may be added to objects to give instructions to the test
+runner, and YAML extensions can be embedded into the plans.
+
+The checks that the runner must perform are embedded in the plan structure
+using `"[sub-path]__test"` keys. Usually, `[sub-path]` is left blank, which
+means that the embedded checks relate to the dictionary that the key is a part
+of, but it may also be set to a period-separated list of subkeys and/or
+list indices, to allow `__test` data to be attached to non-dict values. The
+data associated with `__test` keys must be a list of dictionaries with the
+following format:
+
+    {
+      "level"?: [<level>*],
+      "diag"?: {
+        "code"?: <code>,
+        "level"?: <level>,
+        "original_level"?: <level>,
+        "msg"?: <pattern>,
+        "before"?: <path-element>,
+        "after"?: <path-element>
+      },
+      "type"?: "expected-type"
+    }
+
+Exactly one key must be specified for the outermost dictionary:
+
+ - `"level"` matches the (recursive) error level of the current node, failing
+   the test if the actual level is not in the list.
+ - `"diag"` removes the first diagnostic attached to the node that complies
+   with all patterns from the node's diagnostics, while failing the test if
+   no such diagnostic exists. Because the diagnostic is effectively
+   removed, subsequent "level" checks can be used to ensure that no
+   unexpected diagnostics remain.
+   The following checks can be added:
+    - `"code"` matches the diagnostic code exactly;
+    - `"level"` matches the adjusted error level exactly;
+    - `"original_level"` matches the original error level exactly;
+    - `"msg"` matches the error message, using `*` for zero or more characters,
+      and `**` as an escape for matching `*` literally;
+    - `"before"` only checks for diagnostics that occur before the child with
+      the given path element was written;
+    - `"after"` only checks for diagnostics that occur after the child with
+      the given path element was written.
+   The path elements have the following syntax:
+    - `"field"`: regular field named "field";
+    - `"field[x]"`: element x of repeated field named "field";
+    - `"field<variant>"`: oneof field named "field" with variant "variant";
+    - `"[x]"`: YAML list index x.
+   Fields and variants that aren't valid identifiers can be specified using
+   double-quoted strings, using `\"` and `\\` as escape sequences, but be
+   aware that the serialization format you're writing may want non-identifier
+   things to be quoted, too. For example, in YAML, a field named `!` would be
+   written as `'"!"'`, the single quotes delimiting the YAML string.
+ - `"type"` matches the (final) data type attached to the node with the given
+   string. There's no intelligence here; the string must match exactly.
+
+Evaluation order is depth-first, so diagnostics attached to child nodes are
+removed before the level of their parent node is checked.
+
+`<key>__yaml` keys may be used in place of URI keys to embed extension YAML
+files. The key will be replaced with `"<key>"`, set to the string
+`"test:<index>.yaml"`. The corresponding YAML file is written to
+`"<name>.test.<index>.yaml"`. The test runner installs a custom URI handler
+with the validator to ensure that the extension file will be linked up
+appropriately.
+
+Just like the protobuf message structure, the embedded YAML data may have
+`__test` tags associated with it, so check instructions can also be attached
+to the extension files. An illustrative example follows below.
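+
+Example
+-------
+
+To tie the above together, here is a small, hypothetical test description.
+The plan contents, the diagnostic code, the expected levels, and the message
+pattern are all made up purely for illustration; they do not correspond to an
+actual test case in this suite:
+
+    name: example-test
+    diags:
+      - code: 1        # hypothetical diagnostic code whose level is clamped
+        min: i
+        max: w
+    plan:
+      __test:
+        - level: [i, w]   # the plan as a whole may be info or warning
+      relations__test:
+        - diag:           # expect a warning diagnostic on plan.relations
+            level: w
+            msg: "*made-up message pattern*"
+      relations: []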
diff --git a/tests/runner.py b/tests/runner.py new file mode 100755 index 00000000..65b2fef0 --- /dev/null +++ b/tests/runner.py @@ -0,0 +1,580 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 + +import os +import pathlib +import subprocess +import shutil +import sys +import click +import yaml +import json +import re + + +def destringify_ident(s): + """Converts potentially stringified identifiers to strings.""" + if s.startswith('"') and s.endswith('"'): + return s[1:-1].replace('\\"', '"').replace("\\\\", "\\") + return s + + +def path_element_field(f): + """Returns the serialization of a field path element.""" + return {"Field": {"field": f}} + + +def path_element_repeated(f, i): + """Returns the serialization of a repeated field path element.""" + return {"Repeated": {"field": f, "index": i}} + + +def path_element_oneof(f, v): + """Returns the serialization of a oneof field path element.""" + return {"Oneof": {"field": f, "variant": v}} + + +def path_element_index(i): + """Returns the serialization of an array index path element.""" + return {"Index": {"index": i}} + + +def convert_if_int(x): + """Convert string x to an integer of possible, otherwise keep it as or + convert it to a string.""" + try: + return int(x) + except ValueError: + return str(x) + + +def parse_path_element(s): + """Parses the Rust path element syntax to its serialized form.""" + ident_re = r'([a-zA-Z_][a-zA-Z0-9_]*|"(?:[^\\]|\\[\\"])*")' + index_re = r"\[([1-9][0-9]*|0)\]" + field_mat = re.fullmatch(ident_re, s) + if field_mat: + return path_element_field(destringify_ident(field_mat.group(1))) + oneof_mat = re.fullmatch(ident_re + "<" + ident_re + ">", s) + if oneof_mat: + return path_element_oneof( + destringify_ident(oneof_mat.group(1)), destringify_ident(oneof_mat.group(2)) + ) + repeated_mat = re.fullmatch(ident_re + index_re, s) + if repeated_mat: + return path_element_repeated( + destringify_ident(repeated_mat.group(1)), int(repeated_mat.group(2)) + ) + index_mat = re.fullmatch(index_re, s) + if index_mat: + return path_element_index(int(index_mat.group(1))) + raise ValueError(f"failed to parse {s} as path element") + + +def parse_diag_overrides(diags): + """Parses and checks the input syntax for diagnostic overrides into the + Rust/serde syntax.""" + diag_overrides = [] + if diags is not None: + if not isinstance(diags, list): + raise Exception("diags key must map to a list") + for diag in diags: + diag_data = {} + + code = diag.pop("code", None) + if not isinstance(code, int): + raise Exception("diags[].code must be an integer") + diag_data["code"] = code + + level = diag.pop("min", "i") + if level not in ("i", "w", "e"): + raise Exception('diags[].min must be either "i", "w", or "e"') + diag_data["min"] = level + + level = diag.pop("max", "e") + if level not in ("i", "w", "e"): + raise Exception('diags[].max must be either "i", "w", or "e"') + diag_data["max"] = level + + if diag: + raise Exception( + "Found unknown key(s) in diag[]: {}".format(", ".join(diag.keys())) + ) + diag_overrides.append(diag_data) + return diag_overrides + + +def strip_test_tags(data, path=(), yaml_counter=None): + """ + Modifies data recursively, yielding a flattened set of instruction triple: + + - Pops all "[sub_path]__test" keys from the given data. For each popped + value, yields a ('test', path + sub_path, test_data) triple. sub_path + may be left blank, or may be a .-separated list of key names and list + indices. 
+ - Replaces all "__yaml" keys with "", replacing their value + with "test:.yaml", where index is a unique integer index within + the plan. For each replaced value, the original yaml data is recursively + stripped using 'data' for the path element (this is how it will appear + in the validator output tree) and then yielded in the form of a + ('yaml', index, data) triple. + """ + if yaml_counter is None: + yaml_counter = [0] + if isinstance(data, dict): + # Handle __test keys. + keys = [] + for key in data.keys(): + if not isinstance(key, str): + raise Exception( + "found non-string key at {}".format(".".join(map(str, path))) + ) + if key.endswith("__test"): + keys.append(key) + for key in keys: + test_data = data.pop(key) + sub_path = tuple(map(convert_if_int, key.rsplit("__")[0].split("."))) + if sub_path == ("",): + sub_path = () + yield ("test", path + sub_path, test_data) + + # Handle __yaml keys. + keys = [] + for key in data.keys(): + if key.endswith("__yaml"): + keys.append(key) + for key in keys: + index = yaml_counter[0] + yaml_counter[0] += 1 + yaml_data = data.pop(key) + new_key = key.rsplit("__")[0] + data[new_key] = f"test:{index}.yaml" + for x in strip_test_tags(yaml_data, path + (new_key, "data"), yaml_counter): + yield x + yield ("yaml", index, yaml_data) + + # Traverse into dict. + for key, value in data.items(): + for x in strip_test_tags(value, path + (key,), yaml_counter): + yield x + elif isinstance(data, list): + # Traverse into list. + for index, value in enumerate(data): + for x in strip_test_tags(value, path + (index,), yaml_counter): + yield x + + +def resolve_path(path, msg_desc): + """Converts a JSON path to the protobuf path elements that Rust derives + from the prost-generated structures.""" + while path: + el, *path = path + if isinstance(el, int): + if msg_desc is None: + yield path_element_index(el) + else: + raise Exception( + f"unexpected integer in path description, currently at {msg_desc.full_name}" + ) + elif msg_desc is None: + yield path_element_field(el) + else: + field_desc = msg_desc.fields_by_camelcase_name.get(el, None) + if field_desc is None: + field_desc = msg_desc.fields_by_name.get(el, None) + if field_desc is None: + raise Exception(f"unknown field {el} for {msg_desc.full_name}") + if field_desc.label == field_desc.LABEL_REPEATED: + if not path: + raise Exception( + f"ran out of path elements for repeated {msg_desc.full_name}" + ) + el2, *path = path + if not isinstance(el2, int): + raise Exception( + f"found non-index path element for repeated {msg_desc.full_name}" + ) + yield path_element_repeated(field_desc.name, el2) + else: + if field_desc.containing_oneof is not None: + yield path_element_oneof( + field_desc.containing_oneof.name, field_desc.name + ) + else: + yield path_element_field(field_desc.name) + msg_desc = field_desc.message_type + + +def parse_level_instruction(allowed_levels, path): + """Parses an error level instruction in the input format into the + Rust/serde instruction syntax.""" + if allowed_levels is None: + return [] + + if not isinstance(allowed_levels, (list, str)): + raise Exception("__test.level must be a list or string") + allowed_levels = list(allowed_levels) + for level in allowed_levels: + if level not in ("i", "w", "e"): + raise Exception('__test.level[] must be either "i", "w", or "e"') + return [dict(Level=dict(path=path, allowed_levels=allowed_levels))] + + +def parse_diag_instruction(diag_data, path): + """Parses a diagnostic matching instruction in the input format into the + Rust/serde instruction 
syntax.""" + if diag_data is None: + return [] + + rust_diag_data = {} + if not isinstance(diag_data, dict): + raise Exception("__test.diag must be a dict") + + code = diag_data.pop("code", None) + if code is not None: + if not isinstance(code, int): + raise Exception("__test.diag.code must be an int") + rust_diag_data["code"] = code + + level = diag_data.pop("level", None) + if level is not None: + if level not in ("i", "w", "e"): + raise Exception('__test.diag.level must be either "i", "w", or "e"') + rust_diag_data["level"] = level + + level = diag_data.pop("original_level", None) + if level is not None: + if level not in ("i", "w", "e"): + raise Exception( + '__test.diag.original_level must be either "i", "w", or "e"' + ) + rust_diag_data["original_level"] = level + + msg_pattern = diag_data.pop("msg", None) + if msg_pattern is not None: + if not isinstance(msg_pattern, str): + raise Exception("__test.diag.msg must be a string") + # Convert to full glob pattern... We don't use the full + # pattern syntax in the description because escape + # sequences are needed for some rather common characters + # in messages (i.e. '[', ']', and '?'). + i = 0 + glob_pattern = "" + while i < len(msg_pattern): + if msg_pattern[i : i + 2] == "**": + glob_pattern += "[*]" + i += 1 + break + c = msg_pattern[i] + if c in ("?", "[", "]"): + glob_pattern += f"[{c}]" + else: + glob_pattern += c + i += 1 + rust_diag_data["msg"] = glob_pattern + + element = diag_data.pop("before", None) + if element is not None: + if not isinstance(element, str): + raise Exception("__test.diag.before must be a path element string") + rust_diag_data["before"] = parse_path_element(element) + + element = diag_data.pop("after", None) + if element is not None: + if not isinstance(element, str): + raise Exception("__test.diag.after must be a path element string") + rust_diag_data["after"] = parse_path_element(element) + + if diag_data: + raise Exception( + "Found unknown __test.diag key(s): {}".format(", ".join(diag_data.keys())) + ) + return [dict(Diag=dict(path=path, **rust_diag_data))] + + +def parse_type_instruction(type_str, path): + """Parses a data type check instruction in the input format into the + Rust/serde instruction syntax.""" + if type_str is None: + return [] + + if not isinstance(type_str, str): + raise Exception("__test.type must be a string") + return [dict(DataType=dict(path=path, data_type=type_str))] + + +def parse_instructions(test_tags, fname, proto_desc): + """Parses and checks the syntax for instructions in the input format into + the Rust/serde instruction syntax.""" + instructions = [] + for insn, loc, data in test_tags: + if insn == "test": + path = list(resolve_path(loc, proto_desc)) + for insn_type in data: + + # Handle level instructions. + instructions.extend( + parse_level_instruction(insn_type.pop("level", None), path) + ) + + # Handle diag instructions. + instructions.extend( + parse_diag_instruction(insn_type.pop("diag", None), path) + ) + + # Handle type instructions. 
+ instructions.extend( + parse_type_instruction(insn_type.pop("type", None), path) + ) + + if insn_type: + raise Exception( + "Found unknown __test key(s): {}".format( + ", ".join(insn_type.keys()) + ) + ) + + if insn == "yaml": + with open(f"{fname}.{loc}.yaml", "w") as f: + f.write(yaml.safe_dump(data)) + + return instructions + + +def compile_test(fname, data, proto_parse, proto_desc): + """Compile test data into a bunch of test files, of which fname itself is + the main test file and the remainder are of the form fname..yaml, + containing supplementary information. proto_parse should be a function + that parses a Python dict representation of the JSON corresponding to a + Substrait plan into its binary representation, and proto_desc must point + to the descriptor for substrait.Plan. + + See README.md for format information.""" + + # Get name. + name = data.pop("name", None) + if not isinstance(name, str): + raise Exception("Missing valid test name") + + # Parse diagnostic overrides. + diag_overrides = parse_diag_overrides(data.pop("diags", None)) + + # Get plan data. + plan = data.pop("plan", None) + if not isinstance(plan, dict): + raise Exception("Missing Substrait plan") + + if data: + raise Exception( + "Found unknown key(s) in root: {}".format(", ".join(data.keys())) + ) + + # Strip test tags from the test data. + test_tags = list(strip_test_tags(plan)) + + # strip_test_tags does post-order tree traversal, but we need the + # instructions ordered pre-order. Easiest way to do that is to just reverse + # the list. + test_tags.reverse() + + # Write the converted plan for debugging purposes. + with open(f"{fname}.plan.yaml", "w") as f: + f.write(yaml.safe_dump(plan)) + + # Parse and serialize the stripped plan using protobuf. + plan = proto_parse(plan) + + # Parse the instructions derived from the test tags now that we know the + # protobuf structure was found to be valid by protobuf (it generates far + # better error messages than the path resolver does, in case something is + # wrong in the test description). + instructions = parse_instructions(test_tags, fname, proto_desc) + + # Write output file. + with open(fname, "w") as f: + f.write( + json.dumps( + dict( + name=name, + plan=list(plan), + diag_overrides=diag_overrides, + instructions=instructions, + ) + ) + ) + + +def mtime(path) -> float: + """Yields the mtime of the given path, or 0 if it doesn't exist.""" + try: + if os.path.isfile(path): + return os.path.getmtime(path) + except OSError: + pass + return 0.0 + + +@click.group() +def cli(): + pass + + +@cli.command( + short_help="Runs the test suite", + help=( + "Runs the test suite, or only run tests matching the " + "given glob-capable filter (matching test case names)." + ), +) +@click.argument("filter", required=False, default="*") +@click.option( + "--release/--no-release", + default=False, + help=( + "Build Rust application in --release mode. Recompilation " + "will take longer, but rerunning the tests will be much " + "faster." + ), +) +@click.option( + "--html/--no-html", + default=True, + help=( + "Enables or disables exporting HTML for the plans under " + "test. Enabled by default." + ), +) +def run(filter, release, html): + + # Build and run with optimizations if --release is passed. + if release: + release = ["--release"] + else: + release = [] + + # Run cargo build without capturing output. + code = subprocess.run(["cargo", "build"] + release).returncode + if code: + sys.exit(code) + + # Find all proto files and check if they've changed since the last run. 
+ click.echo("Scanning for proto files...") + script_path = os.path.dirname(os.path.realpath(__file__)) + repo_path = os.path.realpath(os.path.join(script_path, "..")) + proto_paths = [ + os.path.join(repo_path, "proto"), + os.path.join(repo_path, "substrait", "proto"), + ] + proto_files = [] + proto_path_args = [] + for proto_path in proto_paths: + proto_files.extend( + pathlib.Path(os.path.join(proto_path, "substrait")).rglob("*.proto") + ) + proto_path_args.extend(("-I", proto_path)) + proto_mtime = max(map(mtime, proto_files)) + output_path = os.path.join(script_path, "substrait") + stamp_path = os.path.join(output_path, "__init__.py") + stamp_mtime = mtime(stamp_path) + if proto_mtime < stamp_mtime: + click.echo("Protobuf bindings are up-to-date.") + else: + + # Find the path to a protoc executable. We rely on prost for this, which is + # capable of shipping it for most operating systems. + click.echo("Finding protoc location...") + protoc = subprocess.run( + ["cargo", "run"] + release + ["-q", "--bin", "find_protoc"], + capture_output=True, + ).stdout.strip() + + # (Re)generate and import protobuf files and import them. + click.echo("Generating protobuf bindings...") + if os.path.isdir(output_path): + shutil.rmtree(output_path) + subprocess.check_call( + [protoc, *proto_path_args, "--python_out", script_path, *proto_files] + ) + for subdir in (".", "extensions", "validator"): + fname = os.path.join(output_path, subdir, "__init__.py") + with open(fname, "w") as f: + f.write("\n") + + # Import the generated protobuf bindings. + from substrait import plan_pb2 + + assert os.path.samefile(plan_pb2.__file__, os.path.join(output_path, "plan_pb2.py")) + from google.protobuf.json_format import ParseDict + + proto_desc = plan_pb2.Plan.DESCRIPTOR + + def proto_parse(data): + return ParseDict(data, plan_pb2.Plan()).SerializeToString() + + # Rather than failing immediately when the first error occurs, store errors + # here. The output for test files that compile without errors will then + # still be written. + errors = {} + + # Deserialize test input files (multiple input formats can be added here). + click.echo("Scanning for test description files...") + suite_path = os.path.join(script_path, "tests") + test_inputs = {} + for fname in pathlib.Path(suite_path).rglob("*.yaml"): + if ".test." in fname.name: + continue + try: + output_fname = str(fname) + ".test" + if mtime(fname) >= mtime(output_fname): + with open(fname, "r") as f: + test_inputs[fname] = (yaml.safe_load(f.read()), output_fname) + except Exception as e: + errors[fname] = ("reading", e) + + # Compile the contents of the test input files. + if not test_inputs: + click.echo("All test descriptions are up-to-date.") + else: + click.echo(f"Parsing {len(test_inputs)} test description(s)...") + for fname, (test_input, output_fname) in test_inputs.items(): + try: + compile_test(output_fname, test_input, proto_parse, proto_desc) + except Exception as e: + if os.path.isfile(output_fname): + os.remove(output_fname) + errors[fname] = ("compiling", e) + + # Fail if there were any errors. + if errors: + for fname, (action, error) in errors.items(): + rel_path = os.path.relpath(fname, suite_path) + click.echo(f"{type(error).__name__} while {action} {rel_path}: {error}") + sys.exit(1) + + # Now run the test suite. + sys.exit( + subprocess.run( + ["cargo", "run"] + release + ["-q", suite_path, str(int(html)), filter] + ).returncode + ) + + +@cli.command( + short_help="Removes all generated files", help="Removes all generated files." 
+)
+def clean():
+    script_path = os.path.dirname(os.path.realpath(__file__))
+
+    # Remove generated protobuf files.
+    proto_output_path = os.path.join(script_path, "substrait")
+    if os.path.isdir(proto_output_path):
+        shutil.rmtree(proto_output_path)
+
+    # Remove compiled test files and test results.
+    suite_path = os.path.join(script_path, "tests")
+    for fname in pathlib.Path(suite_path).rglob("*.test*"):
+        os.remove(fname)
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/tests/src/find_protoc.rs b/tests/src/find_protoc.rs
new file mode 100644
index 00000000..08b8f874
--- /dev/null
+++ b/tests/src/find_protoc.rs
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Prost has some magic for finding the path to protoc, so let's use that in
+//! the Python code as well...
+
+fn main() {
+    println!("{}", prost_build::protoc().display());
+}
diff --git a/tests/src/runner.rs b/tests/src/runner.rs
new file mode 100644
index 00000000..45259a62
--- /dev/null
+++ b/tests/src/runner.rs
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Test runner for the [substrait_validator] crate.
+
+use rayon::prelude::*;
+use std::collections::HashMap;
+use std::collections::HashSet;
+use substrait_validator as sv;
+
+#[derive(serde::Deserialize, PartialEq, Eq, Hash, Debug, Clone, Copy)]
+enum ErrorLevel {
+    #[serde(rename(deserialize = "e"))]
+    Error,
+    #[serde(rename(deserialize = "w"))]
+    Warning,
+    #[serde(rename(deserialize = "i"))]
+    Info,
+}
+
+impl From<ErrorLevel> for sv::Level {
+    fn from(l: ErrorLevel) -> Self {
+        match l {
+            ErrorLevel::Error => sv::Level::Error,
+            ErrorLevel::Warning => sv::Level::Warning,
+            ErrorLevel::Info => sv::Level::Info,
+        }
+    }
+}
+
+#[derive(serde::Deserialize, Debug, Clone)]
+enum PathElement {
+    Field { field: String },
+    Oneof { field: String, variant: String },
+    Repeated { field: String, index: usize },
+    Index { index: usize },
+}
+
+impl From<PathElement> for sv::output::path::PathElement {
+    fn from(e: PathElement) -> Self {
+        match e {
+            PathElement::Field { field } => sv::output::path::PathElement::Field(field),
+            PathElement::Oneof { field, variant } => {
+                sv::output::path::PathElement::Variant(field, variant)
+            }
+            PathElement::Repeated { field, index } => {
+                sv::output::path::PathElement::Repeated(field, index)
+            }
+            PathElement::Index { index } => sv::output::path::PathElement::Index(index),
+        }
+    }
+}
+
+fn convert_path(path: &[PathElement]) -> sv::output::path::PathBuf {
+    sv::output::path::PathBuf {
+        root: "plan",
+        elements: path.iter().map(|x| x.clone().into()).collect(),
+    }
+}
+
+#[derive(serde::Deserialize, Debug)]
+struct LevelTest {
+    pub path: Vec<PathElement>,
+    pub allowed_levels: HashSet<ErrorLevel>,
+}
+
+#[derive(serde::Deserialize, Debug)]
+struct DiagnosticTest {
+    pub path: Vec<PathElement>,
+    pub code: Option<u32>,
+    pub level: Option<ErrorLevel>,
+    pub original_level: Option<ErrorLevel>,
+    pub msg: Option<String>,
+    pub before: Option<PathElement>,
+    pub after: Option<PathElement>,
+}
+
+#[derive(serde::Deserialize, Debug)]
+struct DataTypeTest {
+    pub path: Vec<PathElement>,
+    pub data_type: String,
+}
+
+impl DiagnosticTest {
+    pub fn matches(&self, diag: &sv::Diagnostic) -> bool {
+        // Check code.
+        if let Some(code) = &self.code {
+            if diag.cause.classification.code() != *code {
+                return false;
+            }
+        }
+
+        // Check adjusted level.
+        if let Some(level) = &self.level {
+            let level = sv::Level::from(*level);
+            if diag.adjusted_level != level {
+                return false;
+            }
+        }
+
+        // Check original level.
+        if let Some(level) = &self.original_level {
+            let level = sv::Level::from(*level);
+            if diag.original_level != level {
+                return false;
+            }
+        }
+
+        // Check message.
+        if let Some(msg) = &self.msg {
+            let msg = glob::Pattern::new(msg).unwrap();
+            if !msg.matches(&diag.cause.to_string()) {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+/// A validation result checking instruction.
+#[derive(serde::Deserialize, Debug)]
+enum Instruction {
+    Level(LevelTest),
+    Diag(DiagnosticTest),
+    DataType(DataTypeTest),
+}
+
+/// A diagnostic level override command.
+#[derive(serde::Deserialize, Debug)]
+struct DiagOverride {
+    code: u32,
+    min: ErrorLevel,
+    max: ErrorLevel,
+}
+
+/// Test case description structure.
+#[derive(serde::Deserialize, Debug)]
+struct TestDescription {
+    /// Test case name.
+    pub name: String,
+
+    /// List of diagnostic level overrides to apply.
+    pub diag_overrides: Vec<DiagOverride>,
+
+    /// The binary serialization of the plan.
+    pub plan: Vec<u8>,
+
+    /// The instructions for checking the validation result.
+    pub instructions: Vec<Instruction>,
+}
+
+/// The result of a test case including messages.
+#[derive(Default)]
+struct TestResult {
+    /// Log messages generated while running the test.
+    pub messages: Vec<String>,
+
+    /// Whether there were failures in this test case.
+    pub failed: bool,
+
+    /// Whether the test case was skipped.
+    pub skipped: bool,
+}
+
+impl TestResult {
+    pub fn log<S: std::fmt::Display>(&mut self, msg: S) {
+        self.messages.push(msg.to_string());
+    }
+
+    pub fn error<S: std::fmt::Display>(&mut self, msg: S) {
+        self.failed = true;
+        self.log(format!("Error: {msg}"));
+    }
+
+    pub fn handle_result<T, E, F, S>(&mut self, e: Result<T, E>, msg: F) -> Option<T>
+    where
+        F: FnOnce() -> S,
+        S: std::fmt::Display,
+        E: std::error::Error,
+    {
+        match e {
+            Ok(x) => Some(x),
+            Err(e) => {
+                let msg = msg();
+                self.error(format!("{msg}: {e}"));
+                None
+            }
+        }
+    }
+
+    pub fn handle_option<T, F, S>(&mut self, option: Option<T>, msg: F) -> Option<T>
+    where
+        F: FnOnce() -> S,
+        S: std::fmt::Display,
+    {
+        if option.is_none() {
+            let msg = msg();
+            self.error(format!("{msg}"));
+        }
+        option
+    }
+}
+
+/// Configuration structure for the test runner.
+struct Configuration {
+    /// Skip test cases for which the name does not match this pattern.
+    pub filter: glob::Pattern,
+
+    /// Whether HTML output files should be written.
+    pub enable_html: bool,
+}
+
+/// All information related to a test case, including its result.
+struct TestCase {
+    /// Path to the test case input file.
+    pub path: std::path::PathBuf,
+
+    /// The test description file, if parsing succeeded.
+    pub description: Option<TestDescription>,
+
+    /// The result of the test.
+    pub result: TestResult,
+}
+
+impl TestCase {
+    /// Traverse the given path within the given node tree, and then apply f
+    /// on the selected node.
+    fn traverse<'a, I, F>(
+        result: &mut TestResult,
+        node: &mut sv::output::tree::Node,
+        mut path: I,
+        f: F,
+    ) where
+        I: Iterator<Item = &'a sv::output::path::PathElement>,
+        F: FnOnce(&mut TestResult, &mut sv::output::tree::Node),
+    {
+        match path.next() {
+            Some(path_element) => {
+                for data in node.data.iter_mut() {
+                    if let sv::output::tree::NodeData::Child(c) = data {
+                        if &c.path_element == path_element {
+                            let mut node = c.node.as_ref().clone();
+                            Self::traverse(result, &mut node, path, f);
+                            c.node = std::sync::Arc::new(node);
+                            return;
+                        }
+                    }
+                }
+                result.error(format!("missing child node {path_element}"));
+            }
+            None => f(result, node),
+        }
+    }
+
+    /// Searches for the child node of node at the given path element and
+    /// returns its index. If the child does not exist, None is returned, and
+    /// an error is pushed.
+ fn find_child_index( + result: &mut TestResult, + node: &mut sv::output::tree::Node, + desc: &PathElement, + ) -> Option { + let path_element = sv::output::path::PathElement::from(desc.clone()); + result.handle_option( + node.data.iter().enumerate().find_map(|(index, data)| { + if let sv::output::tree::NodeData::Child(c) = data { + if c.path_element == path_element { + return Some(index); + } + } + None + }), + || format!("child {path_element} does not exist"), + ) + } + + /// Runs the given level test instruction. + fn run_level_test( + result: &mut TestResult, + root: &mut sv::output::tree::Node, + desc: &LevelTest, + ) { + let path = convert_path(&desc.path); + result.log(format!("Checking level at {path}...")); + Self::traverse(result, root, path.elements.iter(), |result, node| { + let actual_level = node + .get_diagnostic() + .map(|d| d.adjusted_level) + .unwrap_or(sv::Level::Info); + if !desc + .allowed_levels + .iter() + .any(|l| sv::Level::from(*l) == actual_level) + { + result.error(format!("unexpected error level {actual_level:?}")); + } + }); + } + + /// Runs the given diagnostic test instruction. + fn run_diag_test( + result: &mut TestResult, + root: &mut sv::output::tree::Node, + desc: &DiagnosticTest, + ) { + let path = convert_path(&desc.path); + result.log(format!("Checking diagnostic at {path}...")); + Self::traverse(result, root, path.elements.iter(), |result, node| { + // Find node data start index based on after (if specified). + let start_index = desc + .after + .as_ref() + .and_then(|path_element| Self::find_child_index(result, node, path_element)) + .unwrap_or(0); + + // Find node data end index based on before (if specified). + let end_index = desc + .before + .as_ref() + .and_then(|path_element| Self::find_child_index(result, node, path_element)) + .unwrap_or(node.data.len()); + + // Look for diagnostics within that range. + let diag_index = result.handle_option( + node.data[start_index..end_index] + .iter() + .enumerate() + .find_map(|(index, data)| { + if let sv::output::tree::NodeData::Diagnostic(diag) = data { + if desc.matches(diag) { + return Some(index); + } + } + None + }), + || "no diagnostic found that matches expectations", + ); + + // Remove the diagnostic we found from the tree. + if let Some(diag_index) = diag_index { + node.data.remove(diag_index); + } + }); + } + + /// Runs the given data type test instruction. + fn run_data_type_test( + result: &mut TestResult, + root: &mut sv::output::tree::Node, + desc: &DataTypeTest, + ) { + let path = convert_path(&desc.path); + result.log(format!("Checking data type at {path}...")); + Self::traverse(result, root, path.elements.iter(), |result, node| { + let actual = format!("{:#}", node.data_type()); + if actual != desc.data_type { + result.error(format!("data type mismatch; found {actual}")); + } + }) + } + + /// Runs the given test case, updating result. + fn run( + result: &mut TestResult, + path: &std::path::Path, + desc: &TestDescription, + cfg: &Configuration, + ) { + // Create validator configuration. 
+        let mut validator_config = sv::Config::new();
+        for diag_override in desc.diag_overrides.iter() {
+            validator_config.override_diagnostic_level(
+                result
+                    .handle_option(sv::Classification::from_code(diag_override.code), || {
+                        format!("invalid error code {}", diag_override.code)
+                    })
+                    .unwrap_or_default(),
+                diag_override.min.into(),
+                diag_override.max.into(),
+            );
+        }
+        let path_os_str = path.as_os_str().to_owned();
+        validator_config.add_uri_resolver(move |uri| {
+            if let Some(name) = uri.strip_prefix("test:") {
+                let mut yaml_path = path_os_str.clone();
+                yaml_path.push(".");
+                yaml_path.push(name);
+                let yaml_path = std::path::PathBuf::from(yaml_path);
+                std::fs::read(yaml_path)
+            } else if let Some(uri) = uri.strip_prefix('/') {
+                std::fs::read(std::path::PathBuf::from("../substrait/extensions").join(uri))
+            } else {
+                Err(std::io::Error::new(
+                    std::io::ErrorKind::Other,
+                    "non-test URI",
+                ))
+            }
+        });
+
+        // Parse the plan.
+        let parse_result = sv::parse(&desc.plan[..], &validator_config);
+
+        // Export result to HTML for debugging.
+        if cfg.enable_html {
+            let mut html_path = path.as_os_str().to_owned();
+            html_path.push(".html");
+            result.handle_result(
+                std::fs::File::create(html_path)
+                    .and_then(|mut f| parse_result.export(&mut f, sv::export::Format::Html)),
+                || "Error while attempting to write HTML output",
+            );
+        }
+
+        // Execute test instructions.
+        let mut root = parse_result.root;
+        for insn in desc.instructions.iter() {
+            match insn {
+                Instruction::Level(level) => Self::run_level_test(result, &mut root, level),
+                Instruction::Diag(diag) => Self::run_diag_test(result, &mut root, diag),
+                Instruction::DataType(data_type) => {
+                    Self::run_data_type_test(result, &mut root, data_type)
+                }
+            }
+        }
+    }
+
+    /// Loads a test case from the given file and runs it, returning the result.
+    pub fn load_and_run<P: Into<std::path::PathBuf>>(
+        path: P,
+        cfg: &Configuration,
+    ) -> Box<TestCase> {
+        // Construct the path.
+        let path = path.into();
+
+        // Construct the result object.
+        let mut result = TestResult::default();
+
+        // Read input file.
+        let input = result.handle_result(std::fs::read_to_string(&path), || {
+            "failed to read test file"
+        });
+
+        // Parse input file.
+        let description = input.and_then(|input| {
+            result.handle_result(serde_json::from_str::<TestDescription>(&input), || {
+                "failed to parse test file"
+            })
+        });
+
+        // Match test case filter.
+        let skip = description
+            .as_ref()
+            .map(|d| !cfg.filter.matches(&d.name))
+            .unwrap_or_default();
+
+        // Run the test case.
+        if skip {
+            result.skipped = true;
+        } else if let Some(desc) = &description {
+            Self::run(&mut result, &path, desc, cfg);
+        }
+
+        // Log the result.
+        result.log(format!(
+            "Test case {} ({}): {}",
+            description.as_ref().map(|d| &d.name[..]).unwrap_or("?"),
+            path.display(),
+            if result.skipped {
+                "skipped"
+            } else if result.failed {
+                "FAILED"
+            } else {
+                "passed"
+            }
+        ));
+
+        Box::new(TestCase {
+            path,
+            description,
+            result,
+        })
+    }
+}
+
+fn print_usage_and_fail() -> ! {
+    let me = std::env::args()
+        .next()
+        .unwrap_or_else(|| String::from("test_runner"));
+    println!("Usage: {me} <test-directory> <enable-html: 0|1> <name-filter-pattern>");
+    println!("Runs all *.test files in the test directory for which the name matches the pattern.");
+    println!("NOTE: you should be running this with runner.py.");
+    std::process::exit(2);
+}
+
+pub fn main() {
+    // "Parse" command line arguments.
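+    // Expected arguments, normally supplied by runner.py: the directory to
+    // scan for *.test files, a 0/1 flag enabling HTML debug output, and a
+    // glob pattern that test case names must match.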
+    let args: Vec<_> = std::env::args().collect();
+    if args.len() != 4 {
+        print_usage_and_fail();
+    }
+    let cfg = Configuration {
+        filter: glob::Pattern::new(&args[3]).expect("invalid filter pattern"),
+        enable_html: match &args[2][..] {
+            "1" => true,
+            "0" => false,
+            _ => print_usage_and_fail(),
+        },
+    };
+
+    // Find all test cases and run them.
+    println!("Running test suite...");
+    let paths = walkdir::WalkDir::new(&args[1])
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| {
+            e.path().extension() == Some(std::ffi::OsStr::new("test"))
+                && e.metadata().unwrap().is_file()
+        })
+        .map(|e| e.into_path())
+        .collect::<Vec<_>>();
+    let test_cases = paths
+        .par_iter()
+        .map(|p| TestCase::load_and_run(p, &cfg))
+        .collect::<Vec<_>>();
+
+    // Print test name collisions.
+    let mut names = HashMap::new();
+    for test_case in test_cases.iter() {
+        if let Some(desc) = &test_case.description {
+            if let Some(previous) = names.insert(&desc.name, &test_case.path) {
+                println!(
+                    "Warning: duplicate test name {}: {} and {}",
+                    &desc.name,
+                    test_case.path.display(),
+                    previous.display()
+                );
+            }
+        }
+    }
+
+    // Print logs for failing tests.
+    for test_case in test_cases.iter().filter(|x| x.result.failed) {
+        println!();
+        if let Some(desc) = &test_case.description {
+            println!("Test {} ({}) FAILED:", desc.name, test_case.path.display());
+        } else {
+            println!("Test {} FAILED:", test_case.path.display());
+        }
+        for msg in test_case.result.messages.iter() {
+            println!("  {msg}");
+        }
+    }
+
+    // Print summary.
+    let n_total = test_cases.len();
+    let n_run = test_cases.iter().filter(|x| !x.result.skipped).count();
+    let n_failed = test_cases.iter().filter(|x| x.result.failed).count();
+    if n_total == 0 {
+        println!("FAIL: no test cases were found. Did you run me using runner.py?");
+        std::process::exit(1);
+    } else if n_run == 0 {
+        println!("FAIL: none of the {n_total} test case(s) matched the specified filter.");
+        std::process::exit(1);
+    } else if n_failed == 0 {
+        if n_run != n_total {
+            println!("PASS: all {n_run}/{n_total} matching test case(s) passed.");
+        } else {
+            println!("PASS: all {n_run} test case(s) passed.");
+        }
+        std::process::exit(0);
+    } else {
+        println!();
+        if n_run != n_total {
+            println!("FAIL: {n_failed} out of {n_run}/{n_total} matching test case(s) failed.");
+        } else {
+            println!("FAIL: {n_failed} out of {n_run} test case(s) failed.");
+        }
+        std::process::exit(1);
+    }
+}
diff --git a/tests/tests/README.md b/tests/tests/README.md
new file mode 100644
index 00000000..ccf0f241
--- /dev/null
+++ b/tests/tests/README.md
@@ -0,0 +1,60 @@
+State of test suite coverage
+============================
+
+ - [ ] TPC-H (integration tests)
+   - [x] 1
+   - [x] 2
+   - [x] 3
+   - [x] 4
+   - [x] 5
+   - [x] 6
+   - [x] 7
+   - [x] 8
+   - [x] 9
+   - [x] 10
+   - [ ] 11
+   - [ ] 12
+   - [ ] 13
+   - [x] 14
+   - [ ] 15
+   - [ ] 16
+   - [ ] 17
+   - [ ] 18
+   - [x] 19
+   - [ ] 20
+   - [ ] 21
+   - [ ] 22
+
+   Note: Isthmus crashes on the other queries, so they'll have to be written
+   manually or generated with another tool. Note also: since validation for
+   functions doesn't really work yet, some diagnostics are currently
+   blanket-disabled.
+ - [x] Expressions
+   - [x] Literals
+   - [x] References
+   - [x] Conditionals
+   - [x] Subqueries
+ - [x] Relations
+   - [x] Relation root
+   - [x] Common logic
+   - [x] Read
+     - [x] Virtual data source
+     - [x] Named data source
+     - [x] File data source
+     - [x] Extension data source
+     - [x] Base schema
+     - [x] Filter
+     - [x] Projection
+   - [x] Filter
+   - [x] Sort
+   - [x] Project
+   - [x] Cross
+   - [x] Join
+   - [x] Set
+   - [x] Fetch
+   - [x] Aggregate
+   - [x] Extensions
+ - [ ] Extensions
+   - [ ] Types
+   - [ ] Type variations
+   - [ ] Functions
+   - [x] Advanced extensions
diff --git a/tests/tests/expressions/conditionals/if-else.yaml b/tests/tests/expressions/conditionals/if-else.yaml
new file mode 100644
index 00000000..222472ce
--- /dev/null
+++ b/tests/tests/expressions/conditionals/if-else.yaml
@@ -0,0 +1,133 @@
+name: if-else
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            baseSchema:
+              names: [a, b, c, d, e]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_REQUIRED }
+                - i8: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_NULLABLE }
+                - bool: { nullability: NULLABILITY_NULLABLE }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 1 } }
+          __test: [ type: "i8" ]
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+          __test: [ type: "i8?" ]
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 4 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 1 } }
+          __test: [ type: "i8?" ]
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 3 } }
+          __test: [ type: "i16?" ]
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 3 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+          __test: [ type: "i16?" ]
+        - ifThen:
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 1 } }
+          __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: ifs*" } ]
+        - ifThen:
+            ifs:
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+                  __test: [ diag: { level: e, code: 4005, msg: "*predicates must yield booleans, but found i8*" } ]
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+            - if:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 0 } }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+                  __test: [ diag: { level: e, code: 4005, msg: "*branches must yield the same type: i16 vs. i8*" } ]
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+                __test: [ diag: { level: e, code: 4005, msg: "*branches must yield the same type: i16 vs. i8*" } ]
+          __test: [ type: "i8" ]
diff --git a/tests/tests/expressions/conditionals/matches-scalar.yaml b/tests/tests/expressions/conditionals/matches-scalar.yaml
new file mode 100644
index 00000000..84c0b6cb
--- /dev/null
+++ b/tests/tests/expressions/conditionals/matches-scalar.yaml
@@ -0,0 +1,50 @@
+name: matches-scalar
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            baseSchema:
+              names: [a, b, c, d]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_REQUIRED }
+                - i8: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_NULLABLE }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - singularOrList:
+            value:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+            options:
+            - literal: { i16: 1 }
+            - literal: { i16: 2 }
+            - literal: { i16: 3 }
+          __test: [ type: "boolean" ]
+        - singularOrList:
+            value:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+            options: []
+            __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: options*" } ]
+        - singularOrList:
+            value:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+            options:
+            - literal: { i16: 1 }
+            - literal: { i16: 2, nullable: true }
+              __test: [ diag: { level: e, code: 4008, msg: "*nullable vs. required*" } ]
+            - literal: { i32: 3 }
+              __test: [ diag: { level: e, code: 4005, msg: "*i32 vs. i16*" } ]
i16*" } ] + __test: [ type: "boolean" ] diff --git a/tests/tests/expressions/conditionals/matches-vector.yaml b/tests/tests/expressions/conditionals/matches-vector.yaml new file mode 100644 index 00000000..fc8f9f7e --- /dev/null +++ b/tests/tests/expressions/conditionals/matches-vector.yaml @@ -0,0 +1,74 @@ +name: matches-vector +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a, b, c, d] + struct: + nullability: NULLABILITY_REQUIRED + types: + - bool: { nullability: NULLABILITY_REQUIRED } + - i8: { nullability: NULLABILITY_REQUIRED } + - i16: { nullability: NULLABILITY_REQUIRED } + - i16: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + expressions: + - multiOrList: + value: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 2 } } + options: + - fields: + - literal: { i8: 1 } + - literal: { i16: 2 } + - fields: + - literal: { i8: 3 } + - literal: { i16: 4 } + - fields: + - literal: { i8: 5 } + - literal: { i16: 6 } + __test: [ type: "boolean" ] + - multiOrList: + value: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 2 } } + options: [] + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: options*" } ] + - multiOrList: + value: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 2 } } + options: + - fields: + - literal: { i8: 1 } + - literal: { i16: 2 } + - fields: + - literal: { i8: 3 } + - literal: { i16: 2, nullable: true } + __test: [ diag: { level: e, code: 4008, msg: "*nullable vs. required*" } ] + - fields: + - literal: { i8: 5 } + - literal: { i32: 3 } + __test: [ diag: { level: e, code: 4005, msg: "*i32 vs. i16*" } ] + - fields: + - literal: { i8: 5 } + __test: [ diag: { level: e, code: 4005, msg: "*numbers of fields differ*" } ] + __test: [ type: "boolean" ] diff --git a/tests/tests/expressions/conditionals/switch.yaml b/tests/tests/expressions/conditionals/switch.yaml new file mode 100644 index 00000000..48691ca9 --- /dev/null +++ b/tests/tests/expressions/conditionals/switch.yaml @@ -0,0 +1,121 @@ +name: switch +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a, b, c, d] + struct: + nullability: NULLABILITY_REQUIRED + types: + - i32: { nullability: NULLABILITY_REQUIRED } + - i8: { nullability: NULLABILITY_REQUIRED } + - i16: { nullability: NULLABILITY_REQUIRED } + - i16: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + expressions: + - switchExpression: + match: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + ifs: + - if: { i32: 0 } + then: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + else: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + __test: [ type: "i8" ] + - switchExpression: + match: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + ifs: + - if: { i32: 0 } + then: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + __test: [ type: "i8?" 
+        - switchExpression:
+            match:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 0 } }
+            ifs:
+            - if: { i32: 0 }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 3 } }
+          __test: [ type: "i16?" ]
+        - switchExpression:
+            match:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 0 } }
+            ifs:
+            - if: { i32: 0 }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+            - if: { i32: 1 }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 3 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+          __test: [ type: "i16?" ]
+        - switchExpression:
+            match:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 0 } }
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 1 } }
+          __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: ifs*" } ]
+        - switchExpression:
+            match:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 0 } }
+            ifs:
+            - if: { i16: 0 }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 1 } }
+              __test:
+              - diag: { level: e, code: 4005, msg: "*literal type must match switch expression: i16 vs. i32*" }
+            - if: { i32: 1 }
+              then:
+                selection:
+                  rootReference: {}
+                  directReference: { structField: { field: 2 } }
+                  __test: [ diag: { level: e, code: 4005, msg: "*branches must yield the same type: i16 vs. i8*" } ]
+            else:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 } }
+                __test: [ diag: { level: e, code: 4005, msg: "*branches must yield the same type: i16 vs. i8*" } ]
+          __test: [ type: "i8" ]
diff --git a/tests/tests/expressions/field-refs/README.md b/tests/tests/expressions/field-refs/README.md
new file mode 100644
index 00000000..e025a9a6
--- /dev/null
+++ b/tests/tests/expressions/field-refs/README.md
@@ -0,0 +1 @@
+This directory contains corner case tests for field references.
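+
+As elsewhere in this suite, each `.yaml` file is a Substrait plan in protobuf
+JSON form, annotated with `__test` keys that the test runner strips out and
+checks against the validator's output. A minimal sketch of the convention
+(the field index and expected type are illustrative only):
+
+    selection:
+      rootReference: {}
+      directReference: { structField: { field: 0 } }
+      __test: [ type: "boolean?" ]  # assert the data type resolved for this node
+
+`diag:` entries assert that a matching diagnostic (level, code, and an
+optional glob over the message) is attached at the annotated node, and a
+`foo__test:` key attaches the assertion to the sibling field `foo` instead.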
diff --git a/tests/tests/expressions/field-refs/mask-ref.yaml b/tests/tests/expressions/field-refs/mask-ref.yaml
new file mode 100644
index 00000000..9eda6cea
--- /dev/null
+++ b/tests/tests/expressions/field-refs/mask-ref.yaml
@@ -0,0 +1,114 @@
+name: mask-ref
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            common:
+              direct: {}
+            baseSchema:
+              names: [a, b, c, d, x, y, e]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_REQUIRED }
+                - i8: { nullability: NULLABILITY_REQUIRED }
+                - list:
+                    nullability: NULLABILITY_REQUIRED
+                    type: { i16: { nullability: NULLABILITY_REQUIRED } }
+                - struct:
+                    nullability: NULLABILITY_REQUIRED
+                    types:
+                    - i32: { nullability: NULLABILITY_REQUIRED }
+                    - i64: { nullability: NULLABILITY_REQUIRED }
+                - map:
+                    nullability: NULLABILITY_REQUIRED
+                    key: { string: { nullability: NULLABILITY_REQUIRED } }
+                    value: { date: { nullability: NULLABILITY_REQUIRED } }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - selection:
+            rootReference: {}
+            maskedReference:
+              select:
+                structItems:
+                - field: 0
+                - field: 4
+                  child:
+                    map:
+                      key:
+                        mapKey: key
+                        __test:
+                        - diag: { level: e, code: 1 }
+                        - diag: { level: w, code: 1 }
+                - field: 3
+                  child:
+                    struct:
+                      structItems:
+                      - field: 1
+                - field: 2
+                  child:
+                    list:
+                      selection:
+                      - item: { field: -1 }
+                      - slice: { start: 3, end: -2 }
+                - field: 5
+                  field__test: [ diag: { level: e, code: 2, msg: "*out of range*" } ]
+          __test: [ type: "STRUCT<boolean, MAP<string, date>, STRUCT<i64>, LIST<i16>, !>" ]
+        - selection:
+            rootReference: {}
+            maskedReference:
+              select: []
+          __test: [ type: "STRUCT<>" ]
+        - selection:
+            rootReference: {}
+            maskedReference:
+              select:
+                structItems:
+                - field: 3
+                  child:
+                    struct:
+                      structItems:
+                      - field: 1
+          __test: [ type: "STRUCT<i64>" ]
+        - selection:
+            rootReference: {}
+            maskedReference:
+              maintainSingularStruct: true
+              select:
+                structItems:
+                - field: 3
+                  child:
+                    struct:
+                      structItems:
+                      - field: 1
+          __test: [ type: "STRUCT<STRUCT<i64>>" ]
+        - selection:
+            rootReference: {}
+            maskedReference:
+              select:
+                structItems:
+                - field: 2
+                  child:
+                    map:
+                      __test: [ diag: { level: e, code: 4005, msg: "*requires a map*LIST*" } ]
+                      key:
+                        mapKey: key
+                        __test:
+                        - diag: { level: e, code: 1 }
+                        - diag: { level: w, code: 1 }
+                - field: 0
+                  child:
+                    struct:
+                      __test: [ diag: { level: e, code: 4005, msg: "*requires a struct*boolean*" } ]
+                      structItems: []
+                - field: 4
+                  child:
+                    list:
+                      __test: [ diag: { level: e, code: 4005, msg: "*requires a list*MAP*" } ]
+                      selection:
+                      - item: { field: -1 }
diff --git a/tests/tests/expressions/field-refs/outer-ref.yaml b/tests/tests/expressions/field-refs/outer-ref.yaml
new file mode 100644
index 00000000..a14189f5
--- /dev/null
+++ b/tests/tests/expressions/field-refs/outer-ref.yaml
@@ -0,0 +1,72 @@
+name: outer-ref
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            baseSchema:
+              names: [a, b, c]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_REQUIRED }
+                - i8: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_REQUIRED }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - subquery:
+            scalar:
+              input:
+                project:
+                  common: { emit: { outputMapping: [2] } }
+                  input:
+                    read:
+                      baseSchema:
+                        names: [x, y]
+                        struct:
+                          nullability: NULLABILITY_REQUIRED
+                          types:
+                          - string: { nullability: NULLABILITY_REQUIRED }
+                          - string: { nullability: NULLABILITY_REQUIRED }
+                      namedTable:
+                        names:
+                        - test2
+                  expressions:
+                  - ifThen:
+                      ifs:
+                      - if:
+                          selection:
+                            outerReference:
+                              stepsOut: 1
+                              __test: [ type: "NSTRUCT<a: boolean, b: i8, c: i16>" ]
+                            directReference: { structField: { field: 0 } }
+                        then:
+                          selection:
+                            rootReference: {}
+                            directReference: { structField: { field: 1 } }
+                      else:
+                        selection:
+                          rootReference: {}
+                          directReference: { structField: { field: 0 } }
+        - selection:
+            outerReference:
+              stepsOut: 2
+              __test:
+              - diag: { level: e, code: 6001, msg: "*indexing query beyond current query depth (2)*" }
+            directReference: { structField: { field: 0 } }
+        - selection:
+            outerReference:
+              stepsOut: 1
+              __test:
+              - diag: { level: e, code: 6001, msg: "*indexing query beyond current query depth (1)*" }
+            directReference: { structField: { field: 0 } }
+        - selection:
+            outerReference:
+              stepsOut: 0
+              stepsOut__test:
+              - diag: { level: e, code: 2, msg: "*must be at least 1 (use RootReference instead)*" }
+            directReference: { structField: { field: 0 } }
diff --git a/tests/tests/expressions/field-refs/scalar-list-ref.yaml b/tests/tests/expressions/field-refs/scalar-list-ref.yaml
new file mode 100644
index 00000000..d8fc5450
--- /dev/null
+++ b/tests/tests/expressions/field-refs/scalar-list-ref.yaml
@@ -0,0 +1,96 @@
+name: scalar-list-ref
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            common:
+              direct: {}
+            baseSchema:
+              names: [a, b]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - list:
+                    nullability: NULLABILITY_REQUIRED
+                    type:
+                      list:
+                        nullability: NULLABILITY_NULLABLE
+                        type:
+                          bool:
+                            nullability: NULLABILITY_REQUIRED
+                - list:
+                    nullability: NULLABILITY_REQUIRED
+                    type:
+                      bool:
+                        nullability: NULLABILITY_REQUIRED
+            namedTable:
+              names:
+              - test
+        expressions:
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+          __test: [ type: "LIST<LIST?<boolean>>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  listElement:
+                    offset: 0
+          __test: [ type: "LIST?<boolean>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  listElement:
+                    offset: -1
+          __test: [ type: "LIST?<boolean>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  listElement:
+                    offset: 0
+                    child:
+                      listElement:
+                        offset: 0
+          __test: [ type: "boolean?" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 1
+                child:
+                  listElement:
+                    offset: 0
+          # FIXME: should this yield nullable or not? That is, is out-of-range
+          # a runtime error or does it yield null? Or does that depend on the
+          # nullability of the field type?
+          __test: [ type: "boolean" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  listElement:
+                    offset: 0
+                    child:
+                      listElement:
+                        offset: 0
+                        child:
+                          listElement:
+                            offset: 0
+                            __test: [ diag: { level: e, code: 4005, msg: "*requires a list type*boolean*" } ]
+          __test: [ type: "!?" ]
diff --git a/tests/tests/expressions/field-refs/scalar-map-ref.yaml b/tests/tests/expressions/field-refs/scalar-map-ref.yaml
new file mode 100644
index 00000000..44bff7dc
--- /dev/null
+++ b/tests/tests/expressions/field-refs/scalar-map-ref.yaml
@@ -0,0 +1,96 @@
+name: scalar-map-ref
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            common:
+              direct: {}
+            baseSchema:
+              names: [a, b]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - map:
+                    nullability: NULLABILITY_REQUIRED
+                    key: { i8: { nullability: NULLABILITY_REQUIRED } }
+                    value:
+                      map:
+                        nullability: NULLABILITY_NULLABLE
+                        key: { string: { nullability: NULLABILITY_REQUIRED } }
+                        value: { bool: { nullability: NULLABILITY_REQUIRED } }
+                - map:
+                    nullability: NULLABILITY_REQUIRED
+                    key: { i32: { nullability: NULLABILITY_REQUIRED } }
+                    value: { bool: { nullability: NULLABILITY_REQUIRED } }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+          __test: [ type: "MAP<i8, MAP?<string, boolean>>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  mapKey:
+                    mapKey: { i8: 0, nullable: false }
+          __test: [ type: "MAP?<string, boolean>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  mapKey:
+                    mapKey: { i16: 0, nullable: false }
+                    __test: [ diag: { level: e, code: 4005, msg: "*map key type mismatch*" } ]
+          __test: [ type: "MAP?<string, boolean>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  mapKey:
+                    mapKey: { i8: 0, nullable: false }
+                    child:
+                      mapKey:
+                        mapKey: { string: hello, nullable: false }
+          __test: [ type: "boolean?" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 1
+                child:
+                  mapKey:
+                    mapKey: { i32: 0, nullable: false }
+          # FIXME: should this yield nullable or not? That is, are missing keys
+          # a runtime error or does it yield null? Or does that depend on the
+          # nullability of the value type?
+          __test: [ type: "boolean" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 0
+                child:
+                  mapKey:
+                    mapKey: { i8: 0, nullable: false }
+                    child:
+                      mapKey:
+                        mapKey: { string: hello, nullable: false }
+                        child:
+                          mapKey:
+                            mapKey: { string: hello, nullable: false }
+                            __test: [ diag: { level: e, code: 4005, msg: "*requires a map type*boolean*" } ]
+          __test: [ type: "!?" ]
diff --git a/tests/tests/expressions/field-refs/scalar-struct-ref.yaml b/tests/tests/expressions/field-refs/scalar-struct-ref.yaml
new file mode 100644
index 00000000..24326ae7
--- /dev/null
+++ b/tests/tests/expressions/field-refs/scalar-struct-ref.yaml
@@ -0,0 +1,110 @@
+name: scalar-struct-ref
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      project:
+        input:
+          read:
+            common:
+              direct: {}
+            baseSchema:
+              names: [a, b, c, d, x, y]
+              struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_NULLABLE }
+                - i8: { nullability: NULLABILITY_REQUIRED }
+                - i16: { nullability: NULLABILITY_REQUIRED }
+                - struct:
+                    nullability: NULLABILITY_NULLABLE
+                    types:
+                    - i32: { nullability: NULLABILITY_REQUIRED }
+                    - i64: { nullability: NULLABILITY_REQUIRED }
+            namedTable:
+              names:
+              - test
+        expressions:
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 4
+                field__test: [ diag: { level: e, code: 2, msg: "*out of range*" } ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: -1
+                field__test: [ diag: { level: e, code: 2, msg: "*cannot be less than zero*" } ]
+        - selection:
+            rootReference: {}
+            directReference: { structField: { field: 0 }}
+          __test: [ type: "boolean?" ]
+        - selection:
+            rootReference: {}
+            directReference: { structField: { field: 1 }}
+          __test: [ type: "i8" ]
+        - selection:
+            rootReference: {}
+            directReference: { structField: { field: 2 }}
+          __test: [ type: "i16" ]
+        - selection:
+            rootReference: {}
+            directReference: { structField: { field: 3 }}
+          __test: [ type: "NSTRUCT?<x: i32, y: i64>" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 3
+                child:
+                  structField:
+                    field: 0
+          __test: [ type: "i32?" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 3
+                child:
+                  structField:
+                    field: 1
+          __test: [ type: "i64?" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 3
+                child:
+                  structField:
+                    field: 2
+                    field__test: [ diag: { level: e, code: 2, msg: "*out of range*" } ]
+          __test: [ type: "!?" ]
+        - selection:
+            rootReference: {}
+            directReference:
+              structField:
+                field: 2
+                child:
+                  structField:
+                    field: 0
+                    __test: [ diag: { level: e, code: 4005, msg: "*requires a struct type*i16*" } ]
+          __test: [ type: "!" ]
+        - selection:
+            expression:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 3 }}
+            directReference: { structField: { field: 0 }}
+          __test: [ type: "i32?" ]
+        - selection:
+            expression:
+              selection:
+                rootReference: {}
+                directReference: { structField: { field: 2 }}
+            directReference:
+              structField:
+                field: 0
+                __test: [ diag: { level: e, code: 4005, msg: "*requires a struct type*i16*" } ]
+          __test: [ type: "!" ]
diff --git a/tests/tests/expressions/literals/README.md b/tests/tests/expressions/literals/README.md
new file mode 100644
index 00000000..174cc77d
--- /dev/null
+++ b/tests/tests/expressions/literals/README.md
@@ -0,0 +1 @@
+This directory contains corner case tests for all literals and types.
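+
+The `type:` assertions at the end of these plans use the validator's textual
+type notation, e.g. (a sketch, not an exhaustive grammar):
+
+    __test: [ type: "NSTRUCT<x: i8, y: i8?>" ]
+
+Simple types are lowercase, compound types are uppercase with their parameters
+in angle brackets, `?` marks nullability, and `!` marks a type that could not
+be resolved because of an earlier error.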
diff --git a/tests/tests/expressions/literals/binary.yaml b/tests/tests/expressions/literals/binary.yaml
new file mode 100644
index 00000000..945e31ab
--- /dev/null
+++ b/tests/tests/expressions/literals/binary.yaml
@@ -0,0 +1,34 @@
+name: binary
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - binary: { nullability: NULLABILITY_REQUIRED }
+            - binary: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - binary: Zm9v # base64(foo)
+              nullable: false
+            - binary: YmFy # base64(bar)
+              nullable: true
+          - fields:
+            - binary: ""
+              nullable: false
+            - binary: BAgVFiNC # base64(04 08 15 16 23 42)
+              nullable: true
+          - fields:
+            - "null":
+                binary: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                binary: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: binary, y: binary?>" ]
diff --git a/tests/tests/expressions/literals/boolean.yaml b/tests/tests/expressions/literals/boolean.yaml
new file mode 100644
index 00000000..b20ef75c
--- /dev/null
+++ b/tests/tests/expressions/literals/boolean.yaml
@@ -0,0 +1,30 @@
+name: boolean
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - bool: { nullability: NULLABILITY_REQUIRED }
+            - bool: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - { boolean: false, nullable: false }
+            - { boolean: false, nullable: true }
+          - fields:
+            - { boolean: true, nullable: false }
+            - { boolean: true, nullable: true }
+          - fields:
+            - "null":
+                bool: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                bool: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: boolean, y: boolean?>" ]
diff --git a/tests/tests/expressions/literals/date.yaml b/tests/tests/expressions/literals/date.yaml
new file mode 100644
index 00000000..b6cf1245
--- /dev/null
+++ b/tests/tests/expressions/literals/date.yaml
@@ -0,0 +1,48 @@
+name: date
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - date: { nullability: NULLABILITY_REQUIRED }
+            - date: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - date: 0 # 1970 epoch
+              nullable: false
+            - date: 18231 # 2019-12-01
+              nullable: true
+          - fields:
+            - date: -354285 # 1000-01-01
+              nullable: false
+            - date: -354286 # 999-12-31
+              date__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - date: 2932897 # 10000-01-01
+              date__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - date: 2932896 # 9999-12-31
+              nullable: true
+          - fields:
+            - date: -2147483648 # i32 min (not representable with chrono::NaiveDate; don't panic!)
+              date__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - date: 2147483647 # i32 max (not representable with chrono::NaiveDate; don't panic!)
+              date__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - "null":
+                date: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                date: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: date, y: date?>" ]
diff --git a/tests/tests/expressions/literals/decimal.yaml b/tests/tests/expressions/literals/decimal.yaml
new file mode 100644
index 00000000..69be98b2
--- /dev/null
+++ b/tests/tests/expressions/literals/decimal.yaml
@@ -0,0 +1,156 @@
+name: decimal
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - decimal: { scale: 2, precision: 10, nullability: NULLABILITY_REQUIRED }
+            - decimal: { scale: 5, precision: 5, nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - decimal:
+                value: OTAAAAAAAAAAAAAAAAAAAA== # 123.45
+                scale: 2
+                precision: 10
+              nullable: false
+            - decimal:
+                value: OTAAAAAAAAAAAAAAAAAAAA== # .12345
+                scale: 5
+                precision: 5
+              nullable: true
+          - fields:
+            - decimal:
+                value: /+MLVAIAAAAAAAAAAAAAAA== # 99999999.99
+                scale: 2
+                precision: 10
+              nullable: false
+            - decimal:
+                value: n4YBAAAAAAAAAAAAAAAAAA== # .99999
+                scale: 5
+                precision: 5
+              nullable: true
+          - fields:
+            - decimal:
+                value: AOQLVAIAAAAAAAAAAAAAAA== # 100000000.00
+                scale: 2
+                precision: 10
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - decimal:
+                value: oIYBAAAAAAAAAAAAAAAAAA== # 1.00000
+                scale: 5
+                precision: 5
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - decimal:
+                value: ARz0q/3//////////////w== # -99999999.99
+                scale: 2
+                precision: 10
+              nullable: false
+            - decimal:
+                value: YXn+/////////////////w== # -.99999
+                scale: 5
+                precision: 5
+              nullable: true
+          - fields:
+            - decimal:
+                value: ABz0q/3//////////////w== # -100000000.00
+                scale: 2
+                precision: 10
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - decimal:
+                value: YHn+/////////////////w== # -1.00000
+                scale: 5
+                precision: 5
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - decimal:
+                value: /////z8iigl6xIZaqEw7Sw== # 99999999999999999999999999999999999999 (max)
+                scale: 0
+                precision: 38
+              nullable: false
+            - decimal:
+                value: AQAAAMDddfaFO3mlV7PEtA== # -99999999999999999999999999999999999999 (min)
+                scale: 0
+                precision: 38
+              nullable: true
+            __test:
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+          - fields:
+            - decimal:
+                value: AAAAAAAAAAAAAAAAAAAA # 15 bytes
+                value__test: [ diag: { level: e, code: 6002, msg: "*16 bytes*15*" } ]
+                scale: 2
+                precision: 10
+              nullable: false
+            - decimal:
+                value: AAAAAAAAAAAAAAAAAAAAAAA= # 17 bytes
+                value__test: [ diag: { level: e, code: 6002, msg: "*16 bytes*17*" } ]
+                scale: 5
+                precision: 5
+              nullable: true
+          - fields:
+            - "null":
+                decimal: { scale: 2, precision: 10, nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                decimal: { scale: 5, precision: 5, nullability: NULLABILITY_NULLABLE }
+          - fields:
+            - "null":
+                decimal:
+                  scale: -2147483648 # i32 minimum
+                  scale__test: [ diag: { level: e, code: 2, msg: "*parameters cannot be negative*" } ]
+                  precision: -2147483648 # i32 minimum
+                  precision__test: [ diag: { level: e, code: 2, msg: "*parameters cannot be negative*" } ]
+                  nullability: NULLABILITY_NULLABLE
+            - "null":
+                decimal:
+                  scale: 0
+                  precision: 0 # 0 digits doesn't make sense
+                  nullability: NULLABILITY_NULLABLE
+              __test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ]
+          - fields:
+            - "null":
+                decimal:
+                  scale: 0
+                  precision: 1 # minimum precision
+                  nullability: NULLABILITY_NULLABLE
+            - "null":
+                decimal:
+                  scale: 0
+                  precision: 38 # maximum precision
+                  nullability: NULLABILITY_NULLABLE
+            __test:
+            - diag: { level: e, code: 4008 }
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+            - diag: { level: e, code: 4005 }
+          - fields:
+            - "null":
+                decimal:
+                  scale: 0
+                  precision: 39 # beyond maximum precision
+                  nullability: NULLABILITY_NULLABLE
+              __test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ]
+            - "null":
+                decimal:
+                  scale: 6 # scale is not allowed to be greater than precision
+                  precision: 5
+                  nullability: NULLABILITY_NULLABLE
+              __test: [ diag: { level: e, code: 4002, msg: "*out of range 0..5*" } ]
+      __test: [ type: "NSTRUCT<x: DECIMAL<10, 2>, y: DECIMAL?<5, 5>>" ]
diff --git a/tests/tests/expressions/literals/fixed_binary.yaml b/tests/tests/expressions/literals/fixed_binary.yaml
new file mode 100644
index 00000000..41a9543e
--- /dev/null
+++ b/tests/tests/expressions/literals/fixed_binary.yaml
@@ -0,0 +1,71 @@
+name: fixed_binary
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - fixed_binary: { length: 3, nullability: NULLABILITY_REQUIRED }
+            - fixed_binary: { length: 5, nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - fixed_binary: Zm9v # base64("foo")
+              nullable: false
+            - fixed_binary: aGVsbG8= # base64("hello")
+              nullable: true
+          - fields:
+            - fixed_binary: YmFy # base64("bar")
+              nullable: false
+            - fixed_binary: d29ybGQ= # base64("world")
+              nullable: true
+          - fields:
+            - fixed_binary: d3JvbmcgbGVuZ3Ro # base64("wrong length")
+              nullable: false
+            - fixed_binary: AAECAwQ= # base64(00 01 02 03 04)
+              nullable: true
+            __test: [ diag: { level: e, code: 4005, msg: "*12 vs. 3*"} ]
3*"} ] + - fields: + - "null": + fixed_binary: { length: 3, nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + fixed_binary: { length: 5, nullability: NULLABILITY_NULLABLE } + - fields: + - "null": + fixed_binary: + length: -2147483648 # i32 minimum + length__test: [ diag: { level: e, code: 2, msg: "*parameters cannot be negative*" } ] + nullability: NULLABILITY_NULLABLE + - "null": + fixed_binary: + length: 0 # size 0 not allowed + nullability: NULLABILITY_NULLABLE + __test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ] + - fields: + - "null": + fixed_binary: + length: 1 # minimum size + nullability: NULLABILITY_NULLABLE + - "null": + fixed_binary: + length: 2147483647 # maximum size + nullability: NULLABILITY_NULLABLE + __test: + - diag: { level: e, code: 4008 } + - diag: { level: e, code: 4005 } + - diag: { level: e, code: 4005 } + - fields: + - fixed_binary: "" + fixed_binary__test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ] + nullable: false + - fixed_binary: AA== # base64(00) + nullable: true + __test: [ diag: { level: e, code: 4005 } ] + __test: [ type: "NSTRUCT, y: FIXEDBINARY?<5>>" ] diff --git a/tests/tests/expressions/literals/fixed_char.yaml b/tests/tests/expressions/literals/fixed_char.yaml new file mode 100644 index 00000000..680afc95 --- /dev/null +++ b/tests/tests/expressions/literals/fixed_char.yaml @@ -0,0 +1,71 @@ +name: fixed_char +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fixed_char: { length: 3, nullability: NULLABILITY_REQUIRED } + - fixed_char: { length: 5, nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - fixed_char: "foo" + nullable: false + - fixed_char: "hello" + nullable: true + - fields: + - fixed_char: "bar" + nullable: false + - fixed_char: "world" + nullable: true + - fields: + - fixed_char: "wrong length" + nullable: false + - fixed_char: "!@#$%" + nullable: true + __test: [ diag: { level: e, code: 4005, msg: "*12 vs. 
3*"} ] + - fields: + - "null": + fixed_char: { length: 3, nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + fixed_char: { length: 5, nullability: NULLABILITY_NULLABLE } + - fields: + - "null": + fixed_char: + length: -2147483648 # i32 minimum + length__test: [ diag: { level: e, code: 2, msg: "*parameters cannot be negative*" } ] + nullability: NULLABILITY_NULLABLE + - "null": + fixed_char: + length: 0 # size 0 not allowed + nullability: NULLABILITY_NULLABLE + __test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ] + - fields: + - "null": + fixed_char: + length: 1 # minimum size + nullability: NULLABILITY_NULLABLE + - "null": + fixed_char: + length: 2147483647 # maximum size + nullability: NULLABILITY_NULLABLE + __test: + - diag: { level: e, code: 4008 } + - diag: { level: e, code: 4005 } + - diag: { level: e, code: 4005 } + - fields: + - fixed_char: "" + fixed_char__test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ] + nullable: false + - fixed_char: " " + nullable: true + __test: [ diag: { level: e, code: 4005 } ] + __test: [ type: "NSTRUCT, y: FIXEDCHAR?<5>>" ] diff --git a/tests/tests/expressions/literals/fp32.yaml b/tests/tests/expressions/literals/fp32.yaml new file mode 100644 index 00000000..d8b14454 --- /dev/null +++ b/tests/tests/expressions/literals/fp32.yaml @@ -0,0 +1,34 @@ +name: fp32 +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - fp32: 0.0 + nullable: false + - fp32: 3.1415926535897932384626433832795028841971 + nullable: true + - fields: + - fp32: -100000000000000000000000.0 + nullable: false + - fp32: 100000000000000000000000.0 + nullable: true + - fields: + - "null": + fp32: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + fp32: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/fp64.yaml b/tests/tests/expressions/literals/fp64.yaml new file mode 100644 index 00000000..4bc1c157 --- /dev/null +++ b/tests/tests/expressions/literals/fp64.yaml @@ -0,0 +1,34 @@ +name: fp64 +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp64: { nullability: NULLABILITY_REQUIRED } + - fp64: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - fp64: 0.0 + nullable: false + - fp64: 3.1415926535897932384626433832795028841971 + nullable: true + - fields: + - fp64: -100000000000000000000000.0 + nullable: false + - fp64: 100000000000000000000000.0 + nullable: true + - fields: + - "null": + fp64: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + fp64: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/i16.yaml b/tests/tests/expressions/literals/i16.yaml new file mode 100644 index 00000000..84f4e1b1 --- /dev/null +++ b/tests/tests/expressions/literals/i16.yaml @@ -0,0 +1,41 @@ +name: i16 +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + 
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - i16: { nullability: NULLABILITY_REQUIRED }
+            - i16: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - i16: 0
+              nullable: false
+            - i16: 0
+              nullable: true
+          - fields:
+            - i16: -32768
+              nullable: false
+            - i16: -32769
+              i16__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - i16: 32768
+              i16__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - i16: 32767
+              nullable: true
+          - fields:
+            - "null":
+                i16: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                i16: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: i16, y: i16?>" ]
diff --git a/tests/tests/expressions/literals/i32.yaml b/tests/tests/expressions/literals/i32.yaml
new file mode 100644
index 00000000..f5c49b55
--- /dev/null
+++ b/tests/tests/expressions/literals/i32.yaml
@@ -0,0 +1,34 @@
+name: i32
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - i32: { nullability: NULLABILITY_REQUIRED }
+            - i32: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - i32: 0
+              nullable: false
+            - i32: 0
+              nullable: true
+          - fields:
+            - i32: -2147483648
+              nullable: false
+            - i32: 2147483647
+              nullable: true
+          - fields:
+            - "null":
+                i32: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                i32: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: i32, y: i32?>" ]
diff --git a/tests/tests/expressions/literals/i64.yaml b/tests/tests/expressions/literals/i64.yaml
new file mode 100644
index 00000000..ee559a2f
--- /dev/null
+++ b/tests/tests/expressions/literals/i64.yaml
@@ -0,0 +1,34 @@
+name: i64
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - i64: { nullability: NULLABILITY_REQUIRED }
+            - i64: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - i64: 0
+              nullable: false
+            - i64: 0
+              nullable: true
+          - fields:
+            - i64: -9223372036854775808
+              nullable: false
+            - i64: 9223372036854775807
+              nullable: true
+          - fields:
+            - "null":
+                i64: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                i64: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: i64, y: i64?>" ]
diff --git a/tests/tests/expressions/literals/i8.yaml b/tests/tests/expressions/literals/i8.yaml
new file mode 100644
index 00000000..1323d751
--- /dev/null
+++ b/tests/tests/expressions/literals/i8.yaml
@@ -0,0 +1,41 @@
+name: i8
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - i8: { nullability: NULLABILITY_REQUIRED }
+            - i8: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - i8: 0
+              nullable: false
+            - i8: 0
+              nullable: true
+          - fields:
+            - i8: -128
+              nullable: false
+            - i8: -129
+              i8__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - i8: 128
+              i8__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - i8: 127
+              nullable: true
+          - fields:
+            - "null":
+                i8: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                i8: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: i8, y: i8?>" ]
diff --git a/tests/tests/expressions/literals/interval_day.yaml b/tests/tests/expressions/literals/interval_day.yaml
new file mode 100644
index 00000000..5adedae2
--- /dev/null
+++ b/tests/tests/expressions/literals/interval_day.yaml
@@ -0,0 +1,73 @@
+name: interval_day
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - interval_day: { nullability: NULLABILITY_REQUIRED }
+            - interval_day: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - interval_day_to_second:
+                days: 0
+                seconds: 0
+              nullable: false
+            - interval_day_to_second:
+                days: 123
+                seconds: 456
+              nullable: true
+          - fields:
+            - interval_day_to_second:
+                days: 3650000
+                seconds: 0
+              nullable: false
+            - interval_day_to_second:
+                days: 3650001
+                days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                seconds: 0
+              nullable: true
+          - fields:
+            - interval_day_to_second:
+                days: -3650000
+                seconds: 0
+              nullable: false
+            - interval_day_to_second:
+                days: -3650001
+                days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                seconds: 0
+              nullable: true
+          - fields:
+            - interval_day_to_second:
+                days: -2147483648
+                days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                seconds: 0
+              nullable: false
+            - interval_day_to_second:
+                days: 2147483647
+                days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                seconds: 0
+              nullable: true
+          - fields:
+            - interval_day_to_second:
+                days: 0
+                seconds: -2147483648
+              nullable: false
+            - interval_day_to_second:
+                days: 0
+                seconds: 2147483647
+              nullable: true
+          - fields:
+            - "null":
+                interval_day: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                interval_day: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: interval_day, y: interval_day?>" ]
diff --git a/tests/tests/expressions/literals/interval_year.yaml b/tests/tests/expressions/literals/interval_year.yaml
new file mode 100644
index 00000000..f3ab2bb5
--- /dev/null
+++ b/tests/tests/expressions/literals/interval_year.yaml
@@ -0,0 +1,101 @@
+name: interval_year
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - interval_year: { nullability: NULLABILITY_REQUIRED }
+            - interval_year: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - interval_year_to_month:
+                years: 0
+                months: 0
+              nullable: false
+            - interval_year_to_month:
+                years: 123
+                months: 456
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: 10000
+                months: -120000
+              nullable: false
+            - interval_year_to_month:
+                years: 10001
+                years__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                months: -120001
+                months__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: -10000
+                months: 120000
+              nullable: false
+            - interval_year_to_month:
+                years: -10001
+                years__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                months: 120001
+                months__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: 5000
+                months: 60000
+              nullable: false
+            - interval_year_to_month:
+                years: 5000
+                months: 60001
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: -5000
+                months: -60000
+              nullable: false
+            - interval_year_to_month:
+                years: -5001
+                months: -60000
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: -2147483648
+                years__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                months: 0
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - interval_year_to_month:
+                years: 2147483647
+                years__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                months: 0
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - interval_year_to_month:
+                years: 0
+                months: -2147483648
+                months__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: false
+            - interval_year_to_month:
+                years: 0
+                months: 2147483647
+                months__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+                __test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ]
+              nullable: true
+          - fields:
+            - "null":
+                interval_year: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                interval_year: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: interval_year, y: interval_year?>" ]
diff --git a/tests/tests/expressions/literals/list.yaml b/tests/tests/expressions/literals/list.yaml
new file mode 100644
index 00000000..a9eed041
--- /dev/null
+++ b/tests/tests/expressions/literals/list.yaml
@@ -0,0 +1,61 @@
+name: list
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - list:
+                nullability: NULLABILITY_REQUIRED
+                type:
+                  bool: { nullability: NULLABILITY_NULLABLE }
+            - list:
+                nullability: NULLABILITY_NULLABLE
+                type:
+                  bool: { nullability: NULLABILITY_REQUIRED }
+        virtualTable:
+          values:
+          - fields:
+            - list:
+                values:
+                - boolean: false
+                  nullable: true
+                - boolean: true
+                  nullable: true
+              nullable: false
+            - list:
+                values:
+                - boolean: false
+                  nullable: false
+                - boolean: true
+                  nullable: false
+              nullable: true
+          - fields:
+            - list:
+                values: []
+                __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: values*" } ]
+              nullable: false
+            - empty_list:
+                nullability: NULLABILITY_NULLABLE
+                type:
+                  bool: { nullability: NULLABILITY_REQUIRED }
+              nullable: true
+          - fields:
+            - "null":
+                list:
+                  nullability: NULLABILITY_REQUIRED
+                  type:
+                    bool: { nullability: NULLABILITY_NULLABLE }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                list:
+                  nullability: NULLABILITY_NULLABLE
+                  type:
+                    bool: { nullability: NULLABILITY_REQUIRED }
+      __test: [ type: "NSTRUCT<x: LIST<boolean?>, y: LIST?<boolean>>" ]
diff --git a/tests/tests/expressions/literals/map.yaml b/tests/tests/expressions/literals/map.yaml
new file mode 100644
index 00000000..d4a5404c
--- /dev/null
+++ b/tests/tests/expressions/literals/map.yaml
@@ -0,0 +1,77 @@
+name: map
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - map:
+                nullability: NULLABILITY_REQUIRED
+                key: { string: { nullability: NULLABILITY_NULLABLE } }
+                value: { bool: { nullability: NULLABILITY_REQUIRED } }
+            - map:
+                nullability: NULLABILITY_NULLABLE
+                key: { bool: { nullability: NULLABILITY_REQUIRED } }
+                value: { string: { nullability: NULLABILITY_NULLABLE } }
+        virtualTable:
+          values:
+          - fields:
+            - map:
+                key_values:
+                - key:
+                    string: hello
+                    nullable: true
+                  value:
+                    boolean: false
+                    nullable: false
+                - key:
+                    string: world
+                    nullable: true
+                  value:
+                    boolean: true
+                    nullable: false
+              nullable: false
+            - map:
+                key_values:
+                - key:
+                    boolean: false
+                    nullable: false
+                  value:
+                    string: hello
+                    nullable: true
+                - key:
+                    boolean: true
+                    nullable: false
+                  value:
+                    string: world
+                    nullable: true
+              nullable: true
+          - fields:
+            - map:
+                key_values: []
+                __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: key_values*" } ]
+              nullable: false
+            - empty_map:
+                nullability: NULLABILITY_NULLABLE
+                key: { bool: { nullability: NULLABILITY_REQUIRED } }
+                value: { string: { nullability: NULLABILITY_NULLABLE } }
+              nullable: true
+          - fields:
+            - "null":
+                map:
+                  nullability: NULLABILITY_REQUIRED
+                  key: { string: { nullability: NULLABILITY_NULLABLE } }
+                  value: { bool: { nullability: NULLABILITY_REQUIRED } }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                map:
+                  nullability: NULLABILITY_NULLABLE
+                  key: { bool: { nullability: NULLABILITY_REQUIRED } }
+                  value: { string: { nullability: NULLABILITY_NULLABLE } }
+      __test: [ type: "NSTRUCT<x: MAP<string?, boolean>, y: MAP?<boolean, string?>>" ]
diff --git a/tests/tests/expressions/literals/string.yaml b/tests/tests/expressions/literals/string.yaml
new file mode 100644
index 00000000..23f66cb4
--- /dev/null
+++ b/tests/tests/expressions/literals/string.yaml
@@ -0,0 +1,34 @@
+name: string
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - string: { nullability: NULLABILITY_REQUIRED }
+            - string: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - string: "foo"
+              nullable: false
+            - string: "bar"
+              nullable: true
+          - fields:
+            - string: ""
+              nullable: false
+            - string: "!@#$%^&*()_+<>"
+              nullable: true
+          - fields:
+            - "null":
+                string: { nullability: NULLABILITY_REQUIRED }
+              __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ]
+            - "null":
+                string: { nullability: NULLABILITY_NULLABLE }
+      __test: [ type: "NSTRUCT<x: string, y: string?>" ]
diff --git a/tests/tests/expressions/literals/struct.yaml b/tests/tests/expressions/literals/struct.yaml
new file mode 100644
index 00000000..e8942608
--- /dev/null
+++ b/tests/tests/expressions/literals/struct.yaml
@@ -0,0 +1,40 @@
+name: struct
+plan:
+  __test: [ level: i ]
+  relations:
+  - rel:
+      read:
+        common:
+          direct: {}
+        baseSchema:
+          names: [x, a, b, y]
+          struct:
+            nullability: NULLABILITY_REQUIRED
+            types:
+            - struct:
+                nullability: NULLABILITY_REQUIRED
+                types:
+                - bool: { nullability: NULLABILITY_REQUIRED }
+                - bool: { nullability: NULLABILITY_NULLABLE }
+            - struct: { nullability: NULLABILITY_NULLABLE }
+        virtualTable:
+          values:
+          - fields:
+            - struct:
+                fields:
+                - { boolean: false, nullable: false }
+                - { boolean: false, nullable: true }
+              nullable: false
+            - struct: {}
+              nullable: true
+          - fields:
+            - "null":
+                struct:
+                  nullability: NULLABILITY_REQUIRED
+                  types:
+                  - bool: { nullability: NULLABILITY_REQUIRED }
+                  - bool: { nullability: NULLABILITY_NULLABLE }
NULLABILITY_NULLABLE } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + struct: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT, y: NSTRUCT?<>>" ] diff --git a/tests/tests/expressions/literals/time.yaml b/tests/tests/expressions/literals/time.yaml new file mode 100644 index 00000000..55cbca4a --- /dev/null +++ b/tests/tests/expressions/literals/time.yaml @@ -0,0 +1,48 @@ +name: time +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - time: { nullability: NULLABILITY_REQUIRED } + - time: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - time: 61200000000 # 17:00:00.000000 + nullable: false + - time: 45296789876 # 12:34:56.789876 + nullable: true + - fields: + - time: 0 # 00:00:00.000000 (lowest value) + nullable: false + - time: -1 # before start of day + time__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - time: 86400000000 # after end of day (leap seconds not supported) + time__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - time: 86399999999 # 23:59:59.999999 (highest value) + nullable: true + - fields: + - time: -9223372036854775808 # i64 min + time__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - time: 9223372036854775807 # i64 max + time__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - "null": + time: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + time: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/timestamp.yaml b/tests/tests/expressions/literals/timestamp.yaml new file mode 100644 index 00000000..5410c4df --- /dev/null +++ b/tests/tests/expressions/literals/timestamp.yaml @@ -0,0 +1,48 @@ +name: timestamp +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - timestamp: { nullability: NULLABILITY_REQUIRED } + - timestamp: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - timestamp: 0 # 1970 epoch + nullable: false + - timestamp: 1575158400000000 # 2019-12-00 00:00:00.000000 + nullable: true + - fields: + - timestamp: -30610224000000000 # 1000-01-01 00:00:00.000000 + nullable: false + - timestamp: -30610224000000001 # 999-12-31 23:59:59.999999 + timestamp__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - timestamp: 253402300800000000 # 10000-01-01 00:00:00.000000 + timestamp__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - timestamp: 253402300799999999 # 9999-12-31 23:59:59.999999 + nullable: true + - fields: + - timestamp: -9223372036854775808 # i64 min (not representable with chrono::NaiveDate; don't panic!) + timestamp__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - timestamp: 9223372036854775807 # i64 max (not representable with chrono::NaiveDate; don't panic!) 
+ timestamp__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - "null": + timestamp: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + timestamp: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/timestamp_tz.yaml b/tests/tests/expressions/literals/timestamp_tz.yaml new file mode 100644 index 00000000..27f230e2 --- /dev/null +++ b/tests/tests/expressions/literals/timestamp_tz.yaml @@ -0,0 +1,48 @@ +name: timestamp_tz +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - timestamp_tz: { nullability: NULLABILITY_REQUIRED } + - timestamp_tz: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - timestamp_tz: 0 # 1970 epoch + nullable: false + - timestamp_tz: 1575158400000000 # 2019-12-00 00:00:00.000000 + nullable: true + - fields: + - timestamp_tz: -30610224000000000 # 1000-01-01 00:00:00.000000 + nullable: false + - timestamp_tz: -30610224000000001 # 999-12-31 23:59:59.999999 + timestamp_tz__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - timestamp_tz: 253402300800000000 # 10000-01-01 00:00:00.000000 + timestamp_tz__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - timestamp_tz: 253402300799999999 # 9999-12-31 23:59:59.999999 + nullable: true + - fields: + - timestamp_tz: -9223372036854775808 # i64 min (not representable with chrono::NaiveDate; don't panic!) + timestamp_tz__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: false + - timestamp_tz: 9223372036854775807 # i64 max (not representable with chrono::NaiveDate; don't panic!) 
+ timestamp_tz__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] + nullable: true + - fields: + - "null": + timestamp_tz: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + timestamp_tz: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/uuid.yaml b/tests/tests/expressions/literals/uuid.yaml new file mode 100644 index 00000000..1993392a --- /dev/null +++ b/tests/tests/expressions/literals/uuid.yaml @@ -0,0 +1,36 @@ +name: uuid +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - uuid: { nullability: NULLABILITY_REQUIRED } + - uuid: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - uuid: K2lgaYZgQDqtouub8GqT+g== # base64(2b696069-8660-403a-ada2-eb9bf06a93fa) + nullable: false + - uuid: AAAAAAAAAAAAAAAAAAAAAA== # base64(00000000-0000-0000-0000-000000000000) + nullable: true + - fields: + - uuid: AAAAAAAAAAAAAAAAAAAA # 15 bytes + uuid__test: [ diag: { level: e, code: 6002, msg: "*16 bytes*15*" } ] + nullable: false + - uuid: AAAAAAAAAAAAAAAAAAAAAAA= # 17 bytes + uuid__test: [ diag: { level: e, code: 6002, msg: "*16 bytes*17*" } ] + nullable: true + - fields: + - "null": + uuid: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + uuid: { nullability: NULLABILITY_NULLABLE } + __test: [ type: "NSTRUCT" ] diff --git a/tests/tests/expressions/literals/var_char.yaml b/tests/tests/expressions/literals/var_char.yaml new file mode 100644 index 00000000..34100737 --- /dev/null +++ b/tests/tests/expressions/literals/var_char.yaml @@ -0,0 +1,76 @@ +name: varchar +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - varchar: { length: 3, nullability: NULLABILITY_REQUIRED } + - varchar: { length: 6, nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - var_char: + value: "foo" + length: 3 + nullable: false + - var_char: + value: "hello" + length: 6 + nullable: true + - fields: + - var_char: + value: "" + length: 3 + nullable: false + - var_char: + value: "world!" 
+ length: 6 + nullable: true + - fields: + - var_char: + value: "too long" + value__test: [ diag: { level: e, code: 6002, msg: "*longer than specified length*" } ] + length: 3 + nullable: false + - var_char: + value: "!@#$%^" + length: 6 + nullable: true + - fields: + - "null": + varchar: { length: 3, nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4008, msg: "*type of null literal must be nullable*" } ] + - "null": + varchar: { length: 6, nullability: NULLABILITY_NULLABLE } + - fields: + - "null": + varchar: + length: -2147483648 # i32 minimum + length__test: [ diag: { level: e, code: 2, msg: "*parameters cannot be negative*" } ] + nullability: NULLABILITY_NULLABLE + - "null": + varchar: + length: 0 # size 0 not allowed + nullability: NULLABILITY_NULLABLE + __test: [ diag: { level: e, code: 4002, msg: "*out of range*" } ] + - fields: + - "null": + varchar: + length: 1 # minimum size + nullability: NULLABILITY_NULLABLE + - "null": + varchar: + length: 2147483647 # maximum size + nullability: NULLABILITY_NULLABLE + __test: + - diag: { level: e, code: 4008 } + - diag: { level: e, code: 4005 } + - diag: { level: e, code: 4005 } + __test: [ type: "NSTRUCT, y: VARCHAR?<6>>" ] diff --git a/tests/tests/expressions/subqueries/comparison.yaml b/tests/tests/expressions/subqueries/comparison.yaml new file mode 100644 index 00000000..486c7dc5 --- /dev/null +++ b/tests/tests/expressions/subqueries/comparison.yaml @@ -0,0 +1,72 @@ +name: comparison-subquery +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: { names: [ test ] } + expressions: + - subquery: + setComparison: + right: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + left: { literal: { string: test } } + comparisonOp: COMPARISON_OP_EQ + reductionOp: REDUCTION_OP_ALL + __test: [ type: boolean ] + - subquery: + setComparison: + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test3 + __test: [ diag: { level: e, code: 6004, msg: "*subquery must return a single column*" } ] + left: { literal: { string: test } } + comparisonOp: COMPARISON_OP_UNSPECIFIED + comparisonOp__test: [ diag: { level: e, code: 2, msg: "*this enum may not be left unspecified*" } ] + reductionOp: REDUCTION_OP_UNSPECIFIED + reductionOp__test: [ diag: { level: e, code: 2, msg: "*this enum may not be left unspecified*" } ] + __test: [ type: boolean ] + - subquery: + setComparison: + right: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test4 + __test: [ diag: { level: e, code: 4005, msg: "*string vs. 
i32*" } ] + left: { literal: { i32: 0 } } + comparisonOp: COMPARISON_OP_GE + reductionOp: REDUCTION_OP_ANY + __test: [ type: boolean ] diff --git a/tests/tests/expressions/subqueries/in-predicate.yaml b/tests/tests/expressions/subqueries/in-predicate.yaml new file mode 100644 index 00000000..65bb3528 --- /dev/null +++ b/tests/tests/expressions/subqueries/in-predicate.yaml @@ -0,0 +1,69 @@ +name: in-subquery +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: { names: [ test ] } + expressions: + - subquery: + inPredicate: + haystack: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + needles: + - literal: { string: test } + - literal: { i32: 0 } + __test: [ type: boolean ] + - subquery: + inPredicate: + haystack: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test3 + needles: + - literal: { i16: 0 } + __test: [ diag: { level: e, code: 4005, msg: "*column 1*i32 vs. i16*" } ] + __test: [ type: boolean ] + - subquery: + inPredicate: + haystack: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test3 + needles: + - literal: { i32: 0 } + - literal: { i16: 0 } + __test: [ diag: { level: e, code: 4005, msg: "*column count mismatch*" } ] + __test: [ type: boolean ] diff --git a/tests/tests/expressions/subqueries/scalar.yaml b/tests/tests/expressions/subqueries/scalar.yaml new file mode 100644 index 00000000..cbf2382a --- /dev/null +++ b/tests/tests/expressions/subqueries/scalar.yaml @@ -0,0 +1,45 @@ +name: scalar-subquery +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: { names: [ test ] } + expressions: + - subquery: + scalar: + input: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + __test: [ type: string ] + - subquery: + scalar: + input: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - date: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test3 + __test: [ diag: { level: e, code: 6004, msg: "*subquery must return a single column*" } ] diff --git a/tests/tests/expressions/subqueries/set.yaml b/tests/tests/expressions/subqueries/set.yaml new file mode 100644 index 00000000..306500e3 --- /dev/null +++ b/tests/tests/expressions/subqueries/set.yaml @@ -0,0 +1,65 @@ +name: set-subquery +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [x] + struct: + nullability: NULLABILITY_REQUIRED + types: + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: { names: [ test ] } + expressions: + - subquery: + setPredicate: + tuples: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - 
i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + predicateOp: PREDICATE_OP_EXISTS + __test: [ type: boolean ] + - subquery: + setPredicate: + tuples: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + predicateOp: PREDICATE_OP_UNIQUE + __test: [ type: boolean ] + - subquery: + setPredicate: + tuples: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + predicateOp: PREDICATE_OP_UNSPECIFIED + predicateOp__test: [ diag: { level: e, code: 2, msg: "*this enum may not be left unspecified*" } ] + __test: [ type: boolean ] diff --git a/tests/tests/extensions/advanced/enhancement-not-declared.yaml b/tests/tests/extensions/advanced/enhancement-not-declared.yaml new file mode 100644 index 00000000..7c679487 --- /dev/null +++ b/tests/tests/extensions/advanced/enhancement-not-declared.yaml @@ -0,0 +1,21 @@ +name: adv-ext-enhancement-not-declared +plan: + __test: [ level: w ] + relations: + - rel: + read: + common: + advancedExtension: + enhancement: + "@type": substrait.Plan + __test: [ diag: { level: e, code: 1006, msg: '*missing protobuf "any" declaration*' } ] + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test diff --git a/tests/tests/extensions/advanced/enhancement.yaml b/tests/tests/extensions/advanced/enhancement.yaml new file mode 100644 index 00000000..3f0d7566 --- /dev/null +++ b/tests/tests/extensions/advanced/enhancement.yaml @@ -0,0 +1,22 @@ +name: adv-ext-enhancement +plan: + __test: [ level: w ] + relations: + - rel: + read: + common: + advancedExtension: + enhancement: + "@type": substrait.Plan + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/extensions/advanced/missing-declaration.yaml b/tests/tests/extensions/advanced/missing-declaration.yaml new file mode 100644 index 00000000..d217392d --- /dev/null +++ b/tests/tests/extensions/advanced/missing-declaration.yaml @@ -0,0 +1,21 @@ +name: adv-ext-missing-declaration +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + advancedExtension: + optimization: + "@type": substrait.Plan + __test: [ diag: { level: e, code: 1006, msg: '*missing protobuf "any" declaration: substrait.Plan*' } ] + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test diff --git a/tests/tests/extensions/advanced/optimization-not-declared.yaml b/tests/tests/extensions/advanced/optimization-not-declared.yaml new file mode 100644 index 00000000..5f52547a --- /dev/null +++ b/tests/tests/extensions/advanced/optimization-not-declared.yaml @@ -0,0 +1,21 @@ +name: adv-ext-optimization-not-declared +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + advancedExtension: + optimization: + "@type": substrait.Plan + 
__test: [ diag: { level: e, code: 1006, msg: '*missing protobuf "any" declaration*' } ] + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test diff --git a/tests/tests/extensions/advanced/optimization.yaml b/tests/tests/extensions/advanced/optimization.yaml new file mode 100644 index 00000000..1972de0c --- /dev/null +++ b/tests/tests/extensions/advanced/optimization.yaml @@ -0,0 +1,22 @@ +name: adv-ext-optimization +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + advancedExtension: + optimization: + "@type": substrait.Plan + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/extensions/advanced/unused-declaration.yaml b/tests/tests/extensions/advanced/unused-declaration.yaml new file mode 100644 index 00000000..432e1c5a --- /dev/null +++ b/tests/tests/extensions/advanced/unused-declaration.yaml @@ -0,0 +1,24 @@ +name: adv-ext-unused-declaration +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + advancedExtension: + optimization: + "@type": substrait.Plan + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + expectedTypeUrls: + - substrait.Plan + - not.Used + __test: [ diag: { level: i, code: 7001, msg: '*not.Used is not present in the plan*' } ] diff --git a/tests/tests/relations/aggregate/measure-and-group.yaml b/tests/tests/relations/aggregate/measure-and-group.yaml new file mode 100644 index 00000000..cd2acdff --- /dev/null +++ b/tests/tests/relations/aggregate/measure-and-group.yaml @@ -0,0 +1,45 @@ +name: aggregate-measure-and-group +plan: + __test: [ level: iw ] + extensionUris: + - extensionUriAnchor: 1 + uri__yaml: + aggregate_functions: + - name: "count" + description: Count number of rows + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: MANY + intermediate: i64 + return: i64 + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: "count:" + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + groupings: + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + measures: + - measure: + functionReference: 1 + output_type: { i64: { nullability: NULLABILITY_REQUIRED } } + __test: [ type: "STRUCT<string, i64>" ] diff --git a/tests/tests/relations/aggregate/measure.yaml b/tests/tests/relations/aggregate/measure.yaml new file mode 100644 index 00000000..9dc2dc35 --- /dev/null +++ b/tests/tests/relations/aggregate/measure.yaml @@ -0,0 +1,40 @@ +name: aggregate-measure +plan: + __test: [ level: iw ] + extensionUris: + - extensionUriAnchor: 1 + uri__yaml: + aggregate_functions: + - name: "count" + description: Count number of rows + impls: + - args: [] + nullability: DECLARED_OUTPUT + decomposable: MANY + intermediate: i64 + return: i64 + extensions: + - extensionFunction: + 
extensionUriReference: 1 + functionAnchor: 1 + name: "count:" + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + measures: + - measure: + functionReference: 1 + output_type: { i64: { nullability: NULLABILITY_REQUIRED } } + __test: [ type: "STRUCT<i64>" ] diff --git a/tests/tests/relations/aggregate/missing-set-or-measure.yaml b/tests/tests/relations/aggregate/missing-set-or-measure.yaml new file mode 100644 index 00000000..deb680a6 --- /dev/null +++ b/tests/tests/relations/aggregate/missing-set-or-measure.yaml @@ -0,0 +1,19 @@ +name: aggregate-missing-set-or-measure +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ diag: { level: e, code: 5003, msg: "*must have at least one grouping expression or measure*" } ] diff --git a/tests/tests/relations/aggregate/multi-set-reused.yaml b/tests/tests/relations/aggregate/multi-set-reused.yaml new file mode 100644 index 00000000..d9214a3b --- /dev/null +++ b/tests/tests/relations/aggregate/multi-set-reused.yaml @@ -0,0 +1,31 @@ +name: aggregate-multi-set-reused +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + groupings: + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ type: "STRUCT<fp32?, string?, i32>" ] diff --git a/tests/tests/relations/aggregate/multi-set.yaml b/tests/tests/relations/aggregate/multi-set.yaml new file mode 100644 index 00000000..4a6a5add --- /dev/null +++ b/tests/tests/relations/aggregate/multi-set.yaml @@ -0,0 +1,28 @@ +name: aggregate-multi-set +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + groupings: + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ type: "STRUCT<fp32?, string?, i32>" ] diff --git a/tests/tests/relations/aggregate/single-set-one-expr.yaml b/tests/tests/relations/aggregate/single-set-one-expr.yaml new file mode 100644 index 00000000..aee213da --- /dev/null +++ b/tests/tests/relations/aggregate/single-set-one-expr.yaml @@ -0,0 +1,24 @@ +name: aggregate-single-set-one-expr +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + 
groupings: + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ type: "STRUCT<string>" ] diff --git a/tests/tests/relations/aggregate/single-set-two-expr.yaml b/tests/tests/relations/aggregate/single-set-two-expr.yaml new file mode 100644 index 00000000..9b3d8a4a --- /dev/null +++ b/tests/tests/relations/aggregate/single-set-two-expr.yaml @@ -0,0 +1,27 @@ +name: aggregate-single-set-two-expr +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + groupings: + - groupingExpressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ type: "STRUCT<fp32, string>" ] diff --git a/tests/tests/relations/aggregate/single-set-zero-expr.yaml b/tests/tests/relations/aggregate/single-set-zero-expr.yaml new file mode 100644 index 00000000..a87b9529 --- /dev/null +++ b/tests/tests/relations/aggregate/single-set-zero-expr.yaml @@ -0,0 +1,22 @@ +name: aggregate-single-set-missing-expr +plan: + __test: [ level: i ] + relations: + - rel: + aggregate: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - fp32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + groupings: + - groupingExpressions: [] + __test: + - diag: { level: e, code: 5003, msg: "*must have at least one grouping expression or measure*" } diff --git a/tests/tests/relations/common/direct.yaml b/tests/tests/relations/common/direct.yaml new file mode 100644 index 00000000..52376ae7 --- /dev/null +++ b/tests/tests/relations/common/direct.yaml @@ -0,0 +1,18 @@ +name: rel-common-direct +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + direct: {} + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/common/emit-basic.yaml b/tests/tests/relations/common/emit-basic.yaml new file mode 100644 index 00000000..4acb3a81 --- /dev/null +++ b/tests/tests/relations/common/emit-basic.yaml @@ -0,0 +1,22 @@ +name: rel-common-emit-basic +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + emit: + outputMapping: + - 1 + - 0 + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "STRUCT<i32, string>" ] diff --git a/tests/tests/relations/common/emit-empty.yaml b/tests/tests/relations/common/emit-empty.yaml new file mode 100644 index 00000000..797774d1 --- /dev/null +++ b/tests/tests/relations/common/emit-empty.yaml @@ -0,0 +1,20 @@ +name: rel-common-emit-empty +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + emit: + outputMapping: [] + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "STRUCT<>" ] diff --git a/tests/tests/relations/common/emit-out-of-range.yaml 
b/tests/tests/relations/common/emit-out-of-range.yaml new file mode 100644 index 00000000..6d3f7277 --- /dev/null +++ b/tests/tests/relations/common/emit-out-of-range.yaml @@ -0,0 +1,21 @@ +name: rel-common-emit-out-of-range +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + emit: + outputMapping: + - 2 + outputMapping.0__test: [ diag: { level: e, code: 4004, msg: "*index out of range*" } ] + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test diff --git a/tests/tests/relations/common/enhancement.yaml b/tests/tests/relations/common/enhancement.yaml new file mode 100644 index 00000000..eba8d7c1 --- /dev/null +++ b/tests/tests/relations/common/enhancement.yaml @@ -0,0 +1,23 @@ +name: rel-common-enhancement +plan: + __test: [ level: w ] + relations: + - rel: + read: + common: + advancedExtension: + enhancement: + '@type': substrait.Plan + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + # enhancements may affect schema, so validator does not know type + __test: [ type: "!" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/common/hints-functional.yaml b/tests/tests/relations/common/hints-functional.yaml new file mode 100644 index 00000000..b4efbf34 --- /dev/null +++ b/tests/tests/relations/common/hints-functional.yaml @@ -0,0 +1,26 @@ +name: rel-common-hints-functional +plan: + __test: [ level: w ] + relations: + - rel: + read: + common: + hint: + stats: + rowCount: 100 + recordSize: 100 + advancedExtension: + enhancement: + '@type': substrait.Plan + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/common/hints.yaml b/tests/tests/relations/common/hints.yaml new file mode 100644 index 00000000..0c11dbe4 --- /dev/null +++ b/tests/tests/relations/common/hints.yaml @@ -0,0 +1,26 @@ +name: rel-common-hints +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + hint: + stats: + rowCount: 100 + recordSize: 100 + advancedExtension: + optimization: + '@type': substrait.Plan + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/common/omitted.yaml b/tests/tests/relations/common/omitted.yaml new file mode 100644 index 00000000..fcdc42dd --- /dev/null +++ b/tests/tests/relations/common/omitted.yaml @@ -0,0 +1,16 @@ +name: rel-common-omitted +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/common/optimization.yaml b/tests/tests/relations/common/optimization.yaml new file mode 100644 index 00000000..2476e9c9 --- /dev/null +++ b/tests/tests/relations/common/optimization.yaml @@ -0,0 +1,22 @@ +name: rel-common-optimization +plan: + __test: [ level: i ] + relations: + - rel: + read: + common: + advancedExtension: + 
optimization: + '@type': substrait.Plan + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/cross/basic.yaml b/tests/tests/relations/cross/basic.yaml new file mode 100644 index 00000000..92ee81d0 --- /dev/null +++ b/tests/tests/relations/cross/basic.yaml @@ -0,0 +1,31 @@ +name: cross-basic +plan: + __test: [ level: i ] + relations: + - rel: + cross: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + __test: [ type: "STRUCT<string, i32, fp32, boolean>" ] diff --git a/tests/tests/relations/cross/unknown-schema.yaml b/tests/tests/relations/cross/unknown-schema.yaml new file mode 100644 index 00000000..df1499b0 --- /dev/null +++ b/tests/tests/relations/cross/unknown-schema.yaml @@ -0,0 +1,25 @@ +name: cross-unknown-schema +plan: + __test: [ level: i ] + relations: + - rel: + cross: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + namedTable: + names: + - test2 + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: base_schema*" } ] + __test: [ type: "!" ] diff --git a/tests/tests/relations/extensions/leaf-missing-detail.yaml b/tests/tests/relations/extensions/leaf-missing-detail.yaml new file mode 100644 index 00000000..d1bc266d --- /dev/null +++ b/tests/tests/relations/extensions/leaf-missing-detail.yaml @@ -0,0 +1,9 @@ +name: rel-extension-leaf-missing-detail +plan: + __test: [ level: i ] + relations: + - rel: + extensionLeaf: + __test: + - diag: { level: e, code: 1002, msg: "*missing required protobuf field: detail*" } + - type: "!" diff --git a/tests/tests/relations/extensions/leaf.yaml b/tests/tests/relations/extensions/leaf.yaml new file mode 100644 index 00000000..5c188f3d --- /dev/null +++ b/tests/tests/relations/extensions/leaf.yaml @@ -0,0 +1,11 @@ +name: rel-extension-leaf +plan: + __test: [ level: w ] + relations: + - rel: + extensionLeaf: + detail: + '@type': substrait.Plan + __test: [ type: "!" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/extensions/multi-missing-detail.yaml b/tests/tests/relations/extensions/multi-missing-detail.yaml new file mode 100644 index 00000000..e7924d85 --- /dev/null +++ b/tests/tests/relations/extensions/multi-missing-detail.yaml @@ -0,0 +1,22 @@ +name: rel-extension-multi-missing-detail +plan: + __test: [ level: i ] + relations: + - rel: + extensionMulti: + inputs: + - read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: + - diag: { level: e, code: 1002, msg: "*missing required protobuf field: detail*" } + - type: "!" 
+ expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/extensions/multi-without-inputs.yaml b/tests/tests/relations/extensions/multi-without-inputs.yaml new file mode 100644 index 00000000..82e30896 --- /dev/null +++ b/tests/tests/relations/extensions/multi-without-inputs.yaml @@ -0,0 +1,12 @@ +name: rel-extension-multi-without-inputs +plan: + __test: [ level: w ] + relations: + - rel: + extensionMulti: + inputs: [] + detail: + '@type': substrait.Plan + __test: [ type: "!" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/extensions/multi.yaml b/tests/tests/relations/extensions/multi.yaml new file mode 100644 index 00000000..26318821 --- /dev/null +++ b/tests/tests/relations/extensions/multi.yaml @@ -0,0 +1,22 @@ +name: rel-extension-multi +plan: + __test: [ level: w ] + relations: + - rel: + extensionMulti: + inputs: + - read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + detail: + '@type': substrait.Plan + __test: [ type: "!" ] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/extensions/single-missing-detail.yaml b/tests/tests/relations/extensions/single-missing-detail.yaml new file mode 100644 index 00000000..005edc6b --- /dev/null +++ b/tests/tests/relations/extensions/single-missing-detail.yaml @@ -0,0 +1,22 @@ +name: rel-extension-single-missing-detail +plan: + __test: [ level: i ] + relations: + - rel: + extensionSingle: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: + - diag: { level: e, code: 1002, msg: "*missing required protobuf field: detail*" } + - type: "!" + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/extensions/single.yaml b/tests/tests/relations/extensions/single.yaml new file mode 100644 index 00000000..b86248d6 --- /dev/null +++ b/tests/tests/relations/extensions/single.yaml @@ -0,0 +1,22 @@ +name: rel-extension-single +plan: + __test: [ level: w ] + relations: + - rel: + extensionSingle: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + detail: + '@type': substrait.Plan + __test: [ type: "!" 
] + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/fetch/all.yaml b/tests/tests/relations/fetch/all.yaml new file mode 100644 index 00000000..b8ffb6c9 --- /dev/null +++ b/tests/tests/relations/fetch/all.yaml @@ -0,0 +1,18 @@ +name: fetch-all +plan: + __test: [ level: i ] + relations: + - rel: + fetch: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/fetch/discard-first-n.yaml b/tests/tests/relations/fetch/discard-first-n.yaml new file mode 100644 index 00000000..e5901a2b --- /dev/null +++ b/tests/tests/relations/fetch/discard-first-n.yaml @@ -0,0 +1,19 @@ +name: fetch-discard-first-n +plan: + __test: [ level: i ] + relations: + - rel: + fetch: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + offset: 100 + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/fetch/discard-first.yaml b/tests/tests/relations/fetch/discard-first.yaml new file mode 100644 index 00000000..2184b98b --- /dev/null +++ b/tests/tests/relations/fetch/discard-first.yaml @@ -0,0 +1,19 @@ +name: fetch-discard-first +plan: + __test: [ level: i ] + relations: + - rel: + fetch: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + offset: 1 + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/fetch/n-rows.yaml b/tests/tests/relations/fetch/n-rows.yaml new file mode 100644 index 00000000..b86186f2 --- /dev/null +++ b/tests/tests/relations/fetch/n-rows.yaml @@ -0,0 +1,20 @@ +name: fetch-n-rows +plan: + __test: [ level: i ] + relations: + - rel: + fetch: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + offset: 100 + count: 50 + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/fetch/single-row.yaml b/tests/tests/relations/fetch/single-row.yaml new file mode 100644 index 00000000..6b2fefef --- /dev/null +++ b/tests/tests/relations/fetch/single-row.yaml @@ -0,0 +1,20 @@ +name: fetch-single-row +plan: + __test: [ level: i ] + relations: + - rel: + fetch: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + offset: 100 + count: 1 + __test: [ type: "NSTRUCT<a: string>" ] diff --git a/tests/tests/relations/filter/basic.yaml b/tests/tests/relations/filter/basic.yaml new file mode 100644 index 00000000..f4b7d334 --- /dev/null +++ b/tests/tests/relations/filter/basic.yaml @@ -0,0 +1,23 @@ +name: filter-basic +plan: + __test: [ level: i ] + relations: + - rel: + filter: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + condition: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + __test: [ type: "NSTRUCT<a: string, b: boolean>" ] diff --git a/tests/tests/relations/filter/missing.yaml b/tests/tests/relations/filter/missing.yaml new file mode 100644 index 00000000..965f2dab --- /dev/null 
+++ b/tests/tests/relations/filter/missing.yaml @@ -0,0 +1,19 @@ +name: filter-missing +plan: + __test: [ level: i ] + relations: + - rel: + filter: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: condition*" } ] diff --git a/tests/tests/relations/filter/not-bool.yaml b/tests/tests/relations/filter/not-bool.yaml new file mode 100644 index 00000000..5961eb3b --- /dev/null +++ b/tests/tests/relations/filter/not-bool.yaml @@ -0,0 +1,24 @@ +name: filter-not-bool +plan: + __test: [ level: i ] + relations: + - rel: + filter: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + condition: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ diag: { level: e, code: 4005, msg: "*must yield booleans*string*" } ] + __test: [ type: "NSTRUCT<a: string, b: boolean>" ] diff --git a/tests/tests/relations/filter/nullable.yaml b/tests/tests/relations/filter/nullable.yaml new file mode 100644 index 00000000..8ee210f8 --- /dev/null +++ b/tests/tests/relations/filter/nullable.yaml @@ -0,0 +1,23 @@ +name: filter-nullable +plan: + __test: [ level: i ] + relations: + - rel: + filter: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + condition: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + __test: [ type: "NSTRUCT<a: string, b: boolean?>" ] diff --git a/tests/tests/relations/join/anti.yaml b/tests/tests/relations/join/anti.yaml new file mode 100644 index 00000000..d6ebdd78 --- /dev/null +++ b/tests/tests/relations/join/anti.yaml @@ -0,0 +1,36 @@ +name: join-anti +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_ANTI + __test: [ type: "STRUCT<string, i32>" ] diff --git a/tests/tests/relations/join/expr-not-bool.yaml b/tests/tests/relations/join/expr-not-bool.yaml new file mode 100644 index 00000000..dd69c4ef --- /dev/null +++ b/tests/tests/relations/join/expr-not-bool.yaml @@ -0,0 +1,36 @@ +name: join-expr-not-bool +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + 
namedTable: + names: + - test2 + expression: + __test: [ diag: { level: e, code: 4005, msg: "*predicates must yield booleans*fp32*" } ] + selection: + rootReference: {} + directReference: { structField: { field: 2 } } + type: JOIN_TYPE_INNER diff --git a/tests/tests/relations/join/filter-not-bool.yaml b/tests/tests/relations/join/filter-not-bool.yaml new file mode 100644 index 00000000..b4595ae9 --- /dev/null +++ b/tests/tests/relations/join/filter-not-bool.yaml @@ -0,0 +1,40 @@ +name: join-filter-not-bool +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + type: JOIN_TYPE_INNER + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + postJoinFilter: + __test: [ diag: { level: e, code: 4005, msg: "*predicates must yield booleans*fp32*" } ] + selection: + rootReference: {} + directReference: { structField: { field: 2 } } diff --git a/tests/tests/relations/join/filter-range.yaml b/tests/tests/relations/join/filter-range.yaml new file mode 100644 index 00000000..87b55d52 --- /dev/null +++ b/tests/tests/relations/join/filter-range.yaml @@ -0,0 +1,42 @@ +name: join-filter-range +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + type: JOIN_TYPE_SEMI + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + postJoinFilter: + selection: + rootReference: {} + directReference: + structField: + field: 3 + field__test: [ diag: { level: e, code: 2, msg: "*struct index out of range (size = 2)*" } ] diff --git a/tests/tests/relations/join/filter.yaml b/tests/tests/relations/join/filter.yaml new file mode 100644 index 00000000..f58f54cf --- /dev/null +++ b/tests/tests/relations/join/filter.yaml @@ -0,0 +1,39 @@ +name: join-filter +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + type: JOIN_TYPE_INNER + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + postJoinFilter: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } diff --git a/tests/tests/relations/join/inner.yaml b/tests/tests/relations/join/inner.yaml new file mode 100644 index 00000000..b8fdbfb6 --- 
/dev/null +++ b/tests/tests/relations/join/inner.yaml @@ -0,0 +1,36 @@ +name: join-inner +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_INNER + __test: [ type: "STRUCT<string, i32, fp32, boolean>" ] diff --git a/tests/tests/relations/join/left.yaml b/tests/tests/relations/join/left.yaml new file mode 100644 index 00000000..53880dae --- /dev/null +++ b/tests/tests/relations/join/left.yaml @@ -0,0 +1,36 @@ +name: join-left +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_LEFT + __test: [ type: "STRUCT<string, i32, fp32?, boolean?>" ] diff --git a/tests/tests/relations/join/missing-expr.yaml b/tests/tests/relations/join/missing-expr.yaml new file mode 100644 index 00000000..9facf797 --- /dev/null +++ b/tests/tests/relations/join/missing-expr.yaml @@ -0,0 +1,32 @@ +name: join-missing-expr +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + type: JOIN_TYPE_INNER + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: expression*" } ] diff --git a/tests/tests/relations/join/missing-type.yaml b/tests/tests/relations/join/missing-type.yaml new file mode 100644 index 00000000..befa6c31 --- /dev/null +++ b/tests/tests/relations/join/missing-type.yaml @@ -0,0 +1,35 @@ +name: join-missing-type +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type__test: [ diag: { level: e, code: 2, msg: "*this enum may not be left unspecified*" } ] diff --git 
a/tests/tests/relations/join/outer.yaml b/tests/tests/relations/join/outer.yaml new file mode 100644 index 00000000..bf720706 --- /dev/null +++ b/tests/tests/relations/join/outer.yaml @@ -0,0 +1,36 @@ +name: join-outer +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_OUTER + __test: [ type: "STRUCT<string?, i32?, fp32?, boolean?>" ] diff --git a/tests/tests/relations/join/right.yaml b/tests/tests/relations/join/right.yaml new file mode 100644 index 00000000..f73e2a90 --- /dev/null +++ b/tests/tests/relations/join/right.yaml @@ -0,0 +1,36 @@ +name: join-right +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_RIGHT + __test: [ type: "STRUCT<string?, i32?, fp32, boolean>" ] diff --git a/tests/tests/relations/join/semi.yaml b/tests/tests/relations/join/semi.yaml new file mode 100644 index 00000000..dcecf050 --- /dev/null +++ b/tests/tests/relations/join/semi.yaml @@ -0,0 +1,36 @@ +name: join-semi +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: JOIN_TYPE_SEMI + __test: [ type: "STRUCT<string, i32>" ] diff --git a/tests/tests/relations/join/single.yaml b/tests/tests/relations/join/single.yaml new file mode 100644 index 00000000..28b40286 --- /dev/null +++ b/tests/tests/relations/join/single.yaml @@ -0,0 +1,36 @@ +name: join-single +plan: + __test: [ level: i ] + relations: + - rel: + join: + left: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + right: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - fp32: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + expression: + selection: + rootReference: {} + directReference: { structField: { field: 3 } } + type: 
JOIN_TYPE_SINGLE + __test: [ type: "STRUCT<fp32?, boolean?>" ] diff --git a/tests/tests/relations/project/dependent.yaml b/tests/tests/relations/project/dependent.yaml new file mode 100644 index 00000000..eef74eaa --- /dev/null +++ b/tests/tests/relations/project/dependent.yaml @@ -0,0 +1,26 @@ +name: project-dependent +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + expressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 2 } } + __test: [ type: "STRUCT<string, boolean, boolean, boolean>" ] diff --git a/tests/tests/relations/project/missing.yaml b/tests/tests/relations/project/missing.yaml new file mode 100644 index 00000000..d1312f3a --- /dev/null +++ b/tests/tests/relations/project/missing.yaml @@ -0,0 +1,18 @@ +name: project-missing +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: expressions*" } ] diff --git a/tests/tests/relations/project/multiple.yaml b/tests/tests/relations/project/multiple.yaml new file mode 100644 index 00000000..2ea75098 --- /dev/null +++ b/tests/tests/relations/project/multiple.yaml @@ -0,0 +1,26 @@ +name: project-multiple +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + expressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + - selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ type: "STRUCT<string, boolean, boolean, string>" ] diff --git a/tests/tests/relations/project/single.yaml b/tests/tests/relations/project/single.yaml new file mode 100644 index 00000000..bfe1800f --- /dev/null +++ b/tests/tests/relations/project/single.yaml @@ -0,0 +1,23 @@ +name: project-single +plan: + __test: [ level: i ] + relations: + - rel: + project: + input: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + expressions: + - selection: + rootReference: {} + directReference: { structField: { field: 1 } } + __test: [ type: "STRUCT<string, boolean, boolean>" ] diff --git a/tests/tests/relations/read/extension-table/basic.yaml b/tests/tests/relations/read/extension-table/basic.yaml new file mode 100644 index 00000000..09b8fb2f --- /dev/null +++ b/tests/tests/relations/read/extension-table/basic.yaml @@ -0,0 +1,19 @@ +name: read-extension-basic +plan: + __test: [ level: w ] + relations: + - rel: + read: + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + extensionTable: + detail: + '@type': substrait.Plan + __test: [ type: "NSTRUCT<word: string, value: i32?>" ] + expectedTypeUrls: + - substrait.Plan diff --git 
a/tests/tests/relations/read/file-table/basic.yaml b/tests/tests/relations/read/file-table/basic.yaml new file mode 100644 index 00000000..9791216f --- /dev/null +++ b/tests/tests/relations/read/file-table/basic.yaml @@ -0,0 +1,16 @@ +name: read-files-basic +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + localFiles: + items: + - uriPath: "1/2/3" + format: FILE_FORMAT_PARQUET diff --git a/tests/tests/relations/read/file-table/extension-format.yaml b/tests/tests/relations/read/file-table/extension-format.yaml new file mode 100644 index 00000000..99e178fc --- /dev/null +++ b/tests/tests/relations/read/file-table/extension-format.yaml @@ -0,0 +1,20 @@ +name: read-files-extension-format +plan: + __test: [ level: w ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + localFiles: + items: + - uriPath: "1/2/3" + advancedExtension: + enhancement: + "@type": substrait.Plan + expectedTypeUrls: + - substrait.Plan diff --git a/tests/tests/relations/read/file-table/missing-format.yaml b/tests/tests/relations/read/file-table/missing-format.yaml new file mode 100644 index 00000000..6813c4d8 --- /dev/null +++ b/tests/tests/relations/read/file-table/missing-format.yaml @@ -0,0 +1,16 @@ +name: read-files-missing-format +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + localFiles: + items: + - uriPath: "1/2/3" + format__test: [ diag: { level: e, code: 2, msg: "*file format must be specified*" } ] diff --git a/tests/tests/relations/read/file-table/partial.yaml b/tests/tests/relations/read/file-table/partial.yaml new file mode 100644 index 00000000..91fed1c5 --- /dev/null +++ b/tests/tests/relations/read/file-table/partial.yaml @@ -0,0 +1,30 @@ +name: read-partial-files +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + localFiles: + items: + - uriFile: "a/b/c" + format: FILE_FORMAT_PARQUET + partitionIndex: 3 + - uriFolder: "a/b/c" + format: FILE_FORMAT_PARQUET + partitionIndex: 3 + - uriFile: "a/b/c" + format: FILE_FORMAT_PARQUET + start: 10 + length: 20 + - uriFolder: "a/b/c" + format: FILE_FORMAT_PARQUET + start: 10 + length: 20 + __test: + - diag: { level: e, code: 2, msg: "*file offsets are not allowed in conjunction with multiple files*" } diff --git a/tests/tests/relations/read/file-table/uri-validation.yaml b/tests/tests/relations/read/file-table/uri-validation.yaml new file mode 100644 index 00000000..7fa88e24 --- /dev/null +++ b/tests/tests/relations/read/file-table/uri-validation.yaml @@ -0,0 +1,50 @@ +name: read-files-uri-validation +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + localFiles: + items: + - uriPath: "not a valid URI" + uriPath__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ] + format: FILE_FORMAT_PARQUET + - uriPath: "a%20valid%20relative%20URI" + format: FILE_FORMAT_PARQUET + - uriPath: "/path/to/local/file" + format: FILE_FORMAT_PARQUET + - uriPath: "file:///path/to/local/file" + 
format: FILE_FORMAT_PARQUET + - uriPath: "protocol://with.an.authority/path/goes/here" + format: FILE_FORMAT_PARQUET + - uriPath: "protocol:urn" + format: FILE_FORMAT_PARQUET + - uriPath: 'C:\windows\paths\are\not\uris' + uriPath__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ] + format: FILE_FORMAT_PARQUET + - uriPath: 'file://C:/write/them/like/this' + format: FILE_FORMAT_PARQUET + - uriPath: 'C:/or/like/this' + format: FILE_FORMAT_PARQUET + - uriPathGlob: '/can/have/*/and/?/in/path/globs' + format: FILE_FORMAT_PARQUET + - uriPathGlob: 'file:///can/have/*/and/?/in/path/globs' + format: FILE_FORMAT_PARQUET + - uriPathGlob: '/character/classes/must/be/escaped/[cls]' + uriPathGlob__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ] + format: FILE_FORMAT_PARQUET + - uriPathGlob: '/character/classes/must/be/escaped/%5Bcls%5D' + format: FILE_FORMAT_PARQUET + - uriPathGlob: '/invalid/glob/syntax/%5Dcls%5B' + uriPathGlob__test: [ diag: { level: e, code: 5, msg: "*invalid range pattern*" } ] + format: FILE_FORMAT_PARQUET + - uriFile: "/path/to/local/file" + format: FILE_FORMAT_PARQUET + - uriFolder: "/path/to/local/folder" + format: FILE_FORMAT_PARQUET diff --git a/tests/tests/relations/read/filter-project/both.yaml b/tests/tests/relations/read/filter-project/both.yaml new file mode 100644 index 00000000..ff56ad40 --- /dev/null +++ b/tests/tests/relations/read/filter-project/both.yaml @@ -0,0 +1,26 @@ +name: read-filter-project +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + filter: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + projection: + maintain_singular_struct: true + select: + structItems: + - field: 0 + __test: [ type: "STRUCT<string>" ] diff --git a/tests/tests/relations/read/filter-project/filter-not-bool.yaml b/tests/tests/relations/read/filter-project/filter-not-bool.yaml new file mode 100644 index 00000000..cf8861b2 --- /dev/null +++ b/tests/tests/relations/read/filter-project/filter-not-bool.yaml @@ -0,0 +1,21 @@ +name: read-filter-not-bool +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + filter: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ diag: { level: e, code: 4005, msg: "*predicates must yield booleans*string*" } ] diff --git a/tests/tests/relations/read/filter-project/projection-multiple.yaml b/tests/tests/relations/read/filter-project/projection-multiple.yaml new file mode 100644 index 00000000..f4e26577 --- /dev/null +++ b/tests/tests/relations/read/filter-project/projection-multiple.yaml @@ -0,0 +1,22 @@ +name: read-projection-multiple +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + projection: + select: + structItems: + - field: 1 + - field: 0 + __test: [ type: "STRUCT<boolean, string>" ] diff --git a/tests/tests/relations/read/filter-project/projection-singular.yaml
b/tests/tests/relations/read/filter-project/projection-singular.yaml new file mode 100644 index 00000000..04d104bb --- /dev/null +++ b/tests/tests/relations/read/filter-project/projection-singular.yaml @@ -0,0 +1,21 @@ +name: read-projection-singular +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - bool: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + projection: + select: + structItems: + - field: 0 + maintainSingularStruct__test: [ diag: { level: e, code: 4006, msg: "*must be set*" } ] diff --git a/tests/tests/relations/read/named-table/basic.yaml b/tests/tests/relations/read/named-table/basic.yaml new file mode 100644 index 00000000..806cb34d --- /dev/null +++ b/tests/tests/relations/read/named-table/basic.yaml @@ -0,0 +1,15 @@ +name: read-named-basic +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test diff --git a/tests/tests/relations/read/named-table/missing.yaml b/tests/tests/relations/read/named-table/missing.yaml new file mode 100644 index 00000000..e2840df5 --- /dev/null +++ b/tests/tests/relations/read/named-table/missing.yaml @@ -0,0 +1,15 @@ +name: read-named-missing +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: [] + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: names*" } ] diff --git a/tests/tests/relations/read/named-table/multiple.yaml b/tests/tests/relations/read/named-table/multiple.yaml new file mode 100644 index 00000000..c5494b06 --- /dev/null +++ b/tests/tests/relations/read/named-table/multiple.yaml @@ -0,0 +1,17 @@ +name: read-named-multiple +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - a + - b + __test: [ diag: { level: w, msg: "*named tables with multiple names*" } ] diff --git a/tests/tests/relations/read/schema/missing-names.yaml b/tests/tests/relations/read/schema/missing-names.yaml new file mode 100644 index 00000000..cf4600ee --- /dev/null +++ b/tests/tests/relations/read/schema/missing-names.yaml @@ -0,0 +1,15 @@ +name: read-schema-missing-names +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4003, msg: "*1 too few*" } ] + namedTable: + names: + - test diff --git a/tests/tests/relations/read/schema/missing-schema.yaml b/tests/tests/relations/read/schema/missing-schema.yaml new file mode 100644 index 00000000..4d1b11cf --- /dev/null +++ b/tests/tests/relations/read/schema/missing-schema.yaml @@ -0,0 +1,10 @@ +name: read-schema-missing +plan: + __test: [ level: i ] + relations: + - rel: + read: + namedTable: + names: + - test + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: base_schema*" } ] diff --git a/tests/tests/relations/read/schema/nullable.yaml b/tests/tests/relations/read/schema/nullable.yaml new file mode 100644 index 00000000..5fadb8e1 --- 
/dev/null +++ b/tests/tests/relations/read/schema/nullable.yaml @@ -0,0 +1,16 @@ +name: read-schema-nullable +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a] + struct: + nullability: NULLABILITY_NULLABLE + types: + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ diag: { level: e, code: 4008, msg: "*the outer struct representing a schema must not be nullable*" } ] diff --git a/tests/tests/relations/read/schema/wrong-name-count.yaml b/tests/tests/relations/read/schema/wrong-name-count.yaml new file mode 100644 index 00000000..37003f9b --- /dev/null +++ b/tests/tests/relations/read/schema/wrong-name-count.yaml @@ -0,0 +1,16 @@ +name: read-schema-wrong-name-count +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + __test: [ diag: { level: e, code: 4003, msg: "*1 too many*" } ] + namedTable: + names: + - test diff --git a/tests/tests/relations/read/virtual-table/basic.yaml b/tests/tests/relations/read/virtual-table/basic.yaml new file mode 100644 index 00000000..5f84ea05 --- /dev/null +++ b/tests/tests/relations/read/virtual-table/basic.yaml @@ -0,0 +1,35 @@ +name: read-virtual-basic +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - string: one + nullable: false + - i32: 1 + nullable: true + - fields: + - string: two + nullable: false + - i32: 2 + nullable: true + - fields: + - string: three + nullable: false + - i32: 3 + nullable: true + - fields: + - string: banana + nullable: false + - "null": { i32: { nullability: NULLABILITY_NULLABLE } } + __test: [ type: "NSTRUCT<word: string, value: i32?>" ] diff --git a/tests/tests/relations/read/virtual-table/empty.yaml b/tests/tests/relations/read/virtual-table/empty.yaml new file mode 100644 index 00000000..277852d7 --- /dev/null +++ b/tests/tests/relations/read/virtual-table/empty.yaml @@ -0,0 +1,16 @@ +name: read-virtual-empty +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: [] + __test: [ type: "NSTRUCT<word: string, value: i32?>" ] diff --git a/tests/tests/relations/read/virtual-table/row-field-count-mismatch.yaml b/tests/tests/relations/read/virtual-table/row-field-count-mismatch.yaml new file mode 100644 index 00000000..71b33fb4 --- /dev/null +++ b/tests/tests/relations/read/virtual-table/row-field-count-mismatch.yaml @@ -0,0 +1,34 @@ +name: read-virtual-field-count-mismatch +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - string: one + nullable: false + - i32: 1 + nullable: true + - fields: + - string: one + nullable: false + __test: [ diag: { level: e, code: 4005, msg: "*1 parameter(s) vs.
2 parameter(s)*" } ] + - fields: + - string: three + nullable: false + - i32: 3 + nullable: true + - fields: + - string: banana + nullable: false + - "null": { i32: { nullability: NULLABILITY_NULLABLE } } + __test: [ type: "NSTRUCT<word: string, value: i32?>" ] diff --git a/tests/tests/relations/read/virtual-table/row-type-mismatch.yaml b/tests/tests/relations/read/virtual-table/row-type-mismatch.yaml new file mode 100644 index 00000000..0d9eaabd --- /dev/null +++ b/tests/tests/relations/read/virtual-table/row-type-mismatch.yaml @@ -0,0 +1,38 @@ +name: read-virtual-row-type-mismatch +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [word, value] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + virtualTable: + values: + - fields: + - string: one + nullable: false + - i32: 1 + nullable: true + - fields: + - varChar: + value: two + length: 25 + nullable: false + - i32: 2 + nullable: true + __test: [ diag: { level: e, code: 4005, msg: "*VARCHAR vs. string on parameter path 0*" } ] + - fields: + - string: three + nullable: false + - i32: 3 + nullable: true + - fields: + - string: banana + nullable: false + - "null": { i32: { nullability: NULLABILITY_NULLABLE } } + __test: [ type: "NSTRUCT<word: string, value: i32?>" ] diff --git a/tests/tests/relations/root/missing.yaml b/tests/tests/relations/root/missing.yaml new file mode 100644 index 00000000..1137baf5 --- /dev/null +++ b/tests/tests/relations/root/missing.yaml @@ -0,0 +1,5 @@ +name: rel-root-missing +plan: + __test: [ level: i ] + relations: [] + __test: [ diag: { level: e, code: 5001, msg: "*must have at least one relation*" } ] diff --git a/tests/tests/relations/root/multiple.yaml b/tests/tests/relations/root/multiple.yaml new file mode 100644 index 00000000..00941080 --- /dev/null +++ b/tests/tests/relations/root/multiple.yaml @@ -0,0 +1,28 @@ +name: rel-root-multiple +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + - rel: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test diff --git a/tests/tests/relations/root/with-names-nested.yaml b/tests/tests/relations/root/with-names-nested.yaml new file mode 100644 index 00000000..0c0bb20b --- /dev/null +++ b/tests/tests/relations/root/with-names-nested.yaml @@ -0,0 +1,30 @@ +name: rel-root-with-names-nested +plan: + __test: [ level: i ] + relations: + - root: + names: [x, y, a, b, z] + input: + read: + baseSchema: + names: [a, b, c, d, e] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_NULLABLE } + - list: + nullability: NULLABILITY_REQUIRED + type: + struct: + nullability: NULLABILITY_REQUIRED + types: + - map: + nullability: NULLABILITY_REQUIRED + key: { string: { nullability: NULLABILITY_NULLABLE } } + value: { string: { nullability: NULLABILITY_NULLABLE } } + - bool: { nullability: NULLABILITY_NULLABLE } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<x: string?, y: LIST<NSTRUCT<a: MAP<string?, string?>, b: boolean?>>, z: i32?>" ] diff --git a/tests/tests/relations/root/with-names.yaml b/tests/tests/relations/root/with-names.yaml new file mode 100644 index
00000000..5c1737a7 --- /dev/null +++ b/tests/tests/relations/root/with-names.yaml @@ -0,0 +1,21 @@ +name: rel-root-with-names +plan: + __test: [ level: i ] + relations: + - root: + names: + - a + - b + input: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + __test: [ type: "NSTRUCT<a: string, b: i32?>" ] diff --git a/tests/tests/relations/root/without-names-nested.yaml b/tests/tests/relations/root/without-names-nested.yaml new file mode 100644 index 00000000..14a8e530 --- /dev/null +++ b/tests/tests/relations/root/without-names-nested.yaml @@ -0,0 +1,28 @@ +name: rel-root-without-names-nested +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [a, b, c, d, e] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_NULLABLE } + - list: + nullability: NULLABILITY_REQUIRED + type: + struct: + nullability: NULLABILITY_REQUIRED + types: + - map: + nullability: NULLABILITY_REQUIRED + key: { string: { nullability: NULLABILITY_NULLABLE } } + value: { string: { nullability: NULLABILITY_NULLABLE } } + - bool: { nullability: NULLABILITY_NULLABLE } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + __test: [ type: "STRUCT<string?, LIST<STRUCT<MAP<string?, string?>, boolean?>>, i32?>" ] diff --git a/tests/tests/relations/root/without-names.yaml b/tests/tests/relations/root/without-names.yaml new file mode 100644 index 00000000..698cfc96 --- /dev/null +++ b/tests/tests/relations/root/without-names.yaml @@ -0,0 +1,17 @@ +name: rel-root-without-names +plan: + __test: [ level: i ] + relations: + - rel: + read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_NULLABLE } + namedTable: + names: + - test + __test: [ type: "STRUCT<string, i32?>" ] diff --git a/tests/tests/relations/set/different-inputs.yaml b/tests/tests/relations/set/different-inputs.yaml new file mode 100644 index 00000000..40e0e9d0 --- /dev/null +++ b/tests/tests/relations/set/different-inputs.yaml @@ -0,0 +1,33 @@ +name: set-different-inputs +plan: + __test: [ level: i ] + relations: + - rel: + set: + inputs: + - read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + - read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - i32: { nullability: NULLABILITY_REQUIRED } + - string: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + op: SET_OP_UNION_ALL + __test: + - diag: { level: e, code: 4005, msg: "*i32 vs. string on parameter path 0*" } + - diag: { level: e, code: 4005, msg: "*string vs.
i32 on parameter path 1*" } ] diff --git a/tests/tests/relations/set/insufficient-inputs.yaml b/tests/tests/relations/set/insufficient-inputs.yaml new file mode 100644 index 00000000..3d565c38 --- /dev/null +++ b/tests/tests/relations/set/insufficient-inputs.yaml @@ -0,0 +1,20 @@ +name: set-insufficient-inputs +plan: + __test: [ level: i ] + relations: + - rel: + set: + inputs: + - read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + op: SET_OP_UNION_ALL + __test: [ diag: { level: e, code: 5002, msg: "*set operations require at least two input relations*" } ] diff --git a/tests/tests/relations/set/missing-op.yaml b/tests/tests/relations/set/missing-op.yaml new file mode 100644 index 00000000..dbfbaf28 --- /dev/null +++ b/tests/tests/relations/set/missing-op.yaml @@ -0,0 +1,30 @@ +name: set-missing-op +plan: + __test: [ level: i ] + relations: + - rel: + set: + inputs: + - read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + - read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + op__test: [ diag: { level: e, code: 2, msg: "*this enum may not be left unspecified*" } ] diff --git a/tests/tests/relations/set/three-inputs.yaml b/tests/tests/relations/set/three-inputs.yaml new file mode 100644 index 00000000..321aeae2 --- /dev/null +++ b/tests/tests/relations/set/three-inputs.yaml @@ -0,0 +1,42 @@ +name: set-three-inputs +plan: + __test: [ level: i ] + relations: + - rel: + set: + inputs: + - read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + - read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + - read: + baseSchema: + names: [u, v] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test3 + op: SET_OP_UNION_ALL + __test: [ type: "STRUCT<string, i32>" ] diff --git a/tests/tests/relations/set/two-inputs.yaml b/tests/tests/relations/set/two-inputs.yaml new file mode 100644 index 00000000..d21c0902 --- /dev/null +++ b/tests/tests/relations/set/two-inputs.yaml @@ -0,0 +1,31 @@ +name: set-two-inputs +plan: + __test: [ level: i ] + relations: + - rel: + set: + inputs: + - read: + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + - read: + baseSchema: + names: [x, y] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test2 + op: SET_OP_UNION_ALL + __test: [ type: "STRUCT<string, i32>" ] diff --git a/tests/tests/relations/sort/coalesce.yaml b/tests/tests/relations/sort/coalesce.yaml new
file mode 100644 index 00000000..83bd7d1c --- /dev/null +++ b/tests/tests/relations/sort/coalesce.yaml @@ -0,0 +1,27 @@ +name: rel-sort-coalesce +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + direction: SORT_DIRECTION_CLUSTERED + __test: [ type: "NSTRUCT<a: string, b: i32>" ] diff --git a/tests/tests/relations/sort/key_cmp.yaml b/tests/tests/relations/sort/key_cmp.yaml new file mode 100644 index 00000000..6eb3046a --- /dev/null +++ b/tests/tests/relations/sort/key_cmp.yaml @@ -0,0 +1,42 @@ +name: rel-sort-key-cmp +plan: + __test: [ level: iw ] + extensionUris: + - extensionUriAnchor: 1 + uri__yaml: + scalar_functions: + - name: "cmp" + impls: + - args: + - value: i32 + - value: i32 + return: i32 + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: cmp:i32_i32 + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + comparisonFunctionReference: 1 + __test: [ type: "NSTRUCT<a: string, b: i32>" ] diff --git a/tests/tests/relations/sort/key_lt.yaml b/tests/tests/relations/sort/key_lt.yaml new file mode 100644 index 00000000..9eae7b70 --- /dev/null +++ b/tests/tests/relations/sort/key_lt.yaml @@ -0,0 +1,42 @@ +name: rel-sort-key-lt +plan: + __test: [ level: iw ] + extensionUris: + - extensionUriAnchor: 1 + uri__yaml: + scalar_functions: + - name: "lt" + impls: + - args: + - value: i32 + - value: i32 + return: BOOLEAN + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: lt:i32_i32 + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + comparisonFunctionReference: 1 + __test: [ type: "NSTRUCT<a: string, b: i32>" ] diff --git a/tests/tests/relations/sort/missing-expr.yaml b/tests/tests/relations/sort/missing-expr.yaml new file mode 100644 index 00000000..3f4291e8 --- /dev/null +++ b/tests/tests/relations/sort/missing-expr.yaml @@ -0,0 +1,23 @@ +name: rel-sort-missing-expr +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: expr*" } ] diff --git a/tests/tests/relations/sort/missing-sort-kind.yaml b/tests/tests/relations/sort/missing-sort-kind.yaml new file mode 100644 index 00000000..02f0dd00 --- /dev/null +++ b/tests/tests/relations/sort/missing-sort-kind.yaml @@ -0,0 +1,26
@@ +name: rel-sort-missing-sort-kind +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: sort_kind*" } ] diff --git a/tests/tests/relations/sort/missing.yaml b/tests/tests/relations/sort/missing.yaml new file mode 100644 index 00000000..e18f2dab --- /dev/null +++ b/tests/tests/relations/sort/missing.yaml @@ -0,0 +1,21 @@ +name: rel-sort-no-op +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + __test: [ diag: { level: e, code: 1002, msg: "*missing required protobuf field: sorts*" } ] diff --git a/tests/tests/relations/sort/multiple.yaml b/tests/tests/relations/sort/multiple.yaml new file mode 100644 index 00000000..82b927a5 --- /dev/null +++ b/tests/tests/relations/sort/multiple.yaml @@ -0,0 +1,32 @@ +name: rel-sort-multiple +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 1 } } + direction: SORT_DIRECTION_ASC_NULLS_LAST + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + direction: SORT_DIRECTION_DESC_NULLS_FIRST + __test: [ type: "NSTRUCT<a: string, b: i32>" ] diff --git a/tests/tests/relations/sort/single.yaml b/tests/tests/relations/sort/single.yaml new file mode 100644 index 00000000..5d6c4644 --- /dev/null +++ b/tests/tests/relations/sort/single.yaml @@ -0,0 +1,27 @@ +name: rel-sort-single +plan: + __test: [ level: i ] + relations: + - rel: + sort: + input: + read: + common: + direct: {} + baseSchema: + names: [a, b] + struct: + nullability: NULLABILITY_REQUIRED + types: + - string: { nullability: NULLABILITY_REQUIRED } + - i32: { nullability: NULLABILITY_REQUIRED } + namedTable: + names: + - test + sorts: + - expr: + selection: + rootReference: {} + directReference: { structField: { field: 0 } } + direction: SORT_DIRECTION_ASC_NULLS_LAST + __test: [ type: "NSTRUCT<a: string, b: i32>" ] diff --git a/tests/tests/tpc-h/README.md b/tests/tests/tpc-h/README.md new file mode 100644 index 00000000..e01542e6 --- /dev/null +++ b/tests/tests/tpc-h/README.md @@ -0,0 +1,28 @@ +This directory contains positive tests for (some of) the TPC-H queries. For the +most part, they were generated, either completely or partially, by Isthmus; +however, the following manual corrections were needed: + + - According to the spec, aggregations output an extra column indicating which + grouping set was used for a particular row; the Isthmus plans were not + accounting for this column. + - Aggregations with only measures were being emitted by Isthmus as + aggregations with an empty grouping set rather than no grouping sets. + - Isthmus was emitting duplicate grouping sets wherever there should only be + one grouping set.
+ - Decimal literals had too many bytes attached to them. + - Emitted function signatures use `any1` etc. where, according to the spec, a + plain `any` should be used. Same for `decimal` vs `dec`. + - Subqueries are not presently supported by Isthmus, so queries with + subqueries were merged manually. + +NOTE: these queries have not undergone *functional* testing, and they have been +manually modified. Slight functional differences between the SQL queries and +the plans due to bugs are therefore quite likely. + +NOTE: the plans are also not optimized; they typically start by forming the +cross product of all input tables and then applying a filter to the result. +Without predicate pushdown, they are unlikely to complete for any reasonable +table size. + +TODO: once function resolution is implemented in the validator, the diagnostic +overrides that currently suppress the function-related diagnostics should be removed. diff --git a/tests/tests/tpc-h/tpc-h01.yaml b/tests/tests/tpc-h/tpc-h01.yaml new file mode 100644 index 00000000..de6a273f --- /dev/null +++ b/tests/tests/tpc-h/tpc-h01.yaml @@ -0,0 +1,465 @@ +# select +# l_returnflag, +# l_linestatus, +# sum(l_quantity) as sum_qty, +# sum(l_extendedprice) as sum_base_price, +# sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, +# sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, +# avg(l_quantity) as avg_qty, +# avg(l_extendedprice) as avg_price, +# avg(l_discount) as avg_disc, +# count(*) as count_order +# from +# lineitem +# where +# l_shipdate <= date '1998-12-01' - interval '120' day (3) +# group by +# l_returnflag, +# l_linestatus +# order by +# l_returnflag, +# l_linestatus

name: TPC-H01 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_datetime.yaml + - extensionUriAnchor: 2 + uri: /functions_arithmetic_decimal.yaml + - extensionUriAnchor: 3 + uri: /functions_aggregate_generic.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: lte:date_date + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 2 + name: subtract:date_day + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 3 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 4 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 5 + name: add:opt_dec_dec + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 6 + name: sum:opt_dec + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 7 + name: avg:opt_dec + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 8 + name: count:opt + relations: + - root: + __test: + - type: "\ + NSTRUCT<\ + L_RETURNFLAG: FIXEDCHAR?<1>, \ + L_LINESTATUS: FIXEDCHAR?<1>, \ + SUM_QTY: DECIMAL?<19, 0>, \ + SUM_BASE_PRICE: DECIMAL?<19, 0>, \ + SUM_DISC_PRICE: DECIMAL?<19, 0>, \ + SUM_CHARGE: DECIMAL?<19, 0>, \ + AVG_QTY: DECIMAL?<19, 0>, \ + AVG_PRICE: DECIMAL?<19, 0>, \ + AVG_DISC: DECIMAL?<19, 0>, \ + COUNT_ORDER: i64\ + >" + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + - 2 + - 3 + - 4 + - 5 + - 6 + - 7 + - 8 + - 9 + groupings: + -
groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 16 + - 17 + - 18 + - 19 + - 20 + - 21 + - 22 + expressions: + - selection: + directReference: + structField: + field: 8 + rootReference: {} + - selection: + directReference: + structField: + field: 9 + rootReference: {} + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 4 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 3 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 4 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 3 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 7 + rootReference: {} + functionReference: 5 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 3 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 10561 + - literal: + intervalDayToSecond: + days: 120 + functionReference: 2 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + 
nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 2 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 2 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + functionReference: 8 + outputType: + i64: + nullability: NULLABILITY_REQUIRED + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: {} + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + names: + - L_RETURNFLAG + - L_LINESTATUS + - SUM_QTY + - SUM_BASE_PRICE + - SUM_DISC_PRICE + - SUM_CHARGE + - AVG_QTY + - AVG_PRICE + - AVG_DISC + - COUNT_ORDER diff --git a/tests/tests/tpc-h/tpc-h02.yaml b/tests/tests/tpc-h/tpc-h02.yaml new file mode 100644 index 00000000..f46139cc --- /dev/null +++ b/tests/tests/tpc-h/tpc-h02.yaml @@ -0,0 +1,780 @@ +# select +# s.s_acctbal, +# s.s_name, +# n.n_name, +# p.p_partkey, +# p.p_mfgr, +# s.s_address, +# s.s_phone, +# s.s_comment +# from +# "part" p, +# "supplier" s, +# "partsupp" ps, +# "nation" n, +# "region" r +# where +# p.p_partkey = ps.ps_partkey +# and s.s_suppkey = ps.ps_suppkey +# and p.p_size = 41 +# and p.p_type like '%NICKEL' +# and s.s_nationkey = n.n_nationkey +# and n.n_regionkey = r.r_regionkey +# and r.r_name = 'EUROPE' +# and ps.ps_supplycost = ( +# +# select +# min(ps.ps_supplycost) +# +# from +# "partsupp" ps, +# "supplier" s, +# "nation" n, +# "region" r +# where +# p.p_partkey = ps.ps_partkey +# and s.s_suppkey = ps.ps_suppkey +# and s.s_nationkey = 
n.n_nationkey +# and n.n_regionkey = r.r_regionkey +# and r.r_name = 'EUROPE' +# ) +# +# order by +# s.s_acctbal desc, +# n.n_name, +# s.s_name, +# p.p_partkey +# limit 100 + +name: TPC-H02 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_string.yaml + - extensionUriAnchor: 4 + uri: /functions_aggregate_generic.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: like:vchar_vchar + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 4 + name: min:any_any + relations: + - root: + input: + fetch: + common: + direct: {} + count: '100' + input: + sort: + common: + direct: {} + input: + project: + common: + emit: + outputMapping: + - 28 + - 29 + - 30 + - 31 + - 32 + - 33 + - 34 + - 35 + expressions: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + - selection: + directReference: + structField: + field: 11 + rootReference: {} + - selection: + directReference: + structField: + field: 13 + rootReference: {} + - selection: + directReference: + structField: + field: 15 + rootReference: {} + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 16 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 9 + rootReference: {} + - selection: + directReference: + structField: + field: 17 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - literal: + i32: 41 + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + literal: + fixedChar: '%NICKEL' + type: + varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - selection: + directReference: + structField: + field: 21 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 23 + rootReference: {} + 
- selection: + directReference: + structField: + field: 25 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 26 + rootReference: {} + - cast: + input: + literal: + fixedChar: EUROPE + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - subquery: + scalar: + input: + aggregate: + common: + emit: + outputMapping: + - 0 + input: + project: + common: + emit: + outputMapping: + - 19 + expressions: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + outerReference: + stepsOut: 1 + - selection: + directReference: + structField: {} + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 8 + rootReference: {} + - selection: + directReference: + structField: + field: 12 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - selection: + directReference: + structField: + field: 16 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - cast: + input: + literal: + fixedChar: EUROPE + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - PS_PARTKEY + - PS_SUPPKEY + - PS_AVAILQTY + - PS_SUPPLYCOST + - PS_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 199 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PARTSUPP + right: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + 
precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - R_REGIONKEY + - R_NAME + - R_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - REGION + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + functionReference: 4 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - selection: + directReference: + structField: + field: 19 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - P_PARTKEY + - P_NAME + - P_MFGR + - P_BRAND + - P_TYPE + - P_SIZE + - P_CONTAINER + - P_RETAILPRICE + - P_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 23 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PART + right: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - PS_PARTKEY + - PS_SUPPKEY + - PS_AVAILQTY + - PS_SUPPLYCOST + - PS_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: 
NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 199 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PARTSUPP + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - R_REGIONKEY + - R_NAME + - R_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - REGION + type: JOIN_TYPE_INNER + sorts: + - direction: SORT_DIRECTION_DESC_NULLS_FIRST + expr: + selection: + directReference: + structField: {} + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 2 + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 3 + rootReference: {} + names: + - S_ACCTBAL + - S_NAME + - N_NAME + - P_PARTKEY + - P_MFGR + - S_ADDRESS + - S_PHONE + - S_COMMENT diff --git a/tests/tests/tpc-h/tpc-h03.yaml b/tests/tests/tpc-h/tpc-h03.yaml new file mode 100644 index 00000000..d1a39140 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h03.yaml @@ -0,0 +1,485 @@ +# select +# l.l_orderkey, +# sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, +# o.o_orderdate, +# o.o_shippriority +# +# from +# "customer" c, +# "orders" o, +# "lineitem" l +# +# where +# c.c_mktsegment = 'HOUSEHOLD' +# and c.c_custkey = o.o_custkey +# and l.l_orderkey = o.o_orderkey +# and o.o_orderdate < date '1995-03-25' +# and l.l_shipdate > date '1995-03-25' +# +# group by +# l.l_orderkey, +# o.o_orderdate, +# o.o_shippriority +# order by +# revenue desc, +# o.o_orderdate +# limit 10 + +name: TPC-H03 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: lt:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: gt:date_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 5 + name: multiply:opt_dec_dec + - extensionFunction: + 
extensionUriReference: 4 + functionAnchor: 6 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: sum:opt_dec + relations: + - root: + __test: + - type: "NSTRUCT<L_ORDERKEY: i64, REVENUE: DECIMAL?<19, 0>, O_ORDERDATE: date?, O_SHIPPRIORITY: i32?>" + input: + fetch: + common: + direct: {} + count: '10' + input: + sort: + common: + direct: {} + input: + project: + common: + emit: + outputMapping: + - 4 + - 5 + - 6 + - 7 + expressions: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + input: + aggregate: + common: + direct: {} + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 33 + - 34 + - 35 + - 36 + expressions: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - selection: + directReference: + structField: + field: 15 + rootReference: {} + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 23 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 6 + rootReference: {} + - cast: + input: + literal: + fixedChar: HOUSEHOLD + type: + fixedChar: + length: 10 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 9 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - selection: + directReference: + structField: + field: 8 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - literal: + date: 9214 + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 27 + rootReference: {} + - literal: + date: 9214 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read:
baseSchema: + names: + - C_CUSTKEY + - C_NAME + - C_ADDRESS + - C_NATIONKEY + - C_PHONE + - C_ACCTBAL + - C_MKTSEGMENT + - C_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 117 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - CUSTOMER + right: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_DESC_NULLS_FIRST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 2 + rootReference: {} + names: + - L_ORDERKEY + - REVENUE + - O_ORDERDATE + - O_SHIPPRIORITY diff --git a/tests/tests/tpc-h/tpc-h04.yaml b/tests/tests/tpc-h/tpc-h04.yaml new file mode 
100644 index 00000000..c99a0941 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h04.yaml @@ -0,0 +1,318 @@ +# select +# o.o_orderpriority, +# count(*) as order_count +# from +# "orders" o +# +# where +# o.o_orderdate >= date '1996-10-01' +# and o.o_orderdate < date '1996-10-01' + interval '3' month +# and +# exists ( +# select +# * +# from +# "lineitem" l +# where +# l.l_orderkey = o.o_orderkey +# and l.l_commitdate < l.l_receiptdate +# ) +# group by +# o.o_orderpriority +# order by +# o.o_orderpriority + +name: TPC-H04 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_datetime.yaml + - extensionUriAnchor: 3 + uri: /functions_aggregate_generic.yaml + - extensionUriAnchor: 4 + uri: /functions_comparison.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: gte:date_date + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 3 + name: lt:date_date + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 4 + name: add:date_year + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: count:opt + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: equal:any_any + relations: + - root: + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + groupings: + - groupingExpressions: + - selection: + directReference: + structField: {} + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 9 + expressions: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - literal: + date: 9770 + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 9770 + - literal: + intervalYearToMonth: + months: 3 + functionReference: 4 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - subquery: + setPredicate: + predicateOp: PREDICATE_OP_EXISTS + tuples: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: {} + outerReference: + stepsOut: 1 + functionReference: 6 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 11 + rootReference: {} + - selection: + directReference: + structField: + field: 12 + rootReference: {} + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + read: + baseSchema: + names: + - L_ORDERKEY + - 
L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + measures: + - measure: + functionReference: 5 + outputType: + i64: + nullability: NULLABILITY_REQUIRED + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: {} + rootReference: {} + names: + - O_ORDERPRIORITY + - ORDER_COUNT diff --git a/tests/tests/tpc-h/tpc-h05.yaml b/tests/tests/tpc-h/tpc-h05.yaml new file mode 100644 index 00000000..5e39be72 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h05.yaml @@ -0,0 +1,612 @@ +# select +# n.n_name, +# sum(l.l_extendedprice * (1 - l.l_discount)) as revenue +# +# from +# "customer" c, +# "orders" o, +# "lineitem" l, +# "supplier" s, +# "nation" n, +# "region" r +# +# where +# c.c_custkey = o.o_custkey +# and l.l_orderkey = o.o_orderkey +# and l.l_suppkey = s.s_suppkey +# and c.c_nationkey = s.s_nationkey +# and s.s_nationkey = n.n_nationkey +# and n.n_regionkey = r.r_regionkey +# and r.r_name = 'EUROPE' +# and o.o_orderdate >= date '1997-01-01' +# and o.o_orderdate < date '1997-01-01' + interval '1' year +# group by +# n.n_name +# +# order by +# revenue desc + +name: TPC-H05 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + 
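# "__test" keys are test-harness assertions rather than plan content: "level: i" bounds the worst +# permitted diagnostic severity at info, and "type" entries check the derived output type. Date +# literals in these plans count days since 1970-01-01, so "date: 9862" below is 1997-01-01. +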
__test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: gte:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: lt:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: add:date_year + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 8 + name: sum:opt_dec + relations: + - root: + __test: + - type: "NSTRUCT<N_NAME: FIXEDCHAR?<25>, REVENUE: DECIMAL?<19, 0>>" + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 47 + - 48 + expressions: + - selection: + directReference: + structField: + field: 41 + rootReference: {} + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 23 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 9 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - selection: + directReference: + structField: + field: 8 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 19 + rootReference: {} + - selection: + directReference: + structField: + field: 33 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 36 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 36 + rootReference: {} + - selection: + directReference: + structField: + field: 40 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 42 +
rootReference: {} + - selection: + directReference: + structField: + field: 44 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 45 + rootReference: {} + - cast: + input: + literal: + fixedChar: EUROPE + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - literal: + date: 9862 + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 9862 + - literal: + intervalYearToMonth: + years: 1 + functionReference: 5 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - C_CUSTKEY + - C_NAME + - C_ADDRESS + - C_NATIONKEY + - C_PHONE + - C_ACCTBAL + - C_MKTSEGMENT + - C_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 117 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - CUSTOMER + right: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + 
- i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - R_REGIONKEY + - R_NAME + - R_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - REGION + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_DESC_NULLS_FIRST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + names: + - N_NAME + - REVENUE diff --git a/tests/tests/tpc-h/tpc-h06.yaml b/tests/tests/tpc-h/tpc-h06.yaml new file mode 100644 index 00000000..ede9e0ca --- /dev/null +++ b/tests/tests/tpc-h/tpc-h06.yaml @@ -0,0 +1,314 @@ +# select +# sum(l_extendedprice * l_discount) as revenue +# from +# "lineitem" +# where +# l_shipdate >= date '1997-01-01' +# and l_shipdate < date '1997-01-01' + interval '1' year +# and +# l_discount between 0.03 - 0.01 and 0.03 + 0.01 +# and l_quantity < 24 + +name: TPC-H06 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition 
check warnings (function parsing isn't implemented yet) + plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_datetime.yaml + - extensionUriAnchor: 3 + uri: /functions_comparison.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: gte:date_date + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 3 + name: lt:date_date + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 4 + name: add:date_year + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: gte:any_any + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 7 + name: lte:any_any + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 8 + name: add:opt_dec_dec + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 9 + name: lt:any_any + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 10 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 11 + name: sum:opt_dec + relations: + - root: + __test: + - type: "NSTRUCT<REVENUE: DECIMAL?<19, 0>>" + input: + aggregate: + common: + emit: + outputMapping: + - 0 + input: + project: + common: + emit: + outputMapping: + - 16 + expressions: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 10 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - literal: + date: 9862 + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 9862 + - literal: + intervalYearToMonth: + years: 1 + functionReference: 4 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 6 + rootReference: {} + - scalarFunction: + args: + - literal: + decimal: + precision: 3 + scale: 2 + value: AAAAAAAAAAAAAAAAAAAAAA== + - literal: + decimal: + precision: 3 + scale: 2 + value: AAAAAAAAAAAAAAAAAAAAAA== + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 4 + scale: 2 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 6 + rootReference: {} + - scalarFunction: + args: + - literal: + decimal: + precision: 3 + scale: 2 + value: AAAAAAAAAAAAAAAAAAAAAA== + - literal: + decimal: + precision: 3 + scale: 2 + value: AAAAAAAAAAAAAAAAAAAAAA== + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 4 + scale: 2 + functionReference: 7 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + -
selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + literal: + i32: 24 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 9 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + measures: + - measure: + args: + - selection: + directReference: + structField: {} + rootReference: {} + functionReference: 11 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + names: + - REVENUE diff --git a/tests/tests/tpc-h/tpc-h07.yaml b/tests/tests/tpc-h/tpc-h07.yaml new file mode 100644 index 00000000..7f585a93 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h07.yaml @@ -0,0 +1,726 @@ +# select +# supp_nation, +# cust_nation, +# l_year, +# sum(volume) as revenue +# from +# ( +# select +# n1.n_name as supp_nation, +# n2.n_name as cust_nation, +# extract(year from l.l_shipdate) as l_year, +# l.l_extendedprice * (1 - l.l_discount) as volume +# from +# "supplier" s, +# "lineitem" l, +# "orders" o, +# "customer" c, +# "nation" n1, +# "nation" n2 +# where +# s.s_suppkey = l.l_suppkey +# and o.o_orderkey = l.l_orderkey +# and c.c_custkey = o.o_custkey +# and s.s_nationkey = n1.n_nationkey +# and c.c_nationkey = n2.n_nationkey +# and ( +# (n1.n_name = 'EGYPT' and n2.n_name = 'UNITED STATES') +# or (n1.n_name = 'UNITED STATES' and n2.n_name = 'EGYPT') +# ) +# and l.l_shipdate between date '1995-01-01' and date '1996-12-31' +# ) as shipping +# group by +# supp_nation, +# cust_nation, +# l_year +# order by +# supp_nation, +# cust_nation, +# l_year + +name: TPC-H07 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: 
/functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 3 + name: or:bool + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: gte:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: lte:date_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 8 + name: extract:req_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 9 + name: sum:opt_dec + relations: + - root: + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + - 2 + - 3 + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 48 + - 49 + - 50 + - 51 + expressions: + - selection: + directReference: + structField: + field: 41 + rootReference: {} + - selection: + directReference: + structField: + field: 45 + rootReference: {} + - scalarFunction: + args: + - enum: + specified: YEAR + - selection: + directReference: + structField: + field: 17 + rootReference: {} + functionReference: 8 + outputType: + i64: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 13 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 9 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 23 + rootReference: {} + - selection: + directReference: + structField: + field: 7 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 32 + rootReference: {} + - selection: + directReference: + structField: + field: 24 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 40 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 35 + 
rootReference: {} + - selection: + directReference: + structField: + field: 44 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 41 + rootReference: {} + - cast: + input: + literal: + fixedChar: EGYPT + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 45 + rootReference: {} + - cast: + input: + literal: + fixedChar: UNITED STATES + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 41 + rootReference: {} + - cast: + input: + literal: + fixedChar: UNITED STATES + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 45 + rootReference: {} + - cast: + input: + literal: + fixedChar: EGYPT + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - literal: + date: 9131 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - literal: + date: 9861 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE 
+ - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - C_CUSTKEY + - C_NAME + - C_ADDRESS + - C_NATIONKEY + - C_PHONE + - C_ACCTBAL + - C_MKTSEGMENT + - C_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 117 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - CUSTOMER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION 
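+# (this second NATION scan is n2, the customer-side nation, in the query comment above)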
+ type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + functionReference: 9 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 38 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: {} + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 2 + rootReference: {} + names: + - SUPP_NATION + - CUST_NATION + - L_YEAR + - REVENUE diff --git a/tests/tests/tpc-h/tpc-h08.yaml b/tests/tests/tpc-h/tpc-h08.yaml new file mode 100644 index 00000000..e9bff2f1 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h08.yaml @@ -0,0 +1,810 @@ +# select +# o_year, +# sum(case +# when nation = 'EGYPT' then volume +# else 0 +# end) / sum(volume) as mkt_share +# from +# ( +# select +# extract(year from o.o_orderdate) as o_year, +# l.l_extendedprice * (1 - l.l_discount) as volume, +# n2.n_name as nation +# from +# "part" p, +# "supplier" s, +# "lineitem" l, +# "orders" o, +# "customer" c, +# "nation" n1, +# "nation" n2, +# "region" r +# where +# p.p_partkey = l.l_partkey +# and s.s_suppkey = l.l_suppkey +# and l.l_orderkey = o.o_orderkey +# and o.o_custkey = c.c_custkey +# and c.c_nationkey = n1.n_nationkey +# and n1.n_regionkey = r.r_regionkey +# and r.r_name = 'MIDDLE EAST' +# and s.s_nationkey = n2.n_nationkey +# and o.o_orderdate between date '1995-01-01' and date '1996-12-31' +# and p.p_type = 'PROMO BRUSHED COPPER' +# ) as all_nations +# group by +# o_year +# order by +# o_year + +name: TPC-H08 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: gte:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: lte:date_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 5 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 7 + name: extract:req_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 8 + name: sum:opt_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 9 + name: divide:opt_dec_dec + relations: + - root: + input: + project: + common: + emit: + outputMapping: + - 0 + - 3 + expressions: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + 
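# fields 1 and 2 are the two sums defined below (EGYPT-only volume and total volume); their quotient is MKT_SHARE +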
functionReference: 9 + outputType: + decimal: + precision: 38 + scale: 6 + nullability: NULLABILITY_NULLABLE + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + - 2 + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 60 + - 61 + - 62 + expressions: + - scalarFunction: + args: + - enum: + specified: YEAR + - selection: + directReference: + structField: + field: 36 + rootReference: {} + functionReference: 7 + outputType: + i64: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 22 + rootReference: {} + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 54 + rootReference: {} + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 17 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 9 + rootReference: {} + - selection: + directReference: + structField: + field: 18 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 16 + rootReference: {} + - selection: + directReference: + structField: + field: 32 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 33 + rootReference: {} + - selection: + directReference: + structField: + field: 41 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 44 + rootReference: {} + - selection: + directReference: + structField: + field: 49 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 51 + rootReference: {} + - selection: + directReference: + structField: + field: 57 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 58 + rootReference: {} + - cast: + input: + literal: + fixedChar: MIDDLE EAST + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - selection: + directReference: + structField: + field: 53 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - 
scalarFunction: + args: + - selection: + directReference: + structField: + field: 36 + rootReference: {} + - literal: + date: 9131 + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 36 + rootReference: {} + - literal: + date: 9861 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - literal: + varChar: + length: 25 + value: PROMO BRUSHED COPPER + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - P_PARTKEY + - P_NAME + - P_MFGR + - P_BRAND + - P_TYPE + - P_SIZE + - P_CONTAINER + - P_RETAILPRICE + - P_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 23 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PART + right: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + 
nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - C_CUSTKEY + - C_NAME + - C_ADDRESS + - C_NATIONKEY + - C_PHONE + - C_ACCTBAL + - C_MKTSEGMENT + - C_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 117 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - CUSTOMER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - R_REGIONKEY + - R_NAME + - R_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - REGION + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - ifThen: + ifs: + - if: + scalarFunction: + args: + - 
selection: + directReference: + structField: + field: 2 + rootReference: {} + - literal: + fixedChar: "EGYPT " + nullable: true + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + then: + selection: + directReference: + structField: + field: 1 + rootReference: {} + else: + literal: + decimal: + value: AAAAAAAAAAAAAAAAAAAAAA== + precision: 19 + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 38 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 38 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: {} + rootReference: {} + names: + - O_YEAR + - MKT_SHARE diff --git a/tests/tests/tpc-h/tpc-h09.yaml b/tests/tests/tpc-h/tpc-h09.yaml new file mode 100644 index 00000000..1c6ae21b --- /dev/null +++ b/tests/tests/tpc-h/tpc-h09.yaml @@ -0,0 +1,640 @@ +# select +# nation, +# o_year, +# sum(amount) as sum_profit +# from +# ( +# select +# n.n_name as nation, +# extract(year from o.o_orderdate) as o_year, +# l.l_extendedprice * (1 - l.l_discount) - ps.ps_supplycost * l.l_quantity as amount +# from +# "part" p, +# "supplier" s, +# "lineitem" l, +# "partsupp" ps, +# "orders" o, +# "nation" n +# where +# s.s_suppkey = l.l_suppkey +# and ps.ps_suppkey = l.l_suppkey +# and ps.ps_partkey = l.l_partkey +# and p.p_partkey = l.l_partkey +# and o.o_orderkey = l.l_orderkey +# and s.s_nationkey = n.n_nationkey +# and p.p_name like '%yellow%' +# ) as profit +# group by +# nation, +# o_year +# order by +# nation, +# o_year desc + +name: TPC-H09 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_string.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + - extensionUriAnchor: 5 + uri: /functions_datetime.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: like:vchar_vchar + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 4 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 5 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 5 + functionAnchor: 6 + name: extract:req_date + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: sum:opt_dec + relations: + - root: + input: + sort: + common: + direct: {} + input: + aggregate: + common: + emit: + outputMapping: + - 0 + - 1 + - 2 + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 50 + - 51 + - 52 + 
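# columns 50-52 hold the three expressions below: NATION, O_YEAR, and AMOUNT from the "profit" subquery +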
expressions: + - selection: + directReference: + structField: + field: 47 + rootReference: {} + - scalarFunction: + args: + - enum: + specified: YEAR + - selection: + directReference: + structField: + field: 41 + rootReference: {} + functionReference: 6 + outputType: + i64: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 22 + rootReference: {} + functionReference: 4 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 35 + rootReference: {} + - selection: + directReference: + structField: + field: 20 + rootReference: {} + functionReference: 5 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 4 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 9 + rootReference: {} + - selection: + directReference: + structField: + field: 18 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 33 + rootReference: {} + - selection: + directReference: + structField: + field: 18 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 32 + rootReference: {} + - selection: + directReference: + structField: + field: 17 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 17 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 37 + rootReference: {} + - selection: + directReference: + structField: + field: 16 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - selection: + directReference: + structField: + field: 46 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - cast: + input: + literal: + fixedChar: '%yellow%' + type: + varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + 
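# the join condition is the constant "true"; the actual comma-join predicates live in the filter above +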
expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - P_PARTKEY + - P_NAME + - P_MFGR + - P_BRAND + - P_TYPE + - P_SIZE + - P_CONTAINER + - P_RETAILPRICE + - P_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 23 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PART + right: + read: + baseSchema: + names: + - S_SUPPKEY + - S_NAME + - S_ADDRESS + - S_NATIONKEY + - S_PHONE + - S_ACCTBAL + - S_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 101 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - SUPPLIER + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - PS_PARTKEY + - PS_SUPPKEY + - PS_AVAILQTY + - PS_SUPPLYCOST + - PS_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 199 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PARTSUPP + type: JOIN_TYPE_INNER + right: + read: + baseSchema: 
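+ # Fifth join input: the ORDERS table, joined to lineitem via
+ # o_orderkey = l_orderkey in the filter above.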
+ names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 2 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_REQUIRED + precision: 38 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_ASC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 0 + rootReference: {} + - direction: SORT_DIRECTION_DESC_NULLS_LAST + expr: + selection: + directReference: + structField: + field: 1 + rootReference: {} + names: + - NATION + - O_YEAR + - SUM_PROFIT diff --git a/tests/tests/tpc-h/tpc-h10.yaml b/tests/tests/tpc-h/tpc-h10.yaml new file mode 100644 index 00000000..67e993c1 --- /dev/null +++ b/tests/tests/tpc-h/tpc-h10.yaml @@ -0,0 +1,622 @@ +# select +# c.c_custkey, +# c.c_name, +# sum(l.l_extendedprice * (1 - l.l_discount)) as revenue, +# c.c_acctbal, +# n.n_name, +# c.c_address, +# c.c_phone, +# c.c_comment +# from +# "customer" c, +# "orders" o, +# "lineitem" l, +# "nation" n +# where +# c.c_custkey = o.o_custkey +# and l.l_orderkey = o.o_orderkey +# and o.o_orderdate >= date '1994-03-01' +# and o.o_orderdate < date '1994-03-01' + interval '3' month +# and l.l_returnflag = 'R' +# and c.c_nationkey = n.n_nationkey +# group by +# c.c_custkey, +# c.c_name, +# c.c_acctbal, +# c.c_phone, +# n.n_name, +# c.c_address, +# c.c_comment +# order by +# revenue desc +# limit 20 + +name: TPC-H10 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: gte:date_date + - 
extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: lt:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: add:date_year + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 8 + name: sum:opt_dec + relations: + - root: + __test: + - type: "\ + NSTRUCT<\ + C_CUSTKEY: i32, \ + C_NAME: i64, \ + REVENUE: VARCHAR?<25>, \ + C_ACCTBAL: DECIMAL?<19, 0>, \ + N_NAME: DECIMAL?<19, 0>, \ + C_ADDRESS: FIXEDCHAR?<25>, \ + C_PHONE: VARCHAR?<40>, \ + C_COMMENT: FIXEDCHAR?<15>\ + >" + input: + fetch: + common: + direct: {} + count: '20' + input: + sort: + common: + direct: {} + input: + project: + common: + emit: + outputMapping: + - 8 + - 9 + - 10 + - 11 + - 12 + - 13 + - 14 + - 15 + expressions: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 7 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 6 + rootReference: {} + input: + aggregate: + common: + direct: {} + groupings: + - groupingExpressions: + - selection: + directReference: + structField: + field: 0 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - selection: + directReference: + structField: + field: 6 + rootReference: {} + input: + project: + common: + emit: + outputMapping: + - 37 + - 38 + - 39 + - 40 + - 41 + - 42 + - 43 + - 44 + expressions: + - selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - selection: + directReference: + structField: + field: 34 + rootReference: {} + - selection: + directReference: + structField: + field: 2 + rootReference: {} + - selection: + directReference: + structField: + field: 7 + rootReference: {} + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 23 + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 6 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + 
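+ # The AND'ed arguments below encode the WHERE clause: the two join keys
+ # (c_custkey = o_custkey, l_orderkey = o_orderkey), the o_orderdate
+ # range check, l_returnflag = 'R', and c_nationkey = n_nationkey.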
- selection: + directReference: + structField: {} + rootReference: {} + - selection: + directReference: + structField: + field: 9 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 17 + rootReference: {} + - selection: + directReference: + structField: + field: 8 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - literal: + date: 8825 + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 12 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 8825 + - literal: + intervalYearToMonth: + months: 3 + functionReference: 5 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 25 + rootReference: {} + - literal: + fixedChar: R + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 3 + rootReference: {} + - selection: + directReference: + structField: + field: 33 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - C_CUSTKEY + - C_NAME + - C_ADDRESS + - C_NATIONKEY + - C_PHONE + - C_ACCTBAL + - C_MKTSEGMENT + - C_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 40 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 117 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - CUSTOMER + right: + read: + baseSchema: + names: + - O_ORDERKEY + - O_CUSTKEY + - O_ORDERSTATUS + - O_TOTALPRICE + - O_ORDERDATE + - O_ORDERPRIORITY + - O_CLERK + - O_SHIPPRIORITY + - O_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 15 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - varchar: + length: 79 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - ORDERS + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER 
+ - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + type: JOIN_TYPE_INNER + right: + read: + baseSchema: + names: + - N_NATIONKEY + - N_NAME + - N_REGIONKEY + - N_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 152 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - NATION + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: + field: 7 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + sorts: + - direction: SORT_DIRECTION_DESC_NULLS_FIRST + expr: + selection: + directReference: + structField: + field: 2 + rootReference: {} + names: + - C_CUSTKEY + - C_NAME + - REVENUE + - C_ACCTBAL + - N_NAME + - C_ADDRESS + - C_PHONE + - C_COMMENT diff --git a/tests/tests/tpc-h/tpc-h14.yaml b/tests/tests/tpc-h/tpc-h14.yaml new file mode 100644 index 00000000..38d1afeb --- /dev/null +++ b/tests/tests/tpc-h/tpc-h14.yaml @@ -0,0 +1,432 @@ +# select +# 100.00 * sum(case +# when p.p_type like 'PROMO%' +# then l.l_extendedprice * (1 - l.l_discount) +# else 0 +# end) / sum(l.l_extendedprice * (1 - l.l_discount)) as promo_revenue +# from +# "lineitem" l, +# "part" p +# where +# l.l_partkey = p.p_partkey +# and l.l_shipdate >= date '1994-08-01' +# and l.l_shipdate < date '1994-08-01' + interval '1' month + +name: TPC-H14 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_datetime.yaml + - extensionUriAnchor: 4 + uri: /functions_string.yaml + - extensionUriAnchor: 5 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: and:bool + - extensionFunction: + extensionUriReference: 
2 + functionAnchor: 2 + name: equal:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 3 + name: gte:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 4 + name: lt:date_date + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 5 + name: add:date_year + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 6 + name: like:vchar_vchar + - extensionFunction: + extensionUriReference: 5 + functionAnchor: 7 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 5 + functionAnchor: 8 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 5 + functionAnchor: 9 + name: sum:opt_dec + - extensionFunction: + extensionUriReference: 5 + functionAnchor: 10 + name: divide:opt_dec_dec + relations: + - root: + __test: + - type: "NSTRUCT" + input: + project: + common: + emit: + outputMapping: + - 2 + expressions: + - scalarFunction: + args: + - scalarFunction: + args: + - literal: + decimal: + precision: 5 + scale: 2 + value: AAAAAAAAAAAAAAAAAAAAAA== + - selection: + directReference: + structField: {} + rootReference: {} + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + scale: 2 + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 10 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + scale: 2 + input: + aggregate: + common: + direct: {} + input: + project: + common: + emit: + outputMapping: + - 25 + - 26 + expressions: + - ifThen: + else: + literal: + decimal: + precision: 19 + value: AAAAAAAAAAAAAAAAAAAAAA== + ifs: + - if: + scalarFunction: + args: + - selection: + directReference: + structField: + field: 20 + rootReference: {} + - cast: + input: + literal: + fixedChar: PROMO% + type: + varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + functionReference: 6 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + then: + scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + - selection: + directReference: + structField: + field: 16 + rootReference: {} + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - literal: + date: 8978 + functionReference: 3 + 
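+ # Date literals count days since 1970-01-01, so 8978 = 1994-08-01;
+ # this gte and the lt below bracket the one-month window from the query.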
outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 10 + rootReference: {} + - scalarFunction: + args: + - literal: + date: 8978 + - literal: + intervalYearToMonth: + months: 1 + functionReference: 5 + outputType: + date: + nullability: NULLABILITY_REQUIRED + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + right: + read: + baseSchema: + names: + - P_PARTKEY + - P_NAME + - P_MFGR + - P_BRAND + - P_TYPE + - P_SIZE + - P_CONTAINER + - P_RETAILPRICE + - P_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 23 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PART + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: {} + rootReference: {} + functionReference: 9 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + - measure: + args: + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 9 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + names: + - PROMO_REVENUE diff --git a/tests/tests/tpc-h/tpc-h19.yaml b/tests/tests/tpc-h/tpc-h19.yaml new file mode 100644 index 00000000..c3d4396c --- /dev/null +++ b/tests/tests/tpc-h/tpc-h19.yaml @@ -0,0 +1,954 @@ +# select +# sum(l.l_extendedprice* (1 - l.l_discount)) as revenue +# from +# "lineitem" l, +# "part" p +# where +# ( +# 
p.p_partkey = l.l_partkey +# and p.p_brand = 'Brand#41' +# and p.p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') +# and l.l_quantity >= 2 and l.l_quantity <= 2 + 10 +# and p.p_size between 1 and 5 +# and l.l_shipmode in ('AIR', 'AIR REG') +# and l.l_shipinstruct = 'DELIVER IN PERSON' +# ) +# or +# ( +# p.p_partkey = l.l_partkey +# and p.p_brand = 'Brand#13' +# and p.p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') +# and l.l_quantity >= 14 and l.l_quantity <= 14 + 10 +# and p.p_size between 1 and 10 +# and l.l_shipmode in ('AIR', 'AIR REG') +# and l.l_shipinstruct = 'DELIVER IN PERSON' +# ) +# or +# ( +# p.p_partkey = l.l_partkey +# and p.p_brand = 'Brand#55' +# and p.p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') +# and l.l_quantity >= 23 and l.l_quantity <= 23 + 10 +# and p.p_size between 1 and 15 +# and l.l_shipmode in ('AIR', 'AIR REG') +# and l.l_shipinstruct = 'DELIVER IN PERSON' +# ) + +name: TPC-H19 +diags: +- { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) +- { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) +plan: + __test: + - level: i + extensionUris: + - extensionUriAnchor: 1 + uri: /functions_boolean.yaml + - extensionUriAnchor: 2 + uri: /functions_comparison.yaml + - extensionUriAnchor: 3 + uri: /functions_arithmetic.yaml + - extensionUriAnchor: 4 + uri: /functions_arithmetic_decimal.yaml + extensions: + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 1 + name: or:bool + - extensionFunction: + extensionUriReference: 1 + functionAnchor: 2 + name: and:bool + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 3 + name: equal:any_any + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 4 + name: gte:any_any + - extensionFunction: + extensionUriReference: 2 + functionAnchor: 5 + name: lte:any_any + - extensionFunction: + extensionUriReference: 3 + functionAnchor: 6 + name: add:opt_i32_i32 + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 7 + name: multiply:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 8 + name: subtract:opt_dec_dec + - extensionFunction: + extensionUriReference: 4 + functionAnchor: 9 + name: sum:opt_dec + relations: + - root: + __test: + - type: "NSTRUCT>" + input: + aggregate: + common: + emit: + outputMapping: + - 0 + input: + project: + common: + emit: + outputMapping: + - 25 + expressions: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 5 + rootReference: {} + - scalarFunction: + args: + - cast: + input: + literal: + i32: 1 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - selection: + directReference: + structField: + field: 6 + rootReference: {} + functionReference: 8 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 7 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + input: + filter: + common: + direct: {} + condition: + scalarFunction: + args: + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 16 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + 
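+ # First OR branch: p_brand = 'Brand#41' with SM containers; part columns
+ # follow the 16 lineitem columns in the join output, so field 19 is P_BRAND.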
structField: + field: 19 + rootReference: {} + - cast: + input: + literal: + fixedChar: Brand#41 + type: + fixedChar: + length: 10 + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: SM CASE + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: SM BOX + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: SM PACK + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: SM PKG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + literal: + i32: 2 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + scalarFunction: + args: + - literal: + i32: 2 + - literal: + i32: 10 + functionReference: 6 + outputType: + i32: + nullability: NULLABILITY_REQUIRED + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 1 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 5 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR REG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 13 + rootReference: {} + - cast: + input: + literal: + fixedChar: DELIVER IN PERSON + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 16 + rootReference: {} + - selection: + directReference: + structField: + field: 
1 + rootReference: {} + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 19 + rootReference: {} + - cast: + input: + literal: + fixedChar: Brand#13 + type: + fixedChar: + length: 10 + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: MED BAG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: MED BOX + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: MED PKG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: MED PACK + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + literal: + i32: 14 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + scalarFunction: + args: + - literal: + i32: 14 + - literal: + i32: 10 + functionReference: 6 + outputType: + i32: + nullability: NULLABILITY_REQUIRED + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 1 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 10 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR REG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 13 + rootReference: {} + - cast: + input: + literal: + fixedChar: DELIVER IN PERSON + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + 
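+ # Third OR branch: p_brand = 'Brand#55', LG containers, l_quantity
+ # between 23 and 33, and p_size between 1 and 15.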
args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 16 + rootReference: {} + - selection: + directReference: + structField: + field: 1 + rootReference: {} + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_REQUIRED + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 19 + rootReference: {} + - cast: + input: + literal: + fixedChar: Brand#55 + type: + fixedChar: + length: 10 + nullability: NULLABILITY_REQUIRED + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: LG CASE + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: LG BOX + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: LG PACK + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 22 + rootReference: {} + - literal: + fixedChar: LG PKG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + literal: + i32: 23 + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 4 + rootReference: {} + - cast: + input: + scalarFunction: + args: + - literal: + i32: 23 + - literal: + i32: 10 + functionReference: 6 + outputType: + i32: + nullability: NULLABILITY_REQUIRED + type: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 1 + functionReference: 4 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 21 + rootReference: {} + - literal: + i32: 15 + functionReference: 5 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 14 + rootReference: {} + - literal: + fixedChar: AIR REG + functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + - scalarFunction: + args: + - selection: + directReference: + structField: + field: 13 + rootReference: {} + - cast: + input: + literal: + fixedChar: DELIVER IN PERSON + type: + fixedChar: + length: 25 + nullability: NULLABILITY_REQUIRED + 
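+ # l_shipinstruct = 'DELIVER IN PERSON'; the literal is cast to
+ # FIXEDCHAR<25> to match the L_SHIPINSTRUCT column type.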
functionReference: 3 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 2 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + functionReference: 1 + outputType: + bool: + nullability: NULLABILITY_NULLABLE + input: + join: + common: + direct: {} + expression: + literal: + boolean: true + left: + read: + baseSchema: + names: + - L_ORDERKEY + - L_PARTKEY + - L_SUPPKEY + - L_LINENUMBER + - L_QUANTITY + - L_EXTENDEDPRICE + - L_DISCOUNT + - L_TAX + - L_RETURNFLAG + - L_LINESTATUS + - L_SHIPDATE + - L_COMMITDATE + - L_RECEIPTDATE + - L_SHIPINSTRUCT + - L_SHIPMODE + - L_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i64: + nullability: NULLABILITY_REQUIRED + - i32: + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 1 + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - date: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 44 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - LINEITEM + right: + read: + baseSchema: + names: + - P_PARTKEY + - P_NAME + - P_MFGR + - P_BRAND + - P_TYPE + - P_SIZE + - P_CONTAINER + - P_RETAILPRICE + - P_COMMENT + struct: + nullability: NULLABILITY_REQUIRED + types: + - i64: + nullability: NULLABILITY_REQUIRED + - varchar: + length: 55 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 25 + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - varchar: + length: 25 + nullability: NULLABILITY_NULLABLE + - i32: + nullability: NULLABILITY_NULLABLE + - fixedChar: + length: 10 + nullability: NULLABILITY_NULLABLE + - decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + - varchar: + length: 23 + nullability: NULLABILITY_NULLABLE + common: + direct: {} + namedTable: + names: + - PART + type: JOIN_TYPE_INNER + measures: + - measure: + args: + - selection: + directReference: + structField: {} + rootReference: {} + functionReference: 9 + outputType: + decimal: + nullability: NULLABILITY_NULLABLE + precision: 19 + phase: AGGREGATION_PHASE_INITIAL_TO_RESULT + names: + - REVENUE
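A quick decoding note for the literal encodings used throughout these test plans, since they are easy to misread: date literals are day counts since 1970-01-01, decimal literal values are base64-encoded 16-byte little-endian two's-complement integers of the unscaled value, and compound function names such as multiply:opt_dec_dec are the simple function name plus an argument-signature suffix. The following minimal Python sketch (illustrative only, not part of the diff; decode_date and decode_decimal are hypothetical helper names) decodes the first two:

import base64
from datetime import date, timedelta

def decode_date(days: int) -> date:
    # Substrait date literals count days since the UNIX epoch.
    return date(1970, 1, 1) + timedelta(days=days)

def decode_decimal(value_b64: str, scale: int = 0) -> float:
    # Decimal literal values are 16-byte little-endian two's-complement
    # integers holding the unscaled value.
    unscaled = int.from_bytes(base64.b64decode(value_b64), "little", signed=True)
    return unscaled / (10 ** scale)

print(decode_date(8825))   # 1994-03-01, the start of TPC-H10's date window
print(decode_date(8978))   # 1994-08-01, the start of TPC-H14's date window
print(decode_decimal("AAAAAAAAAAAAAAAAAAAAAA==", scale=2))  # 0.0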