diff --git a/.azure-pipelines.yml b/.azure-pipelines.yml
new file mode 100644
index 000000000..9e0a4697e
--- /dev/null
+++ b/.azure-pipelines.yml
@@ -0,0 +1,180 @@
+name: $(Build.SourceBranch)-$(date:yyyy-MM-dd)$(rev:.r)
+trigger:
+  branches:
+    include:
+    - 'master'
+  tags:
+    include:
+    - '*'
+    exclude:
+    - 'dev'
+
+jobs:
+- job: rustfmt
+  pool:
+    vmImage: 'macos-10.14'
+  steps:
+  - checkout: self
+    submodules: true
+  - template: ci/azure-install-rust.yml
+  - script: rustup component add rustfmt
+    displayName: Add rustfmt
+  - script: cargo fmt --all -- --check
+    displayName: Check formatting
+  variables:
+    toolchain: stable
+
+# Smoke test to build docs on one builder, using macOS for now since it's the
+# fastest.
+- job: docs
+  pool:
+    vmImage: 'macos-10.14'
+  steps:
+  - checkout: self
+    submodules: true
+  - template: ci/azure-install-rust.yml
+  - script: cargo doc
+    displayName: Build documentation
+  - script: cargo install cargo-deadlinks
+    displayName: Install cargo-deadlinks
+  - bash: |
+      find ./target/doc -maxdepth 1 -type d -name "cranelift*" | xargs -I{} cargo deadlinks --dir {}
+    displayName: Run cargo-deadlinks
+  variables:
+    toolchain: nightly
+
+- job: Test
+  strategy:
+    matrix:
+      windows-earliest:
+        imageName: 'vs2017-win2016'
+        toolchain: '1.37.0'
+      linux-earliest:
+        imageName: 'ubuntu-16.04'
+        toolchain: '1.37.0'
+      mac-earliest:
+        imageName: 'macos-10.14'
+        toolchain: '1.37.0'
+      mac-stable:
+        imageName: 'macos-10.14'
+        toolchain: stable
+      mac-beta:
+        imageName: 'macos-10.14'
+        toolchain: beta
+      mac-nightly:
+        imageName: 'macos-10.14'
+        toolchain: nightly
+
+  pool:
+    vmImage: $(imageName)
+
+  steps:
+  - checkout: self
+    submodules: true
+  - template: ci/azure-install-rust.yml
+
+  - script: cargo fetch
+    displayName: Fetch cargo dependencies
+
+  - script: cargo build
+    displayName: Cargo build
+
+  - bash: cargo test --all
+    displayName: Cargo test
+    env:
+      RUST_BACKTRACE: 1
+
+  # Ensure the fuzzer works by running it with a single input
+  - bash: cargo install cargo-fuzz
+    displayName: Install cargo-fuzz
+    condition: and(succeeded(), eq(variables['toolchain'], 'nightly'))
+  - bash: |
+      fuzz_module="ffaefab69523eb11935a9b420d58826c8ea65c4c"
+      cargo fuzz run fuzz_translate_module \
+        "fuzz/corpus/fuzz_translate_module/$fuzz_module"
+    displayName: Run cargo-fuzz
+    env:
+      RUST_BACKTRACE: 1
+    condition: and(succeeded(), eq(variables['toolchain'], 'nightly'))
+
+- job: Fuzz_regression
+  displayName: Fuzz regression
+  pool:
+    vmImage: "ubuntu-16.04"
+  variables:
+    toolchain: nightly
+  steps:
+  - template: ci/azure-install-rust.yml
+  - bash: cargo install cargo-fuzz
+  - bash: ci/fuzzit.sh local-regression
+
+- job: Fuzz
+  condition: ne(variables['Build.Reason'], 'PullRequest')
+  pool:
+    vmImage: "ubuntu-16.04"
+  variables:
+    toolchain: nightly
+  steps:
+  - template: ci/azure-install-rust.yml
+  - bash: cargo install cargo-fuzz
+  - bash: ci/fuzzit.sh fuzzing
+    env:
+      FUZZIT_API_KEY: $(FUZZIT_API_KEY)
+
+- job: Build
+  strategy:
+    matrix:
+      windows:
+        imageName: 'vs2017-win2016'
+        # Statically link against msvcrt to produce slightly more portable
+        # binaries on Windows by reducing our binary compatibility requirements.
+        RUSTFLAGS: -Ctarget-feature=+crt-static
+      mac:
+        imageName: 'macos-10.14'
+        # Lower the deployment target from our build image in an attempt to
+        # build more portable binaries that run on older releases. Note that
+        # 10.9 here is arbitrarily chosen and just happens to be the lowest that
+        # works at this time. Raising this is probably fine.
+        MACOSX_DEPLOYMENT_TARGET: 10.9
+  variables:
+    toolchain: '1.37.0'
+  pool:
+    vmImage: $(imageName)
+  # We try to be compatible with beta and nightly, but they occasionally
+  # fail, so we don't allow them to hold up people using stable.
+  continueOnError: $[ne(variables['toolchain'], 'stable')]
+  steps:
+  - template: ci/azure-build-release.yml

+# Build the Linux release binary in an older Linux container (in this case
+# CentOS 6).
+- job: Build_linux
+  variables:
+    toolchain: '1.37.0'
+  container:
+    image: centos:6
+    options: "--name ci-container -v /usr/bin/docker:/tmp/docker:ro"
+  steps:
+  # We're executing in the container as non-root, but `yum` requires root. We
+  # need to install `sudo`, but to do that we need `sudo`. Do a bit of a weird
+  # hack where we use the host `docker` executable to re-execute in our own
+  # container with the root user to install `sudo`.
+  - bash: /tmp/docker exec -t -u 0 ci-container sh -c "yum install -y sudo"
+    displayName: Configure sudo
+
+  # See https://edwards.sdsu.edu/research/c11-on-centos-6/ for where these
+  # various commands came from.
+  - bash: |
+      set -e
+      sudo yum install -y centos-release-scl cmake xz
+      sudo yum install -y devtoolset-8-gcc devtoolset-8-binutils devtoolset-8-gcc-c++
+      echo "##vso[task.prependpath]/opt/rh/devtoolset-8/root/usr/bin"
+    displayName: Install system dependencies
+
+  # Delete `libstdc++.so` to force gcc to link against `libstdc++.a` instead.
+  # This is a hack and not the right way to do this, but it ends up doing the
+  # right thing for now.
+  - bash: sudo rm -f /opt/rh/devtoolset-8/root/usr/lib/gcc/x86_64-redhat-linux/8/libstdc++.so
+    displayName: Force a static libstdc++
+
+  - template: ci/azure-build-release.yml
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 2465863fe..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,51 +0,0 @@
-# Travis CI script. See https://travis-ci.org/ for more info.
-
-os:
-  - linux
-  - osx
-language: rust
-rust:
-  # The oldest version we currently support. See
-  # CONTRIBUTING.md#rustc-version-support for details.
-  - 1.34.0
-  - beta
-  - nightly
-matrix:
-  allow_failures:
-    # We try to be compatible with beta and nightly, but they occasionally
-    # fail, so we don't allow them to hold up people using stable.
-    - rust: beta
-    - rust: nightly
-  # Similarly, we don't need to hold up people using stable while we wait
-  # for the results which may fail.
-  fast_finish: true
-dist: xenial
-sudo: false
-addons:
-  apt:
-    packages:
-      - python3-pip
-install:
-  - pip3 install --verbose --user --upgrade mypy flake8
-before_script:
-  # If an old version of rustfmt from cargo is already installed, uninstall
-  # it, since it can prevent the installation of the new version from rustup.
-  - cargo uninstall rustfmt || true
-  - cargo install --list
-  # If we're testing beta or nightly, we still need to install the stable
-  # toolchain so that we can run the stable version of rustfmt.
-  - rustup toolchain install stable
-  # Install the stable version of rustfmt.
-  - rustup component add --toolchain=stable rustfmt-preview
-  - rustup component list --toolchain=stable
-  - rustup show
-  - rustfmt +stable --version || echo fail
-  # Sometimes the component isn't actually ready after being installed, and
-  # rustup update makes it ready.
- - rustup update - - rustfmt +stable --version -script: ./test-all.sh -cache: - cargo: true - directories: - - $HOME/.cache/pip diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 403e912e6..97ce756eb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,7 +50,7 @@ things set in stone yet. We abide by our [Code of Conduct] and ask that you do as well. -[Code of Conduct](CODE_OF_CONDUCT.md) +[Code of Conduct]: CODE_OF_CONDUCT.md ## Coding Guidelines @@ -84,14 +84,6 @@ build. [Rust Update Policy for Firefox]: https://wiki.mozilla.org/Rust_Update_Policy_for_Firefox#Schedule -### Python - -Our Python code is checked with [mypy](http://mypy-lang.org/) and -[flake8](http://flake8.pycqa.org/en/latest/); see the -[check.sh](https://github.com/CraneStation/cranelift/blob/master/cranelift-codegen/meta-python/check.sh) -file for details. The versions available in common package repositories such -as Ubuntu or Homebrew typically work fine. - ## Development Process We use [issues] for asking questions and tracking bugs and unimplemented diff --git a/Cargo.toml b/Cargo.toml index a25beb418..df1224038 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-tools" authors = ["The Cranelift Project Developers"] -version = "0.31.0" +version = "0.43.1" description = "Binaries for testing the Cranelift libraries" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -19,33 +19,38 @@ path = "src/clif-util.rs" [dependencies] cfg-if = "0.1" -cranelift-codegen = { path = "cranelift-codegen", version = "0.31.0" } -cranelift-entity = { path = "cranelift-entity", version = "0.31.0" } -cranelift-reader = { path = "cranelift-reader", version = "0.31.0" } -cranelift-frontend = { path = "cranelift-frontend", version = "0.31.0" } -cranelift-serde = { path = "cranelift-serde", version = "0.31.0", optional = true } -cranelift-wasm = { path = "cranelift-wasm", version = "0.31.0", optional = true } -cranelift-native = { path = "cranelift-native", version = "0.31.0" } -cranelift-filetests = { path = "cranelift-filetests", version = "0.31.0" } -cranelift-module = { path = "cranelift-module", version = "0.31.0" } -cranelift-faerie = { path = "cranelift-faerie", version = "0.31.0" } -cranelift-simplejit = { path = "cranelift-simplejit", version = "0.31.0" } -cranelift-preopt = { path = "cranelift-preopt", version = "0.31.0" } -cranelift = { path = "cranelift-umbrella", version = "0.31.0" } +cranelift-codegen = { path = "cranelift-codegen", version = "0.43.1" } +cranelift-entity = { path = "cranelift-entity", version = "0.43.1" } +cranelift-reader = { path = "cranelift-reader", version = "0.43.1" } +cranelift-frontend = { path = "cranelift-frontend", version = "0.43.1" } +cranelift-serde = { path = "cranelift-serde", version = "0.43.1", optional = true } +cranelift-wasm = { path = "cranelift-wasm", version = "0.43.1", optional = true } +cranelift-native = { path = "cranelift-native", version = "0.43.1" } +cranelift-filetests = { path = "cranelift-filetests", version = "0.43.1" } +cranelift-module = { path = "cranelift-module", version = "0.43.1" } +cranelift-faerie = { path = "cranelift-faerie", version = "0.43.1" } +cranelift-object = { path = "cranelift-object", version = "0.43.1" } +cranelift-simplejit = { path = "cranelift-simplejit", version = "0.43.1" } +cranelift-preopt = { path = "cranelift-preopt", version = "0.43.1" } +cranelift = { path = "cranelift-umbrella", version = "0.43.1" } filecheck = "0.4.0" clap = "2.32.0" serde = "1.0.8" -term = 
"0.5.1" -capstone = { version = "0.5.0", optional = true } -wabt = { version = "0.7.0", optional = true } -target-lexicon = "0.4.0" +term = "0.6.1" +capstone = { version = "0.6.0", optional = true } +wabt = { version = "0.9.1", optional = true } +target-lexicon = "0.8.1" pretty_env_logger = "0.3.0" file-per-thread-logger = "0.1.2" +indicatif = "0.11.0" +walkdir = "2.2" [features] -default = ["disas", "wasm"] +default = ["disas", "wasm", "cranelift-codegen/all-arch"] disas = ["capstone"] wasm = ["wabt", "cranelift-wasm"] +basic-blocks = ["cranelift-codegen/basic-blocks", "cranelift-frontend/basic-blocks", +"cranelift-wasm/basic-blocks", "cranelift-filetests/basic-blocks"] # We want debug symbols on release binaries by default since it allows profiling # tools to give more accurate information. We can always strip them out later if diff --git a/README.md b/README.md index bd05f8d67..171055d03 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ into executable machine code. [![Documentation Status](https://readthedocs.org/projects/cranelift/badge/?version=latest)](https://cranelift.readthedocs.io/en/latest/?badge=latest) [![Travis Status](https://travis-ci.org/CraneStation/cranelift.svg?branch=master)](https://travis-ci.org/CraneStation/cranelift) -[![Appveyor Status](https://ci.appveyor.com/api/projects/status/oub7wrrb59utuv8x?svg=true)](https://ci.appveyor.com/project/CraneStation/cranelift) +[![Fuzzit Status](https://app.fuzzit.dev/badge?org_id=CraneStation)](https://app.fuzzit.dev/orgs/CraneStation/dashboard) [![Gitter chat](https://badges.gitter.im/CraneStation/CraneStation.svg)](https://gitter.im/CraneStation/Lobby) -![Minimum rustc 1.34](https://img.shields.io/badge/rustc-1.34+-green.svg) +![Minimum rustc 1.37](https://img.shields.io/badge/rustc-1.37+-green.svg) For more information, see [the documentation](https://cranelift.readthedocs.io/en/latest/?badge=latest). @@ -53,8 +53,7 @@ needed before it would be ready for a production use case. Cranelift's APIs are not yet stable. -Cranelift currently requires Rust 1.32 or later, and Python 2.7 or 3 -to build. +Cranelift currently requires Rust 1.37 or later to build. Planned uses ------------ diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 5b95b731d..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,28 +0,0 @@ -environment: - matrix: - - TARGET: x86_64-pc-windows-gnu - BITS: 64 - MSYS2: 1 - - TARGET: x86_64-pc-windows-msvc - BITS: 64 - - TARGET: i686-pc-windows-gnu - BITS: 32 - MSYS2: 1 - - TARGET: i686-pc-windows-msvc - BITS: 32 -install: - - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - - rustup-init.exe -y --default-host %TARGET% - - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin - - if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH% - - rustc -V - - cargo -V -build: false -test_script: - # TODO: Remove --no-default-features here. wabt and disass currently - # don't build on appveyor due to cmake issues. 
-  - cargo build --verbose --all --no-default-features
-  - cargo test --verbose --all --no-default-features
-branches:
-  only:
-    - master
diff --git a/ci/azure-build-release.yml b/ci/azure-build-release.yml
new file mode 100644
index 000000000..6400eed0e
--- /dev/null
+++ b/ci/azure-build-release.yml
@@ -0,0 +1,66 @@
+steps:
+- checkout: self
+  submodules: true
+
+- template: azure-install-rust.yml
+
+- bash: echo "##vso[task.setvariable variable=RUSTC_VERSION;]`rustc --version`"
+  displayName: Set rustc version string for caching
+
+- bash: cargo build --release
+  displayName: Cargo build
+
+# Test what we're about to release in release mode itself.
+- bash: cargo test --release --all
+  displayName: Cargo test
+  env:
+    RUST_BACKTRACE: 1
+
+- bash: |
+    echo "##vso[task.setvariable variable=tagName;]`echo $BUILD_SOURCEBRANCH | sed -e 's|refs/tags/||'`"
+  displayName: Set tag name
+  condition: startsWith(variables['Build.SourceBranch'], 'refs/tags/')
+- bash: |
+    echo "##vso[task.setvariable variable=tagName;]dev"
+  displayName: Set tag name to "dev"
+  condition: not(startsWith(variables['Build.SourceBranch'], 'refs/tags/'))
+
+- bash: echo "##vso[task.setvariable variable=basename;]cranelift-$(tagName)-x86_64-windows"
+  displayName: Configure basename var
+  condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT'))
+- bash: echo "##vso[task.setvariable variable=basename;]cranelift-$(tagName)-x86_64-macos"
+  displayName: Configure basename var
+  condition: and(succeeded(), eq(variables['Agent.OS'], 'Darwin'))
+- bash: echo "##vso[task.setvariable variable=basename;]cranelift-$(tagName)-x86_64-linux"
+  displayName: Configure basename var
+  condition: and(succeeded(), eq(variables['Agent.OS'], 'Linux'))
+
+- bash: |
+    set -e
+    mkdir -p $BUILD_BINARIESDIRECTORY/$BASENAME
+    if [ "$AGENT_OS" = "Windows_NT" ]; then
+      ext=.exe
+    fi
+    cp LICENSE README.md target/release/clif-util$ext $BUILD_BINARIESDIRECTORY/$BASENAME
+  displayName: Copy binaries
+
+- task: ArchiveFiles@2
+  inputs:
+    rootFolderOrFile: $(Build.BinariesDirectory)/$(basename)
+    archiveType: 'zip'
+    archiveFile: '$(Build.ArtifactStagingDirectory)/$(basename).zip'
+  displayName: Archive files (Win)
+  condition: and(succeeded(), eq(variables['Agent.OS'], 'Windows_NT'))
+- task: ArchiveFiles@2
+  inputs:
+    rootFolderOrFile: $(Build.BinariesDirectory)/$(basename)
+    archiveType: 'tar'
+    tarCompression: 'xz'
+    archiveFile: '$(Build.ArtifactStagingDirectory)/$(basename).tar.xz'
+  displayName: Archive files (Unix)
+  condition: and(succeeded(), ne(variables['Agent.OS'], 'Windows_NT'))
+- task: PublishPipelineArtifact@1
+  inputs:
+    path: $(Build.ArtifactStagingDirectory)/
+    artifactName: 'bundle-$(Agent.OS)'
+
diff --git a/ci/azure-install-rust.yml b/ci/azure-install-rust.yml
new file mode 100644
index 000000000..64e502b77
--- /dev/null
+++ b/ci/azure-install-rust.yml
@@ -0,0 +1,33 @@
+steps:
+  # Rustup is currently installed on Windows and Linux, but not macOS.
+  # It is installed in /usr/local/cargo/bin/ or C:\Program Files\Rust\.cargo\bin\
+  # This step ensures that rustup is installed, mainly for macOS, or in case the
+  # Azure image changes in the future.
+  - bash: |
+      set -ex
+      if [ -x "`command -v rustup`" ]; then
+        echo `command -v rustup` `rustup -V` already installed
+        rustup self update
+      else
+        if [ "$AGENT_OS" = "Windows_NT" ]; then
+          curl -sSf -o rustup-init.exe https://win.rustup.rs
+          ./rustup-init.exe -y --default-toolchain $TOOLCHAIN
+          echo "##vso[task.prependpath]$USERPROFILE/.cargo/bin"
+        else
+          curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain $TOOLCHAIN
+          echo "##vso[task.prependpath]$HOME/.cargo/bin"
+        fi
+      fi
+    displayName: Install rustup
+
+  - bash: |
+      set -ex
+      rustup update $TOOLCHAIN
+      rustup default $TOOLCHAIN
+    displayName: Install rust
+
+  - bash: |
+      set -ex
+      rustc -Vv
+      cargo -V
+    displayName: Query rust and cargo versions
diff --git a/ci/fuzzit.sh b/ci/fuzzit.sh
new file mode 100755
index 000000000..c08fb3c64
--- /dev/null
+++ b/ci/fuzzit.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+set -xe
+
+# Validate arguments
+if [ "$#" -ne 1 ]; then
+    cat << EOF
+Usage: $0 <type>
+
+Types are:
+local-regression - Run corpus and past crashes locally to catch regressions.
+fuzzing - Submit for long run fuzzing on Fuzzit.
+EOF
+    exit 1
+fi
+
+# Configure
+set -xe
+NAME=cranelift
+TYPE=$1
+FUZZIT_VERSION=2.4.46
+
+# Setup
+if [[ ! -f fuzzit || ! `./fuzzit --version` =~ $FUZZIT_VERSION$ ]]; then
+    wget -q -O fuzzit https://github.com/fuzzitdev/fuzzit/releases/download/v$FUZZIT_VERSION/fuzzit_Linux_x86_64
+    chmod a+x fuzzit
+fi
+./fuzzit --version
+
+# Fuzz
+function fuzz {
+    FUZZER=$1
+    TARGET=$2
+    cargo fuzz run $FUZZER -- -runs=0
+    ./fuzzit --version
+    ./fuzzit create job --type $TYPE $NAME/$TARGET ./fuzz/target/x86_64-unknown-linux-gnu/debug/$FUZZER
+}
+fuzz fuzz_translate_module translate-module
+fuzz fuzz_reader_parse_test reader-parse
diff --git a/cranelift-bforest/Cargo.toml b/cranelift-bforest/Cargo.toml
index 4c7b5b4cf..4726440e7 100644
--- a/cranelift-bforest/Cargo.toml
+++ b/cranelift-bforest/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-bforest"
-version = "0.31.0"
+version = "0.43.1"
 description = "A forest of B+-trees"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://cranelift.readthedocs.io/"
@@ -12,7 +12,7 @@ keywords = ["btree", "forest", "set", "map"]
 edition = "2018"
 
 [dependencies]
-cranelift-entity = { path = "../cranelift-entity", version = "0.31.0", default-features = false }
+cranelift-entity = { path = "../cranelift-entity", version = "0.43.1", default-features = false }
 
 [features]
 default = ["std"]
diff --git a/cranelift-bforest/src/lib.rs b/cranelift-bforest/src/lib.rs
index 7b6beb259..5d33068b7 100644
--- a/cranelift-bforest/src/lib.rs
+++ b/cranelift-bforest/src/lib.rs
@@ -32,7 +32,6 @@
     )
 )]
 #![no_std]
-#![cfg_attr(not(feature = "std"), feature(alloc))]
 
 #[cfg(test)]
 #[cfg(not(feature = "std"))]
diff --git a/cranelift-codegen/Cargo.toml b/cranelift-codegen/Cargo.toml
index 61aee20da..251440cee 100644
--- a/cranelift-codegen/Cargo.toml
+++ b/cranelift-codegen/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 authors = ["The Cranelift Project Developers"]
 name = "cranelift-codegen"
-version = "0.31.0"
+version = "0.43.1"
 description = "Low-level code generator library"
 license = "Apache-2.0 WITH LLVM-exception"
 documentation = "https://cranelift.readthedocs.io/"
@@ -13,23 +13,25 @@ build = "build.rs"
 edition = "2018"
 
 [dependencies]
-cranelift-entity = { path = "../cranelift-entity", version = "0.31.0", default-features = false }
-cranelift-bforest = { path = "../cranelift-bforest", version = "0.31.0", default-features = false }
+cranelift-entity = { path = "../cranelift-entity", version = "0.43.1", default-features = false }
+cranelift-bforest = { path = "../cranelift-bforest", version = "0.43.1", default-features = false }
 failure = { version = "0.1.1", default-features = false, features = ["derive"] }
 failure_derive = { version = "0.1.1", default-features = false }
 hashmap_core = { version = "0.1.9", optional = true }
-target-lexicon = { version = "0.4.0", default-features = false }
+target-lexicon = "0.8.1"
 log = { version = "0.4.6", default-features = false }
+serde = { version = "1.0.94", features = ["derive"], optional = true }
+smallvec = { version = "0.6.10" }
 
 # It is a goal of the cranelift-codegen crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
 # accomodated in `tests`.
 
 [build-dependencies]
-cranelift-codegen-meta = { path = "meta", version = "0.31.0", default-features = false }
+cranelift-codegen-meta = { path = "meta", version = "0.43.1", default-features = false }
 
 [features]
-default = ["std", "x86", "arm32", "arm64", "riscv"]
+default = ["std"]
 
 # The "std" feature enables use of libstd. The "core" feature enables use
 # of some minimal std-like replacement libraries. At least one of these two
@@ -37,7 +39,6 @@ default = ["std", "x86", "arm32", "arm64", "riscv"]
 std = [
     "cranelift-entity/std",
     "cranelift-bforest/std",
-    "target-lexicon/std",
     "cranelift-codegen-meta/std"
 ]
 
@@ -53,11 +54,26 @@ core = [
 testing_hooks = []
 
 # ISA targets for which we should build.
+# If no ISA targets are explicitly enabled, the ISA target for the host machine is enabled.
 x86 = []
 arm32 = []
 arm64 = []
 riscv = []
 
+# Option to enable all architectures.
+all-arch = [
+    "x86",
+    "arm32",
+    "arm64",
+    "riscv"
+]
+
+# For dependent crates that want to serialize some parts of Cranelift.
+enable-serde = ["serde"]
+
+# Temporary feature that enforces basic block semantics.
+basic-blocks = []
+
 [badges]
 maintenance = { status = "experimental" }
 travis-ci = { repository = "CraneStation/cranelift" }
diff --git a/cranelift-codegen/build.rs b/cranelift-codegen/build.rs
index 733a1c782..ef89d48ff 100644
--- a/cranelift-codegen/build.rs
+++ b/cranelift-codegen/build.rs
@@ -50,39 +50,12 @@ fn main() {
     let crate_dir = cur_dir.as_path();
 
     // Make sure we rebuild if this build script changes (will not happen with
-    // if the path to this file contains non-UTF8 bytes). The `build.py` script
-    // prints out its own dependencies.
+    // non-UTF-8 bytes in the path to this file).
     println!(
         "cargo:rerun-if-changed={}",
         crate_dir.join("build.rs").to_str().unwrap()
     );
 
-    // Scripts are in `$crate_dir/meta-python`.
-    let meta_dir = crate_dir.join("meta-python");
-    let build_script = meta_dir.join("build.py");
-
-    // Launch build script with Python. We'll just find python in the path.
-    // Use -B to disable .pyc files, because they cause trouble for vendoring
-    // scripts, and this is a build step that isn't run very often anyway.
- let python = identify_python(); - let status = process::Command::new(python) - .current_dir(crate_dir) - .arg("-B") - .arg(build_script) - .arg("--out-dir") - .arg(out_dir.clone()) - .status() - .expect("Failed to launch second-level build script; is python installed?"); - if !status.success() { - process::exit(status.code().unwrap()); - } - - // DEVELOPMENT: - // ------------------------------------------------------------------------ - // Now that the Python build process is complete, generate files that are - // emitted by the `meta` crate. - // ------------------------------------------------------------------------ - if let Err(err) = meta::generate(&isas, &out_dir) { eprintln!("Error: {}", err); process::exit(1); @@ -99,16 +72,3 @@ fn main() { println!("cargo:warning=Generated files are in {}", out_dir); } } - -fn identify_python() -> &'static str { - for python in &["python", "python3", "python2.7"] { - if process::Command::new(python) - .arg("--version") - .status() - .is_ok() - { - return python; - } - } - panic!("The Cranelift build requires Python (version 2.7 or version 3)"); -} diff --git a/cranelift-codegen/meta-python/base/__init__.py b/cranelift-codegen/meta-python/base/__init__.py deleted file mode 100644 index 79f6ccbf4..000000000 --- a/cranelift-codegen/meta-python/base/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Definitions for the base Cranelift language.""" diff --git a/cranelift-codegen/meta-python/base/entities.py b/cranelift-codegen/meta-python/base/entities.py deleted file mode 100644 index 0226b18ca..000000000 --- a/cranelift-codegen/meta-python/base/entities.py +++ /dev/null @@ -1,38 +0,0 @@ -""" -The `cranelift.entities` module predefines all the Cranelift entity reference -operand types. There are corresponding definitions in the `cranelift.entities` -Rust module. -""" -from __future__ import absolute_import -from cdsl.operands import EntityRefKind - - -#: A reference to an extended basic block in the same function. -#: This is primarliy used in control flow instructions. -ebb = EntityRefKind( - 'ebb', 'An extended basic block in the same function.', - default_member='destination') - -#: A reference to a stack slot declared in the function preamble. -stack_slot = EntityRefKind('stack_slot', 'A stack slot.') - -#: A reference to a global value. -global_value = EntityRefKind('global_value', 'A global value.') - -#: A reference to a function signature declared in the function preamble. -#: This is used to provide the call signature in a call_indirect instruction. -sig_ref = EntityRefKind('sig_ref', 'A function signature.') - -#: A reference to an external function declared in the function preamble. -#: This is used to provide the callee and signature in a call instruction. -func_ref = EntityRefKind('func_ref', 'An external function.') - -#: A reference to a jump table declared in the function preamble. -jump_table = EntityRefKind( - 'jump_table', 'A jump table.', default_member='table') - -#: A reference to a heap declared in the function preamble. -heap = EntityRefKind('heap', 'A heap.') - -#: A reference to a table declared in the function preamble. -table = EntityRefKind('table', 'A table.') diff --git a/cranelift-codegen/meta-python/base/formats.py b/cranelift-codegen/meta-python/base/formats.py deleted file mode 100644 index b45ac26f7..000000000 --- a/cranelift-codegen/meta-python/base/formats.py +++ /dev/null @@ -1,91 +0,0 @@ -""" -The cranelift.formats defines all instruction formats. 
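
The entity kinds deleted above all surface as declarations in a CLIF function preamble, which instructions then reference. A minimal hand-written sketch (the function name, signature, and slot size are illustrative, not taken from this patch)::

    function %entity_demo(i32) -> i32 {
        ss0 = explicit_slot 8            ; stack_slot
        sig0 = (i32) -> i32 system_v     ; sig_ref
        fn0 = %callee sig0               ; func_ref
    ebb0(v0: i32):                       ; ebb
        v1 = call fn0(v0)
        return v1
    }
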
- -Every instruction format has a corresponding `InstructionData` variant in the -Rust representation of Cranelift IR, so all instruction formats must be defined -in this module. -""" -from __future__ import absolute_import -from cdsl.formats import InstructionFormat -from cdsl.operands import VALUE, VARIABLE_ARGS -from .immediates import imm64, uimm8, uimm32, ieee32, ieee64, offset32 -from .immediates import boolean, intcc, floatcc, memflags, regunit, trapcode -from . import entities -from .entities import ebb, sig_ref, func_ref, stack_slot, heap, table - -Unary = InstructionFormat(VALUE) -UnaryImm = InstructionFormat(imm64) -UnaryIeee32 = InstructionFormat(ieee32) -UnaryIeee64 = InstructionFormat(ieee64) -UnaryBool = InstructionFormat(boolean) -UnaryGlobalValue = InstructionFormat(entities.global_value) - -Binary = InstructionFormat(VALUE, VALUE) -BinaryImm = InstructionFormat(VALUE, imm64) - -# The select instructions are controlled by the second VALUE operand. -# The first VALUE operand is the controlling flag which has a derived type. -# The fma instruction has the same constraint on all inputs. -Ternary = InstructionFormat(VALUE, VALUE, VALUE, typevar_operand=1) - -# Catch-all for instructions with many outputs and inputs and no immediate -# operands. -MultiAry = InstructionFormat(VARIABLE_ARGS) - -NullAry = InstructionFormat() - -InsertLane = InstructionFormat(VALUE, ('lane', uimm8), VALUE) -ExtractLane = InstructionFormat(VALUE, ('lane', uimm8)) - -IntCompare = InstructionFormat(intcc, VALUE, VALUE) -IntCompareImm = InstructionFormat(intcc, VALUE, imm64) -IntCond = InstructionFormat(intcc, VALUE) -FloatCompare = InstructionFormat(floatcc, VALUE, VALUE) -FloatCond = InstructionFormat(floatcc, VALUE) - -IntSelect = InstructionFormat(intcc, VALUE, VALUE, VALUE) - -Jump = InstructionFormat(ebb, VARIABLE_ARGS) -Branch = InstructionFormat(VALUE, ebb, VARIABLE_ARGS) -BranchInt = InstructionFormat(intcc, VALUE, ebb, VARIABLE_ARGS) -BranchFloat = InstructionFormat(floatcc, VALUE, ebb, VARIABLE_ARGS) -BranchIcmp = InstructionFormat(intcc, VALUE, VALUE, ebb, VARIABLE_ARGS) -BranchTable = InstructionFormat(VALUE, ebb, entities.jump_table) -BranchTableEntry = InstructionFormat(VALUE, VALUE, uimm8, entities.jump_table) -BranchTableBase = InstructionFormat(entities.jump_table) -IndirectJump = InstructionFormat(VALUE, entities.jump_table) - -Call = InstructionFormat(func_ref, VARIABLE_ARGS) -CallIndirect = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS) -FuncAddr = InstructionFormat(func_ref) - -Load = InstructionFormat(memflags, VALUE, offset32) -LoadComplex = InstructionFormat(memflags, VARIABLE_ARGS, offset32) -Store = InstructionFormat(memflags, VALUE, VALUE, offset32) -StoreComplex = InstructionFormat(memflags, VALUE, VARIABLE_ARGS, offset32) - -StackLoad = InstructionFormat(stack_slot, offset32) -StackStore = InstructionFormat(VALUE, stack_slot, offset32) - -# Accessing a WebAssembly heap. -HeapAddr = InstructionFormat(heap, VALUE, uimm32) - -# Accessing a WebAssembly table. 
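
Each format fixes the operand layout that appears in the printed IR. A few corresponding textual forms, sketched with made-up values::

    v1 = iconst.i32 7          ; UnaryImm: a single imm64 immediate
    v2 = iadd v0, v1           ; Binary: two value operands
    v3 = icmp eq v0, v1        ; IntCompare: condition code plus two values
    brz v3, ebb2(v0)           ; Branch: value, EBB, variable arguments
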
-TableAddr = InstructionFormat(table, VALUE, offset32) - -RegMove = InstructionFormat(VALUE, ('src', regunit), ('dst', regunit)) -CopySpecial = InstructionFormat(('src', regunit), ('dst', regunit)) -CopyNop = InstructionFormat( - ('src', entities.stack_slot), ('dst', entities.stack_slot)) -RegSpill = InstructionFormat( - VALUE, ('src', regunit), ('dst', entities.stack_slot)) -RegFill = InstructionFormat( - VALUE, ('src', entities.stack_slot), ('dst', regunit)) - -Trap = InstructionFormat(trapcode) -CondTrap = InstructionFormat(VALUE, trapcode) -IntCondTrap = InstructionFormat(intcc, VALUE, trapcode) -FloatCondTrap = InstructionFormat(floatcc, VALUE, trapcode) - -# Finally extract the names of global values in this module. -InstructionFormat.extract_names(globals()) diff --git a/cranelift-codegen/meta-python/base/immediates.py b/cranelift-codegen/meta-python/base/immediates.py deleted file mode 100644 index e8a6fccc7..000000000 --- a/cranelift-codegen/meta-python/base/immediates.py +++ /dev/null @@ -1,123 +0,0 @@ -""" -The `cranelift.immediates` module predefines all the Cranelift immediate -operand types. -""" -from __future__ import absolute_import -from cdsl.operands import ImmediateKind - -#: A 64-bit immediate integer operand. -#: -#: This type of immediate integer can interact with SSA values with any -#: :py:class:`cranelift.IntType` type. -imm64 = ImmediateKind('imm64', 'A 64-bit immediate integer.') - -#: An unsigned 8-bit immediate integer operand. -#: -#: This small operand is used to indicate lane indexes in SIMD vectors and -#: immediate bit counts on shift instructions. -uimm8 = ImmediateKind('uimm8', 'An 8-bit immediate unsigned integer.') - -#: An unsigned 32-bit immediate integer operand. -uimm32 = ImmediateKind('uimm32', 'A 32-bit immediate unsigned integer.') - -#: A 32-bit immediate signed offset. -#: -#: This is used to represent an immediate address offset in load/store -#: instructions. -offset32 = ImmediateKind( - 'offset32', - 'A 32-bit immediate signed offset.', - default_member='offset') - -#: A 32-bit immediate floating point operand. -#: -#: IEEE 754-2008 binary32 interchange format. -ieee32 = ImmediateKind('ieee32', 'A 32-bit immediate floating point number.') - -#: A 64-bit immediate floating point operand. -#: -#: IEEE 754-2008 binary64 interchange format. -ieee64 = ImmediateKind('ieee64', 'A 64-bit immediate floating point number.') - -#: An immediate boolean operand. -#: -#: This type of immediate boolean can interact with SSA values with any -#: :py:class:`cranelift.BoolType` type. -boolean = ImmediateKind('bool', 'An immediate boolean.', - rust_type='bool') - -#: A condition code for comparing integer values. -#: -#: This enumerated operand kind is used for the :clif:inst:`icmp` instruction -#: and corresponds to the `condcodes::IntCC` Rust type. -intcc = ImmediateKind( - 'intcc', - 'An integer comparison condition code.', - default_member='cond', - rust_type='ir::condcodes::IntCC', - values={ - 'eq': 'Equal', - 'ne': 'NotEqual', - 'sge': 'SignedGreaterThanOrEqual', - 'sgt': 'SignedGreaterThan', - 'sle': 'SignedLessThanOrEqual', - 'slt': 'SignedLessThan', - 'uge': 'UnsignedGreaterThanOrEqual', - 'ugt': 'UnsignedGreaterThan', - 'ule': 'UnsignedLessThanOrEqual', - 'ult': 'UnsignedLessThan', - }) - -#: A condition code for comparing floating point values. -#: -#: This enumerated operand kind is used for the :clif:inst:`fcmp` instruction -#: and corresponds to the `condcodes::FloatCC` Rust type. 
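
In the textual IR these immediate kinds print inline with the instruction. Roughly (all values invented; `v4` is assumed to hold an address)::

    v0 = iconst.i64 0x1234         ; imm64
    v1 = f32const 0x1.5p3          ; ieee32, hexadecimal float notation
    v2 = bconst.b1 true            ; bool
    v3 = load.i32 v4+12            ; offset32 spelled as +12
    v5 = icmp_imm ult v3, 100      ; intcc `ult` with an imm64
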
-floatcc = ImmediateKind( - 'floatcc', - 'A floating point comparison condition code.', - default_member='cond', - rust_type='ir::condcodes::FloatCC', - values={ - 'ord': 'Ordered', - 'uno': 'Unordered', - 'eq': 'Equal', - 'ne': 'NotEqual', - 'one': 'OrderedNotEqual', - 'ueq': 'UnorderedOrEqual', - 'lt': 'LessThan', - 'le': 'LessThanOrEqual', - 'gt': 'GreaterThan', - 'ge': 'GreaterThanOrEqual', - 'ult': 'UnorderedOrLessThan', - 'ule': 'UnorderedOrLessThanOrEqual', - 'ugt': 'UnorderedOrGreaterThan', - 'uge': 'UnorderedOrGreaterThanOrEqual', - }) - -#: Flags for memory operations like :clif:inst:`load` and :clif:inst:`store`. -memflags = ImmediateKind( - 'memflags', - 'Memory operation flags', - default_member='flags', rust_type='ir::MemFlags') - -#: A register unit in the current target ISA. -regunit = ImmediateKind( - 'regunit', - 'A register unit in the target ISA', - rust_type='isa::RegUnit') - -#: A trap code indicating the reason for trapping. -#: -#: The Rust enum type also has a `User(u16)` variant for user-provided trap -#: codes. -trapcode = ImmediateKind( - 'trapcode', - 'A trap reason code.', - default_member='code', - rust_type='ir::TrapCode', - values={ - "stk_ovf": 'StackOverflow', - "heap_oob": 'HeapOutOfBounds', - "int_ovf": 'IntegerOverflow', - "int_divz": 'IntegerDivisionByZero', - }) diff --git a/cranelift-codegen/meta-python/base/instructions.py b/cranelift-codegen/meta-python/base/instructions.py deleted file mode 100644 index 1c0ebc164..000000000 --- a/cranelift-codegen/meta-python/base/instructions.py +++ /dev/null @@ -1,2054 +0,0 @@ -""" -Cranelift base instruction set. - -This module defines the basic Cranelift instruction set that all targets -support. -""" -from __future__ import absolute_import -from cdsl.operands import Operand, VARIABLE_ARGS -from cdsl.typevar import TypeVar -from cdsl.instructions import Instruction, InstructionGroup -from base.types import f32, f64, b1, iflags, fflags -from base.immediates import imm64, uimm8, uimm32, ieee32, ieee64, offset32 -from base.immediates import boolean, intcc, floatcc, memflags, regunit -from base.immediates import trapcode -from base import entities -from cdsl.ti import WiderOrEq -import base.formats # noqa - -GROUP = InstructionGroup("base", "Shared base instruction set") - -Int = TypeVar('Int', 'A scalar or vector integer type', ints=True, simd=True) -Bool = TypeVar('Bool', 'A scalar or vector boolean type', - bools=True, simd=True) -iB = TypeVar('iB', 'A scalar integer type', ints=True) -iAddr = TypeVar('iAddr', 'An integer address type', ints=(32, 64)) -Testable = TypeVar( - 'Testable', 'A scalar boolean or integer type', - ints=True, bools=True) -TxN = TypeVar( - 'TxN', 'A SIMD vector type', - ints=True, floats=True, bools=True, scalars=False, simd=True) -Any = TypeVar( - 'Any', 'Any integer, float, or boolean scalar or vector type', - ints=True, floats=True, bools=True, scalars=True, simd=True) -Mem = TypeVar( - 'Mem', 'Any type that can be stored in memory', - ints=True, floats=True, simd=True) -MemTo = TypeVar( - 'MemTo', 'Any type that can be stored in memory', - ints=True, floats=True, simd=True) - -addr = Operand('addr', iAddr) - -# -# Control flow -# -c = Operand('c', Testable, doc='Controlling value to test') -Cond = Operand('Cond', intcc) -x = Operand('x', iB) -y = Operand('y', iB) -EBB = Operand('EBB', entities.ebb, doc='Destination extended basic block') -args = Operand('args', VARIABLE_ARGS, doc='EBB arguments') - -jump = Instruction( - 'jump', r""" - Jump. 
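
The remaining operand kinds read similarly in the text format; a sketch, with flag spelling reconstructed as best as possible for this era of the syntax::

    v2 = fcmp gt v0, v1            ; floatcc condition code
    v3 = load.f64 aligned v4       ; memflags printed before the address
    trapz v5, int_divz             ; trapcode from the table above
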
- - Unconditionally jump to an extended basic block, passing the specified - EBB arguments. The number and types of arguments must match the - destination EBB. - """, - ins=(EBB, args), is_branch=True, is_terminator=True) - -fallthrough = Instruction( - 'fallthrough', r""" - Fall through to the next EBB. - - This is the same as :inst:`jump`, except the destination EBB must be - the next one in the layout. - - Jumps are turned into fall-through instructions by the branch - relaxation pass. There is no reason to use this instruction outside - that pass. - """, - ins=(EBB, args), is_branch=True, is_terminator=True) - -brz = Instruction( - 'brz', r""" - Branch when zero. - - If ``c`` is a :type:`b1` value, take the branch when ``c`` is false. If - ``c`` is an integer value, take the branch when ``c = 0``. - """, - ins=(c, EBB, args), is_branch=True) - -brnz = Instruction( - 'brnz', r""" - Branch when non-zero. - - If ``c`` is a :type:`b1` value, take the branch when ``c`` is true. If - ``c`` is an integer value, take the branch when ``c != 0``. - """, - ins=(c, EBB, args), is_branch=True) - -br_icmp = Instruction( - 'br_icmp', r""" - Compare scalar integers and branch. - - Compare ``x`` and ``y`` in the same way as the :inst:`icmp` instruction - and take the branch if the condition is true:: - - br_icmp ugt v1, v2, ebb4(v5, v6) - - is semantically equivalent to:: - - v10 = icmp ugt, v1, v2 - brnz v10, ebb4(v5, v6) - - Some RISC architectures like MIPS and RISC-V provide instructions that - implement all or some of the condition codes. The instruction can also - be used to represent *macro-op fusion* on architectures like Intel's. - """, - ins=(Cond, x, y, EBB, args), is_branch=True) - -f = Operand('f', iflags) - -brif = Instruction( - 'brif', r""" - Branch when condition is true in integer CPU flags. - """, - ins=(Cond, f, EBB, args), is_branch=True) - -Cond = Operand('Cond', floatcc) -f = Operand('f', fflags) - -brff = Instruction( - 'brff', r""" - Branch when condition is true in floating point CPU flags. - """, - ins=(Cond, f, EBB, args), is_branch=True) - -x = Operand('x', iB, doc='index into jump table') -Entry = TypeVar('Entry', 'A scalar integer type', ints=True) -entry = Operand('entry', Entry, doc='entry of jump table') -JT = Operand('JT', entities.jump_table) -br_table = Instruction( - 'br_table', r""" - Indirect branch via jump table. - - Use ``x`` as an unsigned index into the jump table ``JT``. If a jump - table entry is found, branch to the corresponding EBB. If no entry was - found or the index is out-of-bounds, branch to the given default EBB. - - Note that this branch instruction can't pass arguments to the targeted - blocks. Split critical edges as needed to work around this. - - Do not confuse this with "tables" in WebAssembly. ``br_table`` is for - jump tables with destinations within the current function only -- think - of a ``match`` in Rust or a ``switch`` in C. If you want to call a - function in a dynamic library, that will typically use - ``call_indirect``. - """, - ins=(x, EBB, JT), is_branch=True, is_terminator=True) - -Size = Operand('Size', uimm8, 'Size in bytes') -jump_table_entry = Instruction( - 'jump_table_entry', r""" - Get an entry from a jump table. - - Load a serialized ``entry`` from a jump table ``JT`` at a given index - ``addr`` with a specific ``Size``. The retrieved entry may need to be - decoded after loading, depending upon the jump table type used. - - Currently, the only type supported is entries which are relative to the - base of the jump table. 
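
The branch instructions defined here compose like this in CLIF text; a hand-written sketch whose EBB numbering and constants are illustrative::

    function %branching(i32) -> i32 {
        jt0 = jump_table [ebb1, ebb2]
    ebb0(v0: i32):
        br_table v0, ebb3, jt0       ; out-of-bounds index goes to ebb3
    ebb1:
        v1 = iconst.i32 10
        jump ebb4(v1)
    ebb2:
        v2 = iconst.i32 20
        jump ebb4(v2)
    ebb3:
        v3 = iconst.i32 0
        jump ebb4(v3)
    ebb4(v4: i32):
        return v4
    }
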
- """, - ins=(x, addr, Size, JT), outs=entry, can_load=True) - -jump_table_base = Instruction( - 'jump_table_base', r""" - Get the absolute base address of a jump table. - - This is used for jump tables wherein the entries are stored relative to - the base of jump table. In order to use these, generated code should first - load an entry using ``jump_table_entry``, then use this instruction to add - the relative base back to it. - """, - ins=JT, outs=addr) - -indirect_jump_table_br = Instruction( - 'indirect_jump_table_br', r""" - Branch indirectly via a jump table entry. - - Unconditionally jump via a jump table entry that was previously loaded - with the ``jump_table_entry`` instruction. - """, - ins=(addr, JT), - is_branch=True, is_indirect_branch=True, is_terminator=True) - -debugtrap = Instruction('debugtrap', r""" - Encodes an assembly debug trap. - """, can_load=True, can_store=True, other_side_effects=True) - -code = Operand('code', trapcode) -trap = Instruction( - 'trap', r""" - Terminate execution unconditionally. - """, - ins=code, is_terminator=True, can_trap=True) - -trapz = Instruction( - 'trapz', r""" - Trap when zero. - - if ``c`` is non-zero, execution continues at the following instruction. - """, - ins=(c, code), can_trap=True) - -trapnz = Instruction( - 'trapnz', r""" - Trap when non-zero. - - if ``c`` is zero, execution continues at the following instruction. - """, - ins=(c, code), can_trap=True) - -Cond = Operand('Cond', intcc) -f = Operand('f', iflags) - -trapif = Instruction( - 'trapif', r""" - Trap when condition is true in integer CPU flags. - """, - ins=(Cond, f, code), can_trap=True) - -Cond = Operand('Cond', floatcc) -f = Operand('f', fflags) - -trapff = Instruction( - 'trapff', r""" - Trap when condition is true in floating point CPU flags. - """, - ins=(Cond, f, code), can_trap=True) - -rvals = Operand('rvals', VARIABLE_ARGS, doc='return values') - -x_return = Instruction( - 'return', r""" - Return from the function. - - Unconditionally transfer control to the calling function, passing the - provided return values. The list of return values must match the - function signature's return types. - """, - ins=rvals, is_return=True, is_terminator=True) - -fallthrough_return = Instruction( - 'fallthrough_return', r""" - Return from the function by fallthrough. - - This is a specialized instruction for use where one wants to append - a custom epilogue, which will then perform the real return. This - instruction has no encoding. - """, - ins=rvals, is_return=True, is_terminator=True) - -FN = Operand( - 'FN', - entities.func_ref, - doc='function to call, declared by :inst:`function`') -args = Operand('args', VARIABLE_ARGS, doc='call arguments') - -call = Instruction( - 'call', r""" - Direct function call. - - Call a function which has been declared in the preamble. The argument - types must match the function's signature. - """, - ins=(FN, args), outs=rvals, is_call=True) - -SIG = Operand('SIG', entities.sig_ref, doc='function signature') -callee = Operand('callee', iAddr, doc='address of function to call') - -call_indirect = Instruction( - 'call_indirect', r""" - Indirect function call. - - Call the function pointed to by `callee` with the given arguments. The - called function must match the specified signature. - - Note that this is different from WebAssembly's ``call_indirect``; the - callee is a native address, rather than a table index. For WebAssembly, - :inst:`table_addr` and :inst:`load` are used to obtain a native address - from a table. 
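
A sketch combining the trap and indirect-call instructions above (the `user0` trap code, the signature, and the null-check idiom are assumptions for illustration)::

    function %call_demo(i64, i32) -> i32 {
        sig0 = (i32) -> i32 system_v
    ebb0(v0: i64, v1: i32):
        trapz v0, user0                    ; reject a null callee address
        v2 = call_indirect sig0, v0(v1)
        return v2
    }
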
- """, - ins=(SIG, callee, args), outs=rvals, is_call=True) - -func_addr = Instruction( - 'func_addr', r""" - Get the address of a function. - - Compute the absolute address of a function declared in the preamble. - The returned address can be used as a ``callee`` argument to - :inst:`call_indirect`. This is also a method for calling functions that - are too far away to be addressable by a direct :inst:`call` - instruction. - """, - ins=FN, outs=addr) - -# -# Memory operations -# - -SS = Operand('SS', entities.stack_slot) -Offset = Operand('Offset', offset32, 'Byte offset from base address') -x = Operand('x', Mem, doc='Value to be stored') -a = Operand('a', Mem, doc='Value loaded') -p = Operand('p', iAddr) -MemFlags = Operand('MemFlags', memflags) -args = Operand('args', VARIABLE_ARGS, doc='Address arguments') - -load = Instruction( - 'load', r""" - Load from memory at ``p + Offset``. - - This is a polymorphic instruction that can load any value type which - has a memory representation. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -load_complex = Instruction( - 'load_complex', r""" - Load from memory at ``sum(args) + Offset``. - - This is a polymorphic instruction that can load any value type which - has a memory representation. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -store = Instruction( - 'store', r""" - Store ``x`` to memory at ``p + Offset``. - - This is a polymorphic instruction that can store any value type with a - memory representation. - """, - ins=(MemFlags, x, p, Offset), can_store=True) - -store_complex = Instruction( - 'store_complex', r""" - Store ``x`` to memory at ``sum(args) + Offset``. - - This is a polymorphic instruction that can store any value type with a - memory representation. - """, - ins=(MemFlags, x, args, Offset), can_store=True) - - -iExt8 = TypeVar( - 'iExt8', 'An integer type with more than 8 bits', - ints=(16, 64)) -x = Operand('x', iExt8) -a = Operand('a', iExt8) - -uload8 = Instruction( - 'uload8', r""" - Load 8 bits from memory at ``p + Offset`` and zero-extend. - - This is equivalent to ``load.i8`` followed by ``uextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -uload8_complex = Instruction( - 'uload8_complex', r""" - Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend. - - This is equivalent to ``load.i8`` followed by ``uextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -sload8 = Instruction( - 'sload8', r""" - Load 8 bits from memory at ``p + Offset`` and sign-extend. - - This is equivalent to ``load.i8`` followed by ``sextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -sload8_complex = Instruction( - 'sload8_complex', r""" - Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend. - - This is equivalent to ``load.i8`` followed by ``sextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -istore8 = Instruction( - 'istore8', r""" - Store the low 8 bits of ``x`` to memory at ``p + Offset``. - - This is equivalent to ``ireduce.i8`` followed by ``store.i8``. - """, - ins=(MemFlags, x, p, Offset), can_store=True) - -istore8_complex = Instruction( - 'istore8_complex', r""" - Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``. - - This is equivalent to ``ireduce.i8`` followed by ``store.i8``. 
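
The plain and widening memory instructions print like this (offsets invented; `v0` is assumed to be a valid pointer)::

    v1 = load.i32 v0               ; 32-bit load
    v2 = uload8.i32 v0+4           ; load a byte, zero-extend to i32
    v3 = sload16.i32 v0+8          ; load 16 bits, sign-extend to i32
    store v1, v0+12                ; 32-bit store
    istore8 v1, v0+16              ; store only the low 8 bits
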
- """, - ins=(MemFlags, x, args, Offset), can_store=True) - -iExt16 = TypeVar( - 'iExt16', 'An integer type with more than 16 bits', - ints=(32, 64)) -x = Operand('x', iExt16) -a = Operand('a', iExt16) - -uload16 = Instruction( - 'uload16', r""" - Load 16 bits from memory at ``p + Offset`` and zero-extend. - - This is equivalent to ``load.i16`` followed by ``uextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -uload16_complex = Instruction( - 'uload16_complex', r""" - Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend. - - This is equivalent to ``load.i16`` followed by ``uextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -sload16 = Instruction( - 'sload16', r""" - Load 16 bits from memory at ``p + Offset`` and sign-extend. - - This is equivalent to ``load.i16`` followed by ``sextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -sload16_complex = Instruction( - 'sload16_complex', r""" - Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend. - - This is equivalent to ``load.i16`` followed by ``sextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -istore16 = Instruction( - 'istore16', r""" - Store the low 16 bits of ``x`` to memory at ``p + Offset``. - - This is equivalent to ``ireduce.i16`` followed by ``store.i16``. - """, - ins=(MemFlags, x, p, Offset), can_store=True) - -istore16_complex = Instruction( - 'istore16_complex', r""" - Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``. - - This is equivalent to ``ireduce.i16`` followed by ``store.i16``. - """, - ins=(MemFlags, x, args, Offset), can_store=True) - -iExt32 = TypeVar( - 'iExt32', 'An integer type with more than 32 bits', - ints=(64, 64)) -x = Operand('x', iExt32) -a = Operand('a', iExt32) - -uload32 = Instruction( - 'uload32', r""" - Load 32 bits from memory at ``p + Offset`` and zero-extend. - - This is equivalent to ``load.i32`` followed by ``uextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -uload32_complex = Instruction( - 'uload32_complex', r""" - Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend. - - This is equivalent to ``load.i32`` followed by ``uextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -sload32 = Instruction( - 'sload32', r""" - Load 32 bits from memory at ``p + Offset`` and sign-extend. - - This is equivalent to ``load.i32`` followed by ``sextend``. - """, - ins=(MemFlags, p, Offset), outs=a, can_load=True) - -sload32_complex = Instruction( - 'sload32_complex', r""" - Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend. - - This is equivalent to ``load.i32`` followed by ``sextend``. - """, - ins=(MemFlags, args, Offset), outs=a, can_load=True) - -istore32 = Instruction( - 'istore32', r""" - Store the low 32 bits of ``x`` to memory at ``p + Offset``. - - This is equivalent to ``ireduce.i32`` followed by ``store.i32``. - """, - ins=(MemFlags, x, p, Offset), can_store=True) - -istore32_complex = Instruction( - 'istore32_complex', r""" - Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``. - - This is equivalent to ``ireduce.i32`` followed by ``store.i32``. - """, - ins=(MemFlags, x, args, Offset), can_store=True) - -x = Operand('x', Mem, doc='Value to be stored') -a = Operand('a', Mem, doc='Value loaded') -Offset = Operand('Offset', offset32, 'In-bounds offset into stack slot') - -stack_load = Instruction( - 'stack_load', r""" - Load a value from a stack slot at the constant offset. 
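
Written out, the equivalence these docstrings keep citing looks like this (sketch)::

    v1 = uload16.i32 v0      ; one instruction ...
    v2 = load.i16 v0         ; ... behaves like this load
    v3 = uextend.i32 v2      ; followed by this zero-extension
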
- - This is a polymorphic instruction that can load any value type which - has a memory representation. - - The offset is an immediate constant, not an SSA value. The memory - access cannot go out of bounds, i.e. - :math:`sizeof(a) + Offset <= sizeof(SS)`. - """, - ins=(SS, Offset), outs=a, can_load=True) - -stack_store = Instruction( - 'stack_store', r""" - Store a value to a stack slot at a constant offset. - - This is a polymorphic instruction that can store any value type with a - memory representation. - - The offset is an immediate constant, not an SSA value. The memory - access cannot go out of bounds, i.e. - :math:`sizeof(a) + Offset <= sizeof(SS)`. - """, - ins=(x, SS, Offset), can_store=True) - -stack_addr = Instruction( - 'stack_addr', r""" - Get the address of a stack slot. - - Compute the absolute address of a byte in a stack slot. The offset must - refer to a byte inside the stack slot: - :math:`0 <= Offset < sizeof(SS)`. - """, - ins=(SS, Offset), outs=addr) - -# -# Global values. -# - -GV = Operand('GV', entities.global_value) - -global_value = Instruction( - 'global_value', r""" - Compute the value of global GV. - """, - ins=GV, outs=a) - -# A specialized form of global_value instructions that only handles -# symbolic names. -symbol_value = Instruction( - 'symbol_value', r""" - Compute the value of global GV, which is a symbolic value. - """, - ins=GV, outs=a) - -# -# WebAssembly bounds-checked heap accesses. -# - -HeapOffset = TypeVar('HeapOffset', 'An unsigned heap offset', ints=(32, 64)) - -H = Operand('H', entities.heap) -p = Operand('p', HeapOffset) -Size = Operand('Size', uimm32, 'Size in bytes') - -heap_addr = Instruction( - 'heap_addr', r""" - Bounds check and compute absolute address of heap memory. - - Verify that the offset range ``p .. p + Size - 1`` is in bounds for the - heap H, and generate an absolute address that is safe to dereference. - - 1. If ``p + Size`` is not greater than the heap bound, return an - absolute address corresponding to a byte offset of ``p`` from the - heap's base address. - 2. If ``p + Size`` is greater than the heap bound, generate a trap. - """, - ins=(H, p, Size), outs=addr) - -# -# WebAssembly bounds-checked table accesses. -# - -TableOffset = TypeVar('TableOffset', 'An unsigned table offset', ints=(32, 64)) - -T = Operand('T', entities.table) -p = Operand('p', TableOffset) -Offset = Operand('Offset', offset32, 'Byte offset from element address') - -table_addr = Instruction( - 'table_addr', r""" - Bounds check and compute absolute address of a table entry. - - Verify that the offset ``p`` is in bounds for the table T, and generate - an absolute address that is safe to dereference. - - ``Offset`` must be less than the size of a table element. - - 1. If ``p`` is not greater than the table bound, return an absolute - address corresponding to a byte offset of ``p`` from the table's - base address. - 2. If ``p`` is greater than the table bound, generate a trap. - """, - ins=(T, p, Offset), outs=addr) - - -# -# Materializing constants. -# - -N = Operand('N', imm64) -a = Operand('a', Int, doc='A constant integer scalar or vector value') -iconst = Instruction( - 'iconst', r""" - Integer constant. - - Create a scalar integer SSA value with an immediate constant value, or - an integer vector where all the lanes have the same value. - """, - ins=N, outs=a) - -N = Operand('N', ieee32) -a = Operand('a', f32, doc='A constant f32 scalar value') -f32const = Instruction( - 'f32const', r""" - Floating point constant. 
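
The stack-slot instructions above combine against an explicit slot like so (slot size and offsets are illustrative; 8 + 8 <= 16 keeps the load in bounds)::

    function %slot_demo(i64) -> i64 {
        ss0 = explicit_slot 16
    ebb0(v0: i64):
        stack_store v0, ss0
        stack_store v0, ss0+8
        v1 = stack_load.i64 ss0+8
        v2 = stack_addr.i64 ss0
        return v1
    }
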
- - Create a :type:`f32` SSA value with an immediate constant value. - """, - ins=N, outs=a) - -N = Operand('N', ieee64) -a = Operand('a', f64, doc='A constant f64 scalar value') -f64const = Instruction( - 'f64const', r""" - Floating point constant. - - Create a :type:`f64` SSA value with an immediate constant value. - """, - ins=N, outs=a) - -N = Operand('N', boolean) -a = Operand('a', Bool, doc='A constant boolean scalar or vector value') -bconst = Instruction( - 'bconst', r""" - Boolean constant. - - Create a scalar boolean SSA value with an immediate constant value, or - a boolean vector where all the lanes have the same value. - """, - ins=N, outs=a) - -# -# Generics. -# - -nop = Instruction( - 'nop', r""" - Just a dummy instruction - - Note: this doesn't compile to a machine code nop - """) - -c = Operand('c', Testable, doc='Controlling value to test') -x = Operand('x', Any, doc='Value to use when `c` is true') -y = Operand('y', Any, doc='Value to use when `c` is false') -a = Operand('a', Any) - -select = Instruction( - 'select', r""" - Conditional select. - - This instruction selects whole values. Use :inst:`vselect` for - lane-wise selection. - """, - ins=(c, x, y), outs=a) - -cc = Operand('cc', intcc, doc='Controlling condition code') -flags = Operand('flags', iflags, doc='The machine\'s flag register') - -selectif = Instruction( - 'selectif', r""" - Conditional select, dependent on integer condition codes. - """, - ins=(cc, flags, x, y), outs=a) - -x = Operand('x', Any) - -copy = Instruction( - 'copy', r""" - Register-register copy. - - This instruction copies its input, preserving the value type. - - A pure SSA-form program does not need to copy values, but this - instruction is useful for representing intermediate stages during - instruction transformations, and the register allocator needs a way of - representing register copies. - """, - ins=x, outs=a) - -spill = Instruction( - 'spill', r""" - Spill a register value to a stack slot. - - This instruction behaves exactly like :inst:`copy`, but the result - value is assigned to a spill slot. - """, - ins=x, outs=a, can_store=True) - -fill = Instruction( - 'fill', r""" - Load a register value from a stack slot. - - This instruction behaves exactly like :inst:`copy`, but creates a new - SSA value for the spilled input value. - """, - ins=x, outs=a, can_load=True) - -src = Operand('src', regunit) -dst = Operand('dst', regunit) - -regmove = Instruction( - 'regmove', r""" - Temporarily divert ``x`` from ``src`` to ``dst``. - - This instruction moves the location of a value from one register to - another without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - The register diversions created by this instruction must be undone - before the value leaves the EBB. At the entry to a new EBB, all live - values must be in their originally assigned registers. - """, - ins=(x, src, dst), - other_side_effects=True) - -copy_special = Instruction( - 'copy_special', r""" - Copies the contents of ''src'' register to ''dst'' register. - - This instructions copies the contents of one register to another - register without involving any SSA values. This is used for copying - special registers, e.g. copying the stack register to the frame - register in a function prologue. 
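
The constant and select instructions, sketched with invented values::

    v0 = iconst.i32 42             ; integer constant
    v1 = f64const 0x1.8p1          ; 3.0 in hexadecimal float notation
    v2 = bconst.b1 true            ; boolean constant
    v3 = iconst.i32 7
    v4 = icmp sgt v0, v3           ; b1 controlling value
    v5 = select v4, v0, v3         ; yields v0, since 42 > 7
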
- """, - ins=(src, dst), - other_side_effects=True) - -copy_nop = Instruction( - 'copy_nop', r""" - Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn - into a no-op. This instruction is for use only within Cranelift - itself. - - This instruction copies its input, preserving the value type. - """, - ins=x, outs=a) - -delta = Operand('delta', Int) -adjust_sp_down = Instruction( - 'adjust_sp_down', r""" - Subtracts ``delta`` offset value from the stack pointer register. - - This instruction is used to adjust the stack pointer by a dynamic amount. - """, - ins=(delta,), - other_side_effects=True) - -StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') -adjust_sp_up_imm = Instruction( - 'adjust_sp_up_imm', r""" - Adds ``Offset`` immediate offset value to the stack pointer register. - - This instruction is used to adjust the stack pointer, primarily in function - prologues and epilogues. ``Offset`` is constrained to the size of a signed - 32-bit integer. - """, - ins=(StackOffset,), - other_side_effects=True) - -StackOffset = Operand('Offset', imm64, 'Offset from current stack pointer') -adjust_sp_down_imm = Instruction( - 'adjust_sp_down_imm', r""" - Subtracts ``Offset`` immediate offset value from the stack pointer - register. - - This instruction is used to adjust the stack pointer, primarily in function - prologues and epilogues. ``Offset`` is constrained to the size of a signed - 32-bit integer. - """, - ins=(StackOffset,), - other_side_effects=True) - -f = Operand('f', iflags) - -ifcmp_sp = Instruction( - 'ifcmp_sp', r""" - Compare ``addr`` with the stack pointer and set the CPU flags. - - This is like :inst:`ifcmp` where ``addr`` is the LHS operand and the stack - pointer is the RHS. - """, - ins=addr, outs=f) - -regspill = Instruction( - 'regspill', r""" - Temporarily divert ``x`` from ``src`` to ``SS``. - - This instruction moves the location of a value from a register to a - stack slot without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - See also :inst:`regmove`. - """, - ins=(x, src, SS), - other_side_effects=True) - - -regfill = Instruction( - 'regfill', r""" - Temporarily divert ``x`` from ``SS`` to ``dst``. - - This instruction moves the location of a value from a stack slot to a - register without creating a new SSA value. It is used by the register - allocator to temporarily rearrange register assignments in order to - satisfy instruction constraints. - - See also :inst:`regmove`. - """, - ins=(x, SS, dst), - other_side_effects=True) -# -# Vector operations -# - -x = Operand('x', TxN, doc='Vector to split') -lo = Operand('lo', TxN.half_vector(), doc='Low-numbered lanes of `x`') -hi = Operand('hi', TxN.half_vector(), doc='High-numbered lanes of `x`') - -vsplit = Instruction( - 'vsplit', r""" - Split a vector into two halves. - - Split the vector `x` into two separate values, each containing half of - the lanes from ``x``. The result may be two scalars if ``x`` only had - two lanes. - """, - ins=x, outs=(lo, hi), is_ghost=True) - -Any128 = TypeVar( - 'Any128', 'Any scalar or vector type with as most 128 lanes', - ints=True, floats=True, bools=True, scalars=True, simd=(1, 128)) -x = Operand('x', Any128, doc='Low-numbered lanes') -y = Operand('y', Any128, doc='High-numbered lanes') -a = Operand('a', Any128.double_vector(), doc='Concatenation of `x` and `y`') - -vconcat = Instruction( - 'vconcat', r""" - Vector concatenation. 
- - Return a vector formed by concatenating ``x`` and ``y``. The resulting - vector type has twice as many lanes as each of the inputs. The lanes of - ``x`` appear as the low-numbered lanes, and the lanes of ``y`` become - the high-numbered lanes of ``a``. - - It is possible to form a vector by concatenating two scalars. - """, - ins=(x, y), outs=a, is_ghost=True) - -c = Operand('c', TxN.as_bool(), doc='Controlling vector') -x = Operand('x', TxN, doc='Value to use where `c` is true') -y = Operand('y', TxN, doc='Value to use where `c` is false') -a = Operand('a', TxN) - -vselect = Instruction( - 'vselect', r""" - Vector lane select. - - Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean - vector ``c``. - """, - ins=(c, x, y), outs=a) - -x = Operand('x', TxN.lane_of()) - -splat = Instruction( - 'splat', r""" - Vector splat. - - Return a vector whose lanes are all ``x``. - """, - ins=x, outs=a) - -x = Operand('x', TxN, doc='SIMD vector to modify') -y = Operand('y', TxN.lane_of(), doc='New lane value') -Idx = Operand('Idx', uimm8, doc='Lane index') - -insertlane = Instruction( - 'insertlane', r""" - Insert ``y`` as lane ``Idx`` in x. - - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - """, - ins=(x, Idx, y), outs=a) - -x = Operand('x', TxN) -a = Operand('a', TxN.lane_of()) - -extractlane = Instruction( - 'extractlane', r""" - Extract lane ``Idx`` from ``x``. - - The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. - """, - ins=(x, Idx), outs=a) - -# -# Integer arithmetic -# - -a = Operand('a', Int.as_bool()) -Cond = Operand('Cond', intcc) -x = Operand('x', Int) -y = Operand('y', Int) - -icmp = Instruction( - 'icmp', r""" - Integer comparison. - - The condition code determines if the operands are interpreted as signed - or unsigned integers. - - ====== ======== ========= - Signed Unsigned Condition - ====== ======== ========= - eq eq Equal - ne ne Not equal - slt ult Less than - sge uge Greater than or equal - sgt ugt Greater than - sle ule Less than or equal - ====== ======== ========= - - When this instruction compares integer vectors, it returns a boolean - vector of lane-wise comparisons. - """, - ins=(Cond, x, y), outs=a) - -a = Operand('a', b1) -x = Operand('x', iB) -Y = Operand('Y', imm64) - -icmp_imm = Instruction( - 'icmp_imm', r""" - Compare scalar integer to a constant. - - This is the same as the :inst:`icmp` instruction, except one operand is - an immediate constant. - - This instruction can only compare scalars. Use :inst:`icmp` for - lane-wise vector comparisons. - """, - ins=(Cond, x, Y), outs=a) - -f = Operand('f', iflags) -x = Operand('x', iB) -y = Operand('y', iB) - -ifcmp = Instruction( - 'ifcmp', r""" - Compare scalar integers and return flags. - - Compare two scalar integer values and return integer CPU flags - representing the result. - """, - ins=(x, y), outs=f) - -ifcmp_imm = Instruction( - 'ifcmp_imm', r""" - Compare scalar integer to a constant and return flags. - - Like :inst:`icmp_imm`, but returns integer CPU flags instead of testing - a specific condition code. - """, - ins=(x, Y), outs=f) - -a = Operand('a', Int) -x = Operand('x', Int) -y = Operand('y', Int) - -iadd = Instruction( - 'iadd', r""" - Wrapping integer addition: :math:`a := x + y \pmod{2^B}`. - - This instruction does not depend on the signed/unsigned interpretation - of the operands. 
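-
-    For example (illustrative textual IR), ``v2 = iadd v0, v1`` adds two
-    SSA values of the same integer type.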
- """, - ins=(x, y), outs=a) - -isub = Instruction( - 'isub', r""" - Wrapping integer subtraction: :math:`a := x - y \pmod{2^B}`. - - This instruction does not depend on the signed/unsigned interpretation - of the operands. - """, - ins=(x, y), outs=a) - -imul = Instruction( - 'imul', r""" - Wrapping integer multiplication: :math:`a := x y \pmod{2^B}`. - - This instruction does not depend on the signed/unsigned interpretation - of the - operands. - - Polymorphic over all integer types (vector and scalar). - """, - ins=(x, y), outs=a) - -umulhi = Instruction( - 'umulhi', r""" - Unsigned integer multiplication, producing the high half of a - double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y), outs=a) - -smulhi = Instruction( - 'smulhi', """ - Signed integer multiplication, producing the high half of a - double-length result. - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y), outs=a) - -udiv = Instruction( - 'udiv', r""" - Unsigned integer division: :math:`a := \lfloor {x \over y} \rfloor`. - - This operation traps if the divisor is zero. - """, - ins=(x, y), outs=a, can_trap=True) - -sdiv = Instruction( - 'sdiv', r""" - Signed integer division rounded toward zero: :math:`a := sign(xy) - \lfloor {|x| \over |y|}\rfloor`. - - This operation traps if the divisor is zero, or if the result is not - representable in :math:`B` bits two's complement. This only happens - when :math:`x = -2^{B-1}, y = -1`. - """, - ins=(x, y), outs=a, can_trap=True) - -urem = Instruction( - 'urem', """ - Unsigned integer remainder. - - This operation traps if the divisor is zero. - """, - ins=(x, y), outs=a, can_trap=True) - -srem = Instruction( - 'srem', """ - Signed integer remainder. The result has the sign of the dividend. - - This operation traps if the divisor is zero. - """, - ins=(x, y), outs=a, can_trap=True) - -a = Operand('a', iB) -x = Operand('x', iB) -Y = Operand('Y', imm64) - -iadd_imm = Instruction( - 'iadd_imm', """ - Add immediate integer. - - Same as :inst:`iadd`, but one operand is an immediate constant. - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, Y), outs=a) - -imul_imm = Instruction( - 'imul_imm', """ - Integer multiplication by immediate constant. - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, Y), outs=a) - -udiv_imm = Instruction( - 'udiv_imm', """ - Unsigned integer division by an immediate constant. - - This operation traps if the divisor is zero. - """, - ins=(x, Y), outs=a) - -sdiv_imm = Instruction( - 'sdiv_imm', """ - Signed integer division by an immediate constant. - - This operation traps if the divisor is zero, or if the result is not - representable in :math:`B` bits two's complement. This only happens - when :math:`x = -2^{B-1}, Y = -1`. - """, - ins=(x, Y), outs=a) - -urem_imm = Instruction( - 'urem_imm', """ - Unsigned integer remainder with immediate divisor. - - This operation traps if the divisor is zero. - """, - ins=(x, Y), outs=a) - -srem_imm = Instruction( - 'srem_imm', """ - Signed integer remainder with immediate divisor. - - This operation traps if the divisor is zero. - """, - ins=(x, Y), outs=a) - -irsub_imm = Instruction( - 'irsub_imm', """ - Immediate reverse wrapping subtraction: :math:`a := Y - x \\pmod{2^B}`. - - Also works as integer negation when :math:`Y = 0`. 
Use :inst:`iadd_imm` - with a negative immediate operand for the reverse immediate - subtraction. - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, Y), outs=a) - -# -# Integer arithmetic with carry and/or borrow. -# -a = Operand('a', iB) -x = Operand('x', iB) -y = Operand('y', iB) -c_in = Operand('c_in', b1, doc="Input carry flag") -c_out = Operand('c_out', b1, doc="Output carry flag") -b_in = Operand('b_in', b1, doc="Input borrow flag") -b_out = Operand('b_out', b1, doc="Output borrow flag") - -iadd_cin = Instruction( - 'iadd_cin', r""" - Add integers with carry in. - - Same as :inst:`iadd` with an additional carry input. Computes: - - .. math:: - - a = x + y + c_{in} \pmod 2^B - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y, c_in), outs=a) - -iadd_cout = Instruction( - 'iadd_cout', r""" - Add integers with carry out. - - Same as :inst:`iadd` with an additional carry output. - - .. math:: - - a &= x + y \pmod 2^B \\ - c_{out} &= x+y >= 2^B - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y), outs=(a, c_out)) - -iadd_carry = Instruction( - 'iadd_carry', r""" - Add integers with carry in and out. - - Same as :inst:`iadd` with an additional carry input and output. - - .. math:: - - a &= x + y + c_{in} \pmod 2^B \\ - c_{out} &= x + y + c_{in} >= 2^B - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y, c_in), outs=(a, c_out)) - -isub_bin = Instruction( - 'isub_bin', r""" - Subtract integers with borrow in. - - Same as :inst:`isub` with an additional borrow flag input. Computes: - - .. math:: - - a = x - (y + b_{in}) \pmod 2^B - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y, b_in), outs=a) - -isub_bout = Instruction( - 'isub_bout', r""" - Subtract integers with borrow out. - - Same as :inst:`isub` with an additional borrow flag output. - - .. math:: - - a &= x - y \pmod 2^B \\ - b_{out} &= x < y - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y), outs=(a, b_out)) - -isub_borrow = Instruction( - 'isub_borrow', r""" - Subtract integers with borrow in and out. - - Same as :inst:`isub` with an additional borrow flag input and output. - - .. math:: - - a &= x - (y + b_{in}) \pmod 2^B \\ - b_{out} &= x < y + b_{in} - - Polymorphic over all scalar integer types, but does not support vector - types. - """, - ins=(x, y, b_in), outs=(a, b_out)) - -# -# Bitwise operations. -# - -# TODO: Which types should permit boolean operations? Any reason to restrict? -bits = TypeVar( - 'bits', 'Any integer, float, or boolean scalar or vector type', - ints=True, floats=True, bools=True, scalars=True, simd=True) - -x = Operand('x', bits) -y = Operand('y', bits) -a = Operand('a', bits) - -band = Instruction( - 'band', """ - Bitwise and. - """, - ins=(x, y), outs=a) - -bor = Instruction( - 'bor', """ - Bitwise or. - """, - ins=(x, y), outs=a) - -bxor = Instruction( - 'bxor', """ - Bitwise xor. - """, - ins=(x, y), outs=a) - -bnot = Instruction( - 'bnot', """ - Bitwise not. - """, - ins=x, outs=a) - -band_not = Instruction( - 'band_not', """ - Bitwise and not. - - Computes `x & ~y`. - """, - ins=(x, y), outs=a) - -bor_not = Instruction( - 'bor_not', """ - Bitwise or not. - - Computes `x | ~y`. - """, - ins=(x, y), outs=a) - -bxor_not = Instruction( - 'bxor_not', """ - Bitwise xor not. - - Computes `x ^ ~y`. 
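-
-    Note that targets without fused complement instructions can legalize
-    the ``*_not`` forms back into :inst:`bnot` plus the plain operation
-    (see the expansion patterns in ``legalize.py``).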
-
-    """,
-    ins=(x, y), outs=a)
-
-# Bitwise binary ops with immediate arg.
-x = Operand('x', iB)
-Y = Operand('Y', imm64)
-a = Operand('a', iB)
-
-band_imm = Instruction(
-    'band_imm', """
-    Bitwise and with immediate.
-
-    Same as :inst:`band`, but one operand is an immediate constant.
-
-    Polymorphic over all scalar integer types, but does not support vector
-    types.
-    """,
-    ins=(x, Y), outs=a)
-
-bor_imm = Instruction(
-    'bor_imm', """
-    Bitwise or with immediate.
-
-    Same as :inst:`bor`, but one operand is an immediate constant.
-
-    Polymorphic over all scalar integer types, but does not support vector
-    types.
-    """,
-    ins=(x, Y), outs=a)
-
-bxor_imm = Instruction(
-    'bxor_imm', """
-    Bitwise xor with immediate.
-
-    Same as :inst:`bxor`, but one operand is an immediate constant.
-
-    Polymorphic over all scalar integer types, but does not support vector
-    types.
-    """,
-    ins=(x, Y), outs=a)
-
-# Shift/rotate.
-x = Operand('x', Int, doc='Scalar or vector value to shift')
-y = Operand('y', iB, doc='Number of bits to shift')
-Y = Operand('Y', imm64)
-
-a = Operand('a', Int)
-
-rotl = Instruction(
-    'rotl', r"""
-    Rotate left.
-
-    Rotate the bits in ``x`` by ``y`` places.
-    """,
-    ins=(x, y), outs=a)
-
-rotr = Instruction(
-    'rotr', r"""
-    Rotate right.
-
-    Rotate the bits in ``x`` by ``y`` places.
-    """,
-    ins=(x, y), outs=a)
-
-rotl_imm = Instruction(
-    'rotl_imm', r"""
-    Rotate left by immediate.
-    """,
-    ins=(x, Y), outs=a)
-
-rotr_imm = Instruction(
-    'rotr_imm', r"""
-    Rotate right by immediate.
-    """,
-    ins=(x, Y), outs=a)
-
-ishl = Instruction(
-    'ishl', r"""
-    Integer shift left. Shift the bits in ``x`` towards the MSB by ``y``
-    places. Shift in zero bits to the LSB.
-
-    The shift amount is masked to the size of ``x``.
-
-    When shifting a B-bit integer type, this instruction computes:
-
-    .. math::
-        s &:= y \pmod B,
-        a &:= x \cdot 2^s \pmod{2^B}.
-    """,
-    ins=(x, y), outs=a)
-
-ushr = Instruction(
-    'ushr', r"""
-    Unsigned shift right. Shift bits in ``x`` towards the LSB by ``y``
-    places, shifting in zero bits to the MSB. Also called a *logical
-    shift*.
-
-    The shift amount is masked to the size of the register.
-
-    When shifting a B-bit integer type, this instruction computes:
-
-    .. math::
-        s &:= y \pmod B,
-        a &:= \lfloor x \cdot 2^{-s} \rfloor.
-    """,
-    ins=(x, y), outs=a)
-
-sshr = Instruction(
-    'sshr', r"""
-    Signed shift right. Shift bits in ``x`` towards the LSB by ``y``
-    places, shifting in sign bits to the MSB. Also called an *arithmetic
-    shift*.
-
-    The shift amount is masked to the size of the register.
-    """,
-    ins=(x, y), outs=a)
-
-ishl_imm = Instruction(
-    'ishl_imm', r"""
-    Integer shift left by immediate.
-
-    The shift amount is masked to the size of ``x``.
-    """,
-    ins=(x, Y), outs=a)
-
-ushr_imm = Instruction(
-    'ushr_imm', r"""
-    Unsigned shift right by immediate.
-
-    The shift amount is masked to the size of the register.
-    """,
-    ins=(x, Y), outs=a)
-
-sshr_imm = Instruction(
-    'sshr_imm', r"""
-    Signed shift right by immediate.
-
-    The shift amount is masked to the size of the register.
-    """,
-    ins=(x, Y), outs=a)
-
-#
-# Bit counting.
-#
-
-x = Operand('x', iB)
-a = Operand('a', iB)
-
-bitrev = Instruction(
-    'bitrev', r"""
-    Reverse the bits of an integer.
-
-    Reverses the bits in ``x``.
-    """,
-    ins=x, outs=a)
-
-clz = Instruction(
-    'clz', r"""
-    Count leading zero bits.
-
-    Starting from the MSB in ``x``, count the number of zero bits before
-    reaching the first one bit. When ``x`` is zero, returns the size of x
-    in bits.
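-
-    For example, for an :type:`i32` input of ``0x00001000`` (only bit 12
-    set), the result is 19.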
- """, - ins=x, outs=a) - -cls = Instruction( - 'cls', r""" - Count leading sign bits. - - Starting from the MSB after the sign bit in ``x``, count the number of - consecutive bits identical to the sign bit. When ``x`` is 0 or -1, - returns one less than the size of x in bits. - """, - ins=x, outs=a) - -ctz = Instruction( - 'ctz', r""" - Count trailing zeros. - - Starting from the LSB in ``x``, count the number of zero bits before - reaching the first one bit. When ``x`` is zero, returns the size of x - in bits. - """, - ins=x, outs=a) - -popcnt = Instruction( - 'popcnt', r""" - Population count - - Count the number of one bits in ``x``. - """, - ins=x, outs=a) - -# -# Floating point. -# - -Float = TypeVar( - 'Float', 'A scalar or vector floating point number', - floats=True, simd=True) -fB = TypeVar('fB', 'A scalar floating point number', floats=True) - -Cond = Operand('Cond', floatcc) -x = Operand('x', Float) -y = Operand('y', Float) -a = Operand('a', Float.as_bool()) - -fcmp = Instruction( - 'fcmp', r""" - Floating point comparison. - - Two IEEE 754-2008 floating point numbers, `x` and `y`, relate to each - other in exactly one of four ways: - - == ========================================== - UN Unordered when one or both numbers is NaN. - EQ When :math:`x = y`. (And :math:`0.0 = -0.0`). - LT When :math:`x < y`. - GT When :math:`x > y`. - == ========================================== - - The 14 :type:`floatcc` condition codes each correspond to a subset of - the four relations, except for the empty set which would always be - false, and the full set which would always be true. - - The condition codes are divided into 7 'ordered' conditions which don't - include UN, and 7 unordered conditions which all include UN. - - +-------+------------+---------+------------+-------------------------+ - |Ordered |Unordered |Condition | - +=======+============+=========+============+=========================+ - |ord |EQ | LT | GT|uno |UN |NaNs absent / present. | - +-------+------------+---------+------------+-------------------------+ - |eq |EQ |ueq |UN | EQ |Equal | - +-------+------------+---------+------------+-------------------------+ - |one |LT | GT |ne |UN | LT | GT|Not equal | - +-------+------------+---------+------------+-------------------------+ - |lt |LT |ult |UN | LT |Less than | - +-------+------------+---------+------------+-------------------------+ - |le |LT | EQ |ule |UN | LT | EQ|Less than or equal | - +-------+------------+---------+------------+-------------------------+ - |gt |GT |ugt |UN | GT |Greater than | - +-------+------------+---------+------------+-------------------------+ - |ge |GT | EQ |uge |UN | GT | EQ|Greater than or equal | - +-------+------------+---------+------------+-------------------------+ - - The standard C comparison operators, `<, <=, >, >=`, are all ordered, - so they are false if either operand is NaN. The C equality operator, - `==`, is ordered, and since inequality is defined as the logical - inverse it is *unordered*. They map to the :type:`floatcc` condition - codes as follows: - - ==== ====== ============ - C `Cond` Subset - ==== ====== ============ - `==` eq EQ - `!=` ne UN | LT | GT - `<` lt LT - `<=` le LT | EQ - `>` gt GT - `>=` ge GT | EQ - ==== ====== ============ - - This subset of condition codes also corresponds to the WebAssembly - floating point comparisons of the same name. - - When this instruction compares floating point vectors, it returns a - boolean vector with the results of lane-wise comparisons. 
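-
-    For example (illustrative textual IR), ``v2 = fcmp gt v0, v1`` is an
-    ordered comparison: it is false whenever either operand is NaN.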
- """, - ins=(Cond, x, y), outs=a) - -f = Operand('f', fflags) - -ffcmp = Instruction( - 'ffcmp', r""" - Floating point comparison returning flags. - - Compares two numbers like :inst:`fcmp`, but returns floating point CPU - flags instead of testing a specific condition. - """, - ins=(x, y), outs=f) - -x = Operand('x', Float) -y = Operand('y', Float) -z = Operand('z', Float) -a = Operand('a', Float, 'Result of applying operator to each lane') - -fadd = Instruction( - 'fadd', r""" - Floating point addition. - """, - ins=(x, y), outs=a) - -fsub = Instruction( - 'fsub', r""" - Floating point subtraction. - """, - ins=(x, y), outs=a) - -fmul = Instruction( - 'fmul', r""" - Floating point multiplication. - """, - ins=(x, y), outs=a) - -fdiv = Instruction( - 'fdiv', r""" - Floating point division. - - Unlike the integer division instructions :clif:inst:`sdiv` and - :clif:inst:`udiv`, this can't trap. Division by zero is infinity or - NaN, depending on the dividend. - """, - ins=(x, y), outs=a) - -sqrt = Instruction( - 'sqrt', r""" - Floating point square root. - """, - ins=x, outs=a) - -fma = Instruction( - 'fma', r""" - Floating point fused multiply-and-add. - - Computes :math:`a := xy+z` without any intermediate rounding of the - product. - """, - ins=(x, y, z), outs=a) - -a = Operand('a', Float, '``x`` with its sign bit inverted') -fneg = Instruction( - 'fneg', r""" - Floating point negation. - - Note that this is a pure bitwise operation. - """, - ins=x, outs=a) - -a = Operand('a', Float, '``x`` with its sign bit cleared') -fabs = Instruction( - 'fabs', r""" - Floating point absolute value. - - Note that this is a pure bitwise operation. - """, - ins=x, outs=a) - -a = Operand('a', Float, '``x`` with its sign bit changed to that of ``y``') -fcopysign = Instruction( - 'fcopysign', r""" - Floating point copy sign. - - Note that this is a pure bitwise operation. The sign bit from ``y`` is - copied to the sign bit of ``x``. - """, - ins=(x, y), outs=a) - -a = Operand('a', Float, 'The smaller of ``x`` and ``y``') - -fmin = Instruction( - 'fmin', r""" - Floating point minimum, propagating NaNs. - - If either operand is NaN, this returns a NaN. - """, - ins=(x, y), outs=a) - -a = Operand('a', Float, 'The larger of ``x`` and ``y``') - -fmax = Instruction( - 'fmax', r""" - Floating point maximum, propagating NaNs. - - If either operand is NaN, this returns a NaN. - """, - ins=(x, y), outs=a) - -a = Operand('a', Float, '``x`` rounded to integral value') - -ceil = Instruction( - 'ceil', r""" - Round floating point round to integral, towards positive infinity. - """, - ins=x, outs=a) - -floor = Instruction( - 'floor', r""" - Round floating point round to integral, towards negative infinity. - """, - ins=x, outs=a) - -trunc = Instruction( - 'trunc', r""" - Round floating point round to integral, towards zero. - """, - ins=x, outs=a) - -nearest = Instruction( - 'nearest', r""" - Round floating point round to integral, towards nearest with ties to - even. - """, - ins=x, outs=a) - -# -# CPU flag operations -# - - -Cond = Operand('Cond', intcc) -f = Operand('f', iflags) -a = Operand('a', b1) - -trueif = Instruction( - 'trueif', r""" - Test integer CPU flags for a specific condition. - - Check the CPU flags in ``f`` against the ``Cond`` condition code and - return true when the condition code is satisfied. - """, - ins=(Cond, f), outs=a) - -Cond = Operand('Cond', floatcc) -f = Operand('f', fflags) - -trueff = Instruction( - 'trueff', r""" - Test floating point CPU flags for a specific condition. 
- - Check the CPU flags in ``f`` against the ``Cond`` condition code and - return true when the condition code is satisfied. - """, - ins=(Cond, f), outs=a) - -# -# Conversions -# - -x = Operand('x', Mem) -a = Operand('a', MemTo, 'Bits of `x` reinterpreted') - -bitcast = Instruction( - 'bitcast', r""" - Reinterpret the bits in `x` as a different type. - - The input and output types must be storable to memory and of the same - size. A bitcast is equivalent to storing one type and loading the other - type from the same address. - """, - ins=x, outs=a) - -Bool = TypeVar( - 'Bool', - 'A scalar or vector boolean type', - bools=True, simd=True) -BoolTo = TypeVar( - 'BoolTo', - 'A smaller boolean type with the same number of lanes', - bools=True, simd=True) - -x = Operand('x', Bool) -a = Operand('a', BoolTo) - -breduce = Instruction( - 'breduce', r""" - Convert `x` to a smaller boolean type in the platform-defined way. - - The result type must have the same number of vector lanes as the input, - and each lane must not have more bits that the input lanes. If the - input and output types are the same, this is a no-op. - """, ins=x, outs=a, constraints=WiderOrEq(Bool, BoolTo)) - -BoolTo = TypeVar( - 'BoolTo', - 'A larger boolean type with the same number of lanes', - bools=True, simd=True) - -x = Operand('x', Bool) -a = Operand('a', BoolTo) - -bextend = Instruction( - 'bextend', r""" - Convert `x` to a larger boolean type in the platform-defined way. - - The result type must have the same number of vector lanes as the input, - and each lane must not have fewer bits that the input lanes. If the - input and output types are the same, this is a no-op. - """, ins=x, outs=a, constraints=WiderOrEq(BoolTo, Bool)) - -IntTo = TypeVar( - 'IntTo', 'An integer type with the same number of lanes', - ints=True, simd=True) - -x = Operand('x', Bool) -a = Operand('a', IntTo) - -bint = Instruction( - 'bint', r""" - Convert `x` to an integer. - - True maps to 1 and false maps to 0. The result type must have the same - number of vector lanes as the input. - """, ins=x, outs=a) - -bmask = Instruction( - 'bmask', r""" - Convert `x` to an integer mask. - - True maps to all 1s and false maps to all 0s. The result type must have - the same number of vector lanes as the input. - """, ins=x, outs=a) - -Int = TypeVar('Int', 'A scalar or vector integer type', ints=True, simd=True) -IntTo = TypeVar( - 'IntTo', 'A smaller integer type with the same number of lanes', - ints=True, simd=True) - -x = Operand('x', Int) -a = Operand('a', IntTo) - -ireduce = Instruction( - 'ireduce', r""" - Convert `x` to a smaller integer type by dropping high bits. - - Each lane in `x` is converted to a smaller integer type by discarding - the most significant bits. This is the same as reducing modulo - :math:`2^n`. - - The result type must have the same number of vector lanes as the input, - and each lane must not have more bits that the input lanes. If the - input and output types are the same, this is a no-op. - """, - ins=x, outs=a, constraints=WiderOrEq(Int, IntTo)) - - -IntTo = TypeVar( - 'IntTo', 'A larger integer type with the same number of lanes', - ints=True, simd=True) - -x = Operand('x', Int) -a = Operand('a', IntTo) - -uextend = Instruction( - 'uextend', r""" - Convert `x` to a larger integer type by zero-extending. - - Each lane in `x` is converted to a larger integer type by adding - zeroes. The result has the same numerical value as `x` when both are - interpreted as unsigned integers. 
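-
-    For example, zero-extending the :type:`i8` value ``0xff`` to
-    :type:`i32` produces ``0x000000ff`` (255), whereas :inst:`sextend`
-    would produce ``0xffffffff`` (-1).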
- - The result type must have the same number of vector lanes as the input, - and each lane must not have fewer bits that the input lanes. If the - input and output types are the same, this is a no-op. - """, - ins=x, outs=a, constraints=WiderOrEq(IntTo, Int)) - -sextend = Instruction( - 'sextend', r""" - Convert `x` to a larger integer type by sign-extending. - - Each lane in `x` is converted to a larger integer type by replicating - the sign bit. The result has the same numerical value as `x` when both - are interpreted as signed integers. - - The result type must have the same number of vector lanes as the input, - and each lane must not have fewer bits that the input lanes. If the - input and output types are the same, this is a no-op. - """, - ins=x, outs=a, constraints=WiderOrEq(IntTo, Int)) - -FloatTo = TypeVar( - 'FloatTo', 'A scalar or vector floating point number', - floats=True, simd=True) - -x = Operand('x', Float) -a = Operand('a', FloatTo) - -fpromote = Instruction( - 'fpromote', r""" - Convert `x` to a larger floating point format. - - Each lane in `x` is converted to the destination floating point format. - This is an exact operation. - - Cranelift currently only supports two floating point formats - - :type:`f32` and :type:`f64`. This may change in the future. - - The result type must have the same number of vector lanes as the input, - and the result lanes must not have fewer bits than the input lanes. If - the input and output types are the same, this is a no-op. - """, - ins=x, outs=a, constraints=WiderOrEq(FloatTo, Float)) - -fdemote = Instruction( - 'fdemote', r""" - Convert `x` to a smaller floating point format. - - Each lane in `x` is converted to the destination floating point format - by rounding to nearest, ties to even. - - Cranelift currently only supports two floating point formats - - :type:`f32` and :type:`f64`. This may change in the future. - - The result type must have the same number of vector lanes as the input, - and the result lanes must not have more bits than the input lanes. If - the input and output types are the same, this is a no-op. - """, - ins=x, outs=a, constraints=WiderOrEq(Float, FloatTo)) - -x = Operand('x', Float) -a = Operand('a', IntTo) - -fcvt_to_uint = Instruction( - 'fcvt_to_uint', r""" - Convert floating point to unsigned integer. - - Each lane in `x` is converted to an unsigned integer by rounding - towards zero. If `x` is NaN or if the unsigned integral value cannot be - represented in the result type, this instruction traps. - - The result type must have the same number of vector lanes as the input. - """, - ins=x, outs=a, can_trap=True) - -fcvt_to_uint_sat = Instruction( - 'fcvt_to_uint_sat', r""" - Convert floating point to unsigned integer as fcvt_to_uint does, but - saturates the input instead of trapping. NaN and negative values are - converted to 0. - """, - ins=x, outs=a) - -fcvt_to_sint = Instruction( - 'fcvt_to_sint', r""" - Convert floating point to signed integer. - - Each lane in `x` is converted to a signed integer by rounding towards - zero. If `x` is NaN or if the signed integral value cannot be - represented in the result type, this instruction traps. - - The result type must have the same number of vector lanes as the input. - """, - ins=x, outs=a, can_trap=True) - -fcvt_to_sint_sat = Instruction( - 'fcvt_to_sint_sat', r""" - Convert floating point to signed integer as fcvt_to_sint does, but - saturates the input instead of trapping. NaN values are converted to 0. 
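-
-    For example, converting an out-of-range :type:`f32` value such as
-    ``1e30`` to :type:`i32` saturates to ``0x7fffffff`` (the maximum),
-    and ``-1e30`` saturates to ``0x80000000`` (the minimum).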
- """, - ins=x, outs=a) - -x = Operand('x', Int) -a = Operand('a', FloatTo) - -fcvt_from_uint = Instruction( - 'fcvt_from_uint', r""" - Convert unsigned integer to floating point. - - Each lane in `x` is interpreted as an unsigned integer and converted to - floating point using round to nearest, ties to even. - - The result type must have the same number of vector lanes as the input. - """, - ins=x, outs=a) - -fcvt_from_sint = Instruction( - 'fcvt_from_sint', r""" - Convert signed integer to floating point. - - Each lane in `x` is interpreted as a signed integer and converted to - floating point using round to nearest, ties to even. - - The result type must have the same number of vector lanes as the input. - """, - ins=x, outs=a) - -# -# Legalization helper instructions. -# - -WideInt = TypeVar( - 'WideInt', 'An integer type with lanes from `i16` upwards', - ints=(16, 64), simd=True) -x = Operand('x', WideInt) -lo = Operand( - 'lo', WideInt.half_width(), 'The low bits of `x`') -hi = Operand( - 'hi', WideInt.half_width(), 'The high bits of `x`') - -isplit = Instruction( - 'isplit', r""" - Split an integer into low and high parts. - - Vectors of integers are split lane-wise, so the results have the same - number of lanes as the input, but the lanes are half the size. - - Returns the low half of `x` and the high half of `x` as two independent - values. - """, - ins=x, outs=(lo, hi), is_ghost=True) - - -NarrowInt = TypeVar( - 'NarrowInt', 'An integer type with lanes type to `i32`', - ints=(8, 32), simd=True) -lo = Operand('lo', NarrowInt) -hi = Operand('hi', NarrowInt) -a = Operand( - 'a', NarrowInt.double_width(), - doc='The concatenation of `lo` and `hi`') - -iconcat = Instruction( - 'iconcat', r""" - Concatenate low and high bits to form a larger integer type. - - Vectors of integers are concatenated lane-wise such that the result has - the same number of lanes as the inputs, but the lanes are twice the - size. - """, - ins=(lo, hi), outs=a, is_ghost=True) - -GROUP.close() diff --git a/cranelift-codegen/meta-python/base/legalize.py b/cranelift-codegen/meta-python/base/legalize.py deleted file mode 100644 index 3e287a7b4..000000000 --- a/cranelift-codegen/meta-python/base/legalize.py +++ /dev/null @@ -1,713 +0,0 @@ -""" -Patterns for legalizing the `base` instruction set. - -The base Cranelift instruction set is 'fat', and many instructions don't have -legal representations in a given target ISA. This module defines legalization -patterns that describe how base instructions can be transformed to other base -instructions that are legal. -""" -from __future__ import absolute_import -from .immediates import intcc, imm64, ieee32, ieee64 -from . import instructions as insts -from . 
import types -from .instructions import uextend, sextend, ireduce -from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm -from .instructions import isub, isub_bin, isub_bout, isub_borrow, irsub_imm -from .instructions import imul, imul_imm -from .instructions import sdiv, sdiv_imm, udiv, udiv_imm -from .instructions import srem, srem_imm, urem, urem_imm -from .instructions import band, bor, bxor, isplit, iconcat -from .instructions import bnot, band_not, bor_not, bxor_not -from .instructions import band_imm, bor_imm, bxor_imm -from .instructions import icmp, icmp_imm, ifcmp, ifcmp_imm -from .instructions import iconst, bint, select -from .instructions import ishl, ishl_imm, sshr, sshr_imm, ushr, ushr_imm -from .instructions import rotl, rotl_imm, rotr, rotr_imm -from .instructions import f32const, f64const -from .instructions import store, load -from .instructions import br_table -from .instructions import bitrev -from cdsl.ast import Var -from cdsl.xform import Rtl, XFormGroup - -try: - from typing import TYPE_CHECKING # noqa - if TYPE_CHECKING: - from cdsl.instructions import Instruction # noqa -except ImportError: - TYPE_CHECKING = False - - -narrow = XFormGroup('narrow', """ - Legalize instructions by narrowing. - - The transformations in the 'narrow' group work by expressing - instructions in terms of smaller types. Operations on vector types are - expressed in terms of vector types with fewer lanes, and integer - operations are expressed in terms of smaller integer types. - """) - -widen = XFormGroup('widen', """ - Legalize instructions by widening. - - The transformations in the 'widen' group work by expressing - instructions in terms of larger types. - """) - -expand = XFormGroup('expand', """ - Legalize instructions by expansion. - - Rewrite instructions in terms of other instructions, generally - operating on the same types as the original instructions. - """) - -expand_flags = XFormGroup('expand_flags', """ - Instruction expansions for architectures with flags. - - Expand some instructions using CPU flags, then fall back to the normal - expansions. Not all architectures support CPU flags, so these patterns - are kept separate. - """, chain=expand) - - -# Custom expansions for memory objects. -expand.custom_legalize(insts.global_value, 'expand_global_value') -expand.custom_legalize(insts.heap_addr, 'expand_heap_addr') -expand.custom_legalize(insts.table_addr, 'expand_table_addr') - -# Custom expansions for calls. -expand.custom_legalize(insts.call, 'expand_call') - -# Custom expansions that need to change the CFG. -# TODO: Add sufficient XForm syntax that we don't need to hand-code these. -expand.custom_legalize(insts.trapz, 'expand_cond_trap') -expand.custom_legalize(insts.trapnz, 'expand_cond_trap') -expand.custom_legalize(insts.br_table, 'expand_br_table') -expand.custom_legalize(insts.select, 'expand_select') - -# Custom expansions for floating point constants. -# These expansions require bit-casting or creating constant pool entries. -expand.custom_legalize(insts.f32const, 'expand_fconst') -expand.custom_legalize(insts.f64const, 'expand_fconst') - -# Custom expansions for stack memory accesses. 
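-# These cannot be expressed as XForm patterns, presumably because they need
-# the function's computed stack-slot layout. As a rough, illustrative
-# sketch only (not the actual rule; `flags` stands for suitable memory
-# flags), the rewrite decomposes a stack access into an address computation
-# plus an ordinary memory access:
-#
-#     expand.legalize(
-#         a << stack_load(ss, offset),
-#         Rtl(
-#             ptr << stack_addr(ss, offset),
-#             a << load(flags, ptr, 0),
-#         ))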
-expand.custom_legalize(insts.stack_load, 'expand_stack_load') -expand.custom_legalize(insts.stack_store, 'expand_stack_store') - -x = Var('x') -y = Var('y') -z = Var('z') -a = Var('a') -a1 = Var('a1') -a2 = Var('a2') -a3 = Var('a3') -a4 = Var('a4') -b = Var('b') -b1 = Var('b1') -b2 = Var('b2') -b3 = Var('b3') -b4 = Var('b4') -b_in = Var('b_in') -b_int = Var('b_int') -c = Var('c') -c1 = Var('c1') -c2 = Var('c2') -c3 = Var('c3') -c4 = Var('c4') -c_in = Var('c_in') -c_int = Var('c_int') -d = Var('d') -d1 = Var('d1') -d2 = Var('d2') -d3 = Var('d3') -d4 = Var('d4') -e = Var('e') -e1 = Var('e1') -e2 = Var('e2') -e3 = Var('e3') -e4 = Var('e4') -f = Var('f') -f1 = Var('f1') -f2 = Var('f2') -xl = Var('xl') -xh = Var('xh') -yl = Var('yl') -yh = Var('yh') -al = Var('al') -ah = Var('ah') -cc = Var('cc') -ptr = Var('ptr') -flags = Var('flags') -offset = Var('off') -ss = Var('ss') - -narrow.legalize( - a << iadd(x, y), - Rtl( - (xl, xh) << isplit(x), - (yl, yh) << isplit(y), - (al, c) << iadd_cout(xl, yl), - ah << iadd_cin(xh, yh, c), - a << iconcat(al, ah) - )) - -narrow.legalize( - a << isub(x, y), - Rtl( - (xl, xh) << isplit(x), - (yl, yh) << isplit(y), - (al, b) << isub_bout(xl, yl), - ah << isub_bin(xh, yh, b), - a << iconcat(al, ah) - )) - -for bitop in [band, bor, bxor]: - narrow.legalize( - a << bitop(x, y), - Rtl( - (xl, xh) << isplit(x), - (yl, yh) << isplit(y), - al << bitop(xl, yl), - ah << bitop(xh, yh), - a << iconcat(al, ah) - )) - -narrow.legalize( - a << select(c, x, y), - Rtl( - (xl, xh) << isplit(x), - (yl, yh) << isplit(y), - al << select(c, xl, yl), - ah << select(c, xh, yh), - a << iconcat(al, ah) - )) - - -def widen_one_arg(signed, op): - # type: (bool, Instruction) -> None - for int_ty in [types.i8, types.i16]: - if signed: - widen.legalize( - a << op.bind(int_ty)(b), - Rtl( - x << sextend.i32(b), - z << op.i32(x), - a << ireduce.bind(int_ty)(z) - )) - else: - widen.legalize( - a << op.bind(int_ty)(b), - Rtl( - x << uextend.i32(b), - z << op.i32(x), - a << ireduce.bind(int_ty)(z) - )) - - -def widen_two_arg(signed, op): - # type: (bool, Instruction) -> None - for int_ty in [types.i8, types.i16]: - if signed: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << sextend.i32(b), - y << sextend.i32(c), - z << op.i32(x, y), - a << ireduce.bind(int_ty)(z) - )) - else: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << uextend.i32(b), - y << uextend.i32(c), - z << op.i32(x, y), - a << ireduce.bind(int_ty)(z) - )) - - -def widen_imm(signed, op): - # type: (bool, Instruction) -> None - for int_ty in [types.i8, types.i16]: - if signed: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << sextend.i32(b), - z << op.i32(x, c), - a << ireduce.bind(int_ty)(z) - )) - else: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << uextend.i32(b), - z << op.i32(x, c), - a << ireduce.bind(int_ty)(z) - )) - - -# int ops -for binop in [iadd, isub, imul, udiv, urem]: - widen_two_arg(False, binop) - -for binop in [sdiv, srem]: - widen_two_arg(True, binop) - -for binop in [iadd_imm, imul_imm, udiv_imm, urem_imm]: - widen_imm(False, binop) - -for binop in [sdiv_imm, srem_imm]: - widen_imm(True, binop) - -widen_imm(False, irsub_imm) - -# bit ops -widen_one_arg(False, bnot) - -for binop in [band, bor, bxor, band_not, bor_not, bxor_not]: - widen_two_arg(False, binop) - -for binop in [band_imm, bor_imm, bxor_imm]: - widen_imm(False, binop) - -widen_one_arg(False, insts.popcnt) - -for (int_ty, num) in [(types.i8, 24), (types.i16, 16)]: - widen.legalize( - a << 
insts.clz.bind(int_ty)(b), - Rtl( - c << uextend.i32(b), - d << insts.clz.i32(c), - e << iadd_imm(d, imm64(-num)), - a << ireduce.bind(int_ty)(e) - )) - - widen.legalize( - a << insts.cls.bind(int_ty)(b), - Rtl( - c << sextend.i32(b), - d << insts.cls.i32(c), - e << iadd_imm(d, imm64(-num)), - a << ireduce.bind(int_ty)(e) - )) - -for (int_ty, num) in [(types.i8, 1 << 8), (types.i16, 1 << 16)]: - widen.legalize( - a << insts.ctz.bind(int_ty)(b), - Rtl( - c << uextend.i32(b), - # When `b` is zero, returns the size of x in bits. - d << bor_imm(c, imm64(num)), - e << insts.ctz.i32(d), - a << ireduce.bind(int_ty)(e) - )) - -# iconst -for int_ty in [types.i8, types.i16]: - widen.legalize( - a << iconst.bind(int_ty)(b), - Rtl( - c << iconst.i32(b), - a << ireduce.bind(int_ty)(c) - )) - -widen.legalize( - a << uextend.i16.i8(b), - Rtl( - c << uextend.i32(b), - a << ireduce(c) - )) - -widen.legalize( - a << sextend.i16.i8(b), - Rtl( - c << sextend.i32(b), - a << ireduce(c) - )) - - -widen.legalize( - store.i8(flags, a, ptr, offset), - Rtl( - b << uextend.i32(a), - insts.istore8(flags, b, ptr, offset) - )) - -widen.legalize( - store.i16(flags, a, ptr, offset), - Rtl( - b << uextend.i32(a), - insts.istore16(flags, b, ptr, offset) - )) - -widen.legalize( - a << load.i8(flags, ptr, offset), - Rtl( - b << insts.uload8.i32(flags, ptr, offset), - a << ireduce(b) - )) - -widen.legalize( - a << load.i16(flags, ptr, offset), - Rtl( - b << insts.uload16.i32(flags, ptr, offset), - a << ireduce(b) - )) - -for int_ty in [types.i8, types.i16]: - widen.legalize( - br_table.bind(int_ty)(x, y, z), - Rtl( - b << uextend.i32(x), - br_table(b, y, z), - ) - ) - -for int_ty in [types.i8, types.i16]: - widen.legalize( - a << insts.bint.bind(int_ty)(b), - Rtl( - x << insts.bint.i32(b), - a << ireduce.bind(int_ty)(x) - ) - ) - -for int_ty in [types.i8, types.i16]: - for op in [ishl, ishl_imm, ushr, ushr_imm]: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << uextend.i32(b), - z << op.i32(x, c), - a << ireduce.bind(int_ty)(z) - )) - - for op in [sshr, sshr_imm]: - widen.legalize( - a << op.bind(int_ty)(b, c), - Rtl( - x << sextend.i32(b), - z << op.i32(x, c), - a << ireduce.bind(int_ty)(z) - )) - - for w_cc in [ - intcc.eq, intcc.ne, intcc.ugt, intcc.ult, intcc.uge, intcc.ule - ]: - widen.legalize( - a << insts.icmp_imm.bind(int_ty)(w_cc, b, c), - Rtl( - x << uextend.i32(b), - a << insts.icmp_imm(w_cc, x, c) - )) - widen.legalize( - a << insts.icmp.bind(int_ty)(w_cc, b, c), - Rtl( - x << uextend.i32(b), - y << uextend.i32(c), - a << insts.icmp.i32(w_cc, x, y) - )) - for w_cc in [intcc.sgt, intcc.slt, intcc.sge, intcc.sle]: - widen.legalize( - a << insts.icmp_imm.bind(int_ty)(w_cc, b, c), - Rtl( - x << sextend.i32(b), - a << insts.icmp_imm(w_cc, x, c) - )) - widen.legalize( - a << insts.icmp.bind(int_ty)(w_cc, b, c), - Rtl( - x << sextend.i32(b), - y << sextend.i32(c), - a << insts.icmp(w_cc, x, y) - ) - ) - -# Expand integer operations with carry for RISC architectures that don't have -# the flags. 
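-# The carry can be recovered after the fact because the add wraps: with
-# a := x + y (mod 2^B), a carry occurred exactly when a < x (unsigned).
-# For example, with B = 8: x = 200, y = 100 gives a = 44, and 44 < 200
-# signals the carry. Borrow is symmetric: a := x - y wrapped exactly
-# when a > x (unsigned), i.e. when x < y.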
-expand.legalize( - (a, c) << iadd_cout(x, y), - Rtl( - a << iadd(x, y), - c << icmp(intcc.ult, a, x) - )) - -expand.legalize( - (a, b) << isub_bout(x, y), - Rtl( - a << isub(x, y), - b << icmp(intcc.ugt, a, x) - )) - -expand.legalize( - a << iadd_cin(x, y, c), - Rtl( - a1 << iadd(x, y), - c_int << bint(c), - a << iadd(a1, c_int) - )) - -expand.legalize( - a << isub_bin(x, y, b), - Rtl( - a1 << isub(x, y), - b_int << bint(b), - a << isub(a1, b_int) - )) - -expand.legalize( - (a, c) << iadd_carry(x, y, c_in), - Rtl( - (a1, c1) << iadd_cout(x, y), - c_int << bint(c_in), - (a, c2) << iadd_cout(a1, c_int), - c << bor(c1, c2) - )) - -expand.legalize( - (a, b) << isub_borrow(x, y, b_in), - Rtl( - (a1, b1) << isub_bout(x, y), - b_int << bint(b_in), - (a, b2) << isub_bout(a1, b_int), - b << bor(b1, b2) - )) - -# Expansions for fcvt_from_{u,s}int for smaller integer types. -# These use expand and not widen because the controlling type variable for -# these instructions are f32/f64, which are legalized as part of the expand -# group. -for dest_ty in [types.f32, types.f64]: - for src_ty in [types.i8, types.i16]: - expand.legalize( - a << insts.fcvt_from_uint.bind(dest_ty).bind(src_ty)(b), - Rtl( - x << uextend.i32(b), - a << insts.fcvt_from_uint.bind(dest_ty).i32(x), - )) - - expand.legalize( - a << insts.fcvt_from_sint.bind(dest_ty).bind(src_ty)(b), - Rtl( - x << sextend.i32(b), - a << insts.fcvt_from_sint.bind(dest_ty).i32(x), - )) - -# Expansions for immediate operands that are out of range. -for inst_imm, inst in [ - (iadd_imm, iadd), - (imul_imm, imul), - (sdiv_imm, sdiv), - (udiv_imm, udiv), - (srem_imm, srem), - (urem_imm, urem), - (band_imm, band), - (bor_imm, bor), - (bxor_imm, bxor), - (ifcmp_imm, ifcmp)]: - expand.legalize( - a << inst_imm(x, y), - Rtl( - a1 << iconst(y), - a << inst(x, a1) - )) -expand.legalize( - a << irsub_imm(y, x), - Rtl( - a1 << iconst(x), - a << isub(a1, y) - )) - -# Rotates and shifts. -for inst_imm, inst in [ - (rotl_imm, rotl), - (rotr_imm, rotr), - (ishl_imm, ishl), - (sshr_imm, sshr), - (ushr_imm, ushr)]: - expand.legalize( - a << inst_imm(x, y), - Rtl( - a1 << iconst.i32(y), - a << inst(x, a1) - )) - -expand.legalize( - a << icmp_imm(cc, x, y), - Rtl( - a1 << iconst(y), - a << icmp(cc, x, a1) - )) - -# Expansions for *_not variants of bitwise ops. -for inst_not, inst in [ - (band_not, band), - (bor_not, bor), - (bxor_not, bxor)]: - expand.legalize( - a << inst_not(x, y), - Rtl( - a1 << bnot(y), - a << inst(x, a1) - )) - -# Expand bnot using xor. -expand.legalize( - a << bnot(x), - Rtl( - y << iconst(imm64(-1)), - a << bxor(x, y) - )) - -# Expand bitrev -# Adapted from Stack Overflow. 
-# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c -widen.legalize( - a << bitrev.i8(x), - Rtl( - a1 << band_imm(x, imm64(0xaa)), - a2 << ushr_imm(a1, imm64(1)), - a3 << band_imm(x, imm64(0x55)), - a4 << ishl_imm(a3, imm64(1)), - b << bor(a2, a4), - b1 << band_imm(b, imm64(0xcc)), - b2 << ushr_imm(b1, imm64(2)), - b3 << band_imm(b, imm64(0x33)), - b4 << ishl_imm(b3, imm64(2)), - c << bor(b2, b4), - c1 << band_imm(c, imm64(0xf0)), - c2 << ushr_imm(c1, imm64(4)), - c3 << band_imm(c, imm64(0x0f)), - c4 << ishl_imm(c3, imm64(4)), - a << bor(c2, c4), - )) - -widen.legalize( - a << bitrev.i16(x), - Rtl( - a1 << band_imm(x, imm64(0xaaaa)), - a2 << ushr_imm(a1, imm64(1)), - a3 << band_imm(x, imm64(0x5555)), - a4 << ishl_imm(a3, imm64(1)), - b << bor(a2, a4), - b1 << band_imm(b, imm64(0xcccc)), - b2 << ushr_imm(b1, imm64(2)), - b3 << band_imm(b, imm64(0x3333)), - b4 << ishl_imm(b3, imm64(2)), - c << bor(b2, b4), - c1 << band_imm(c, imm64(0xf0f0)), - c2 << ushr_imm(c1, imm64(4)), - c3 << band_imm(c, imm64(0x0f0f)), - c4 << ishl_imm(c3, imm64(4)), - d << bor(c2, c4), - d1 << band_imm(d, imm64(0xff00)), - d2 << ushr_imm(d1, imm64(8)), - d3 << band_imm(d, imm64(0x00ff)), - d4 << ishl_imm(d3, imm64(8)), - a << bor(d2, d4), - )) - -expand.legalize( - a << bitrev.i32(x), - Rtl( - a1 << band_imm(x, imm64(0xaaaaaaaa)), - a2 << ushr_imm(a1, imm64(1)), - a3 << band_imm(x, imm64(0x55555555)), - a4 << ishl_imm(a3, imm64(1)), - b << bor(a2, a4), - b1 << band_imm(b, imm64(0xcccccccc)), - b2 << ushr_imm(b1, imm64(2)), - b3 << band_imm(b, imm64(0x33333333)), - b4 << ishl_imm(b3, imm64(2)), - c << bor(b2, b4), - c1 << band_imm(c, imm64(0xf0f0f0f0)), - c2 << ushr_imm(c1, imm64(4)), - c3 << band_imm(c, imm64(0x0f0f0f0f)), - c4 << ishl_imm(c3, imm64(4)), - d << bor(c2, c4), - d1 << band_imm(d, imm64(0xff00ff00)), - d2 << ushr_imm(d1, imm64(8)), - d3 << band_imm(d, imm64(0x00ff00ff)), - d4 << ishl_imm(d3, imm64(8)), - e << bor(d2, d4), - e1 << ushr_imm(e, imm64(16)), - e2 << ishl_imm(e, imm64(16)), - a << bor(e1, e2), - )) - -expand.legalize( - a << bitrev.i64(x), - Rtl( - a1 << band_imm(x, imm64(0xaaaaaaaaaaaaaaaa)), - a2 << ushr_imm(a1, imm64(1)), - a3 << band_imm(x, imm64(0x5555555555555555)), - a4 << ishl_imm(a3, imm64(1)), - b << bor(a2, a4), - b1 << band_imm(b, imm64(0xcccccccccccccccc)), - b2 << ushr_imm(b1, imm64(2)), - b3 << band_imm(b, imm64(0x3333333333333333)), - b4 << ishl_imm(b3, imm64(2)), - c << bor(b2, b4), - c1 << band_imm(c, imm64(0xf0f0f0f0f0f0f0f0)), - c2 << ushr_imm(c1, imm64(4)), - c3 << band_imm(c, imm64(0x0f0f0f0f0f0f0f0f)), - c4 << ishl_imm(c3, imm64(4)), - d << bor(c2, c4), - d1 << band_imm(d, imm64(0xff00ff00ff00ff00)), - d2 << ushr_imm(d1, imm64(8)), - d3 << band_imm(d, imm64(0x00ff00ff00ff00ff)), - d4 << ishl_imm(d3, imm64(8)), - e << bor(d2, d4), - e1 << band_imm(e, imm64(0xffff0000ffff0000)), - e2 << ushr_imm(e1, imm64(16)), - e3 << band_imm(e, imm64(0x0000ffff0000ffff)), - e4 << ishl_imm(e3, imm64(16)), - f << bor(e2, e4), - f1 << ushr_imm(f, imm64(32)), - f2 << ishl_imm(f, imm64(32)), - a << bor(f1, f2), - )) - -# Floating-point sign manipulations. 
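-# Each of these reduces to bit twiddling against a constant with only the
-# sign bit set (the bit pattern of -0.0): fabs clears the sign bit with
-# band_not, fneg flips it with bxor, and fcopysign combines the magnitude
-# bits of x with the sign bit of y. For example, in f32, 1.0 is
-# 0x3f800000, so bxor with 0x80000000 yields 0xbf800000, i.e. -1.0.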
-for ty, minus_zero in [ - (types.f32, f32const(ieee32.bits(0x80000000))), - (types.f64, f64const(ieee64.bits(0x8000000000000000)))]: - expand.legalize( - a << insts.fabs.bind(ty)(x), - Rtl( - b << minus_zero, - a << band_not(x, b), - )) - expand.legalize( - a << insts.fneg.bind(ty)(x), - Rtl( - b << minus_zero, - a << bxor(x, b), - )) - expand.legalize( - a << insts.fcopysign.bind(ty)(x, y), - Rtl( - b << minus_zero, - a1 << band_not(x, b), - a2 << band(y, b), - a << bor(a1, a2) - )) - -expand.custom_legalize(insts.br_icmp, 'expand_br_icmp') - -# Expansions using CPU flags. - -expand_flags.legalize( - insts.trapnz(x, c), - Rtl( - a << insts.ifcmp_imm(x, imm64(0)), - insts.trapif(intcc.ne, a, c) - )) -expand_flags.legalize( - insts.trapz(x, c), - Rtl( - a << insts.ifcmp_imm(x, imm64(0)), - insts.trapif(intcc.eq, a, c) - )) diff --git a/cranelift-codegen/meta-python/base/predicates.py b/cranelift-codegen/meta-python/base/predicates.py deleted file mode 100644 index 2a521f7cc..000000000 --- a/cranelift-codegen/meta-python/base/predicates.py +++ /dev/null @@ -1,42 +0,0 @@ -""" -Cranelift predicates that consider `Function` fields. -""" -from cdsl.predicates import FieldPredicate -from .formats import UnaryGlobalValue, InstructionFormat - -try: - from typing import TYPE_CHECKING - if TYPE_CHECKING: - from cdsl.formats import InstructionFormat, FormatField # noqa -except ImportError: - pass - - -class IsColocatedFunc(FieldPredicate): - """ - An instruction predicate that checks the referenced function is colocated. - """ - - def __init__(self, field): - # type: (FormatField) -> None - super(IsColocatedFunc, self).__init__( - field, 'is_colocated_func', ('func',)) - - -class IsColocatedData(FieldPredicate): - """ - An instruction predicate that checks the referenced data object is - colocated. 
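-
-    Here colocated means the data is defined within the same linkage unit
-    as the referencing function, so encodings may assume it is reachable
-    with PC-relative addressing.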
- """ - - def __init__(self): - # type: () -> None - super(IsColocatedData, self).__init__( - UnaryGlobalValue.global_value, 'is_colocated_data', ('func',)) - - -class LengthEquals(FieldPredicate): - def __init__(self, iform, num): - # type: (InstructionFormat, int) -> None - super(LengthEquals, self).__init__( - iform.args(), 'has_length_of', (num, 'func')) diff --git a/cranelift-codegen/meta-python/base/semantics.py b/cranelift-codegen/meta-python/base/semantics.py deleted file mode 100644 index ec1852133..000000000 --- a/cranelift-codegen/meta-python/base/semantics.py +++ /dev/null @@ -1,218 +0,0 @@ -from __future__ import absolute_import -from semantics.primitives import prim_to_bv, prim_from_bv, bvsplit, bvconcat,\ - bvadd, bvzeroext, bvsignext -from semantics.primitives import bveq, bvne, bvsge, bvsgt, bvsle, bvslt,\ - bvuge, bvugt, bvule, bvult -from semantics.macros import bool2bv -from .instructions import vsplit, vconcat, iadd, iadd_cout, icmp, bextend, \ - isplit, iconcat, iadd_cin, iadd_carry -from .immediates import intcc -from cdsl.xform import Rtl, XForm -from cdsl.ast import Var -from cdsl.typevar import TypeSet -from cdsl.ti import InTypeset - -try: - from typing import TYPE_CHECKING # noqa - if TYPE_CHECKING: - from cdsl.ast import Enumerator # noqa - from cdsl.instructions import Instruction # noqa -except ImportError: - TYPE_CHECKING = False - -x = Var('x') -y = Var('y') -a = Var('a') -b = Var('b') -c_out = Var('c_out') -c_in = Var('c_in') -CC = Var('CC') -bc_out = Var('bc_out') -bvc_out = Var('bvc_out') -bvc_in = Var('bvc_in') -xhi = Var('xhi') -yhi = Var('yhi') -ahi = Var('ahi') -bhi = Var('bhi') -xlo = Var('xlo') -ylo = Var('ylo') -alo = Var('alo') -blo = Var('blo') -lo = Var('lo') -hi = Var('hi') -bvx = Var('bvx') -bvy = Var('bvy') -bva = Var('bva') -bvt = Var('bvt') -bvs = Var('bvs') -bva_wide = Var('bva_wide') -bvlo = Var('bvlo') -bvhi = Var('bvhi') - -ScalarTS = TypeSet(lanes=(1, 1), ints=True, floats=True, bools=True) - -vsplit.set_semantics( - (lo, hi) << vsplit(x), - Rtl( - bvx << prim_to_bv(x), - (bvlo, bvhi) << bvsplit(bvx), - lo << prim_from_bv(bvlo), - hi << prim_from_bv(bvhi) - )) - -vconcat.set_semantics( - x << vconcat(lo, hi), - Rtl( - bvlo << prim_to_bv(lo), - bvhi << prim_to_bv(hi), - bvx << bvconcat(bvlo, bvhi), - x << prim_from_bv(bvx) - )) - -iadd.set_semantics( - a << iadd(x, y), - (Rtl( - bvx << prim_to_bv(x), - bvy << prim_to_bv(y), - bva << bvadd(bvx, bvy), - a << prim_from_bv(bva) - ), [InTypeset(x.get_typevar(), ScalarTS)]), - Rtl( - (xlo, xhi) << vsplit(x), - (ylo, yhi) << vsplit(y), - alo << iadd(xlo, ylo), - ahi << iadd(xhi, yhi), - a << vconcat(alo, ahi) - )) - -# -# Integer arithmetic with carry and/or borrow. 
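-#
-# The scalar cases below model values as flat bitvectors: prim_to_bv
-# reinterprets an SSA value as a bitvector, bvadd/bvult/bool2bv are the
-# SMT-style primitives, and prim_from_bv converts back. A 1-bit carry-in
-# is widened with bvzeroext so it can participate in a full-width bvadd.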
-
-#
-iadd_cin.set_semantics(
-    a << iadd_cin(x, y, c_in),
-    Rtl(
-        bvx << prim_to_bv(x),
-        bvy << prim_to_bv(y),
-        bvc_in << prim_to_bv(c_in),
-        bvs << bvzeroext(bvc_in),
-        bvt << bvadd(bvx, bvy),
-        bva << bvadd(bvt, bvs),
-        a << prim_from_bv(bva)
-    ))
-
-iadd_cout.set_semantics(
-    (a, c_out) << iadd_cout(x, y),
-    Rtl(
-        bvx << prim_to_bv(x),
-        bvy << prim_to_bv(y),
-        bva << bvadd(bvx, bvy),
-        bc_out << bvult(bva, bvx),
-        bvc_out << bool2bv(bc_out),
-        a << prim_from_bv(bva),
-        c_out << prim_from_bv(bvc_out)
-    ))
-
-iadd_carry.set_semantics(
-    (a, c_out) << iadd_carry(x, y, c_in),
-    Rtl(
-        bvx << prim_to_bv(x),
-        bvy << prim_to_bv(y),
-        bvc_in << prim_to_bv(c_in),
-        bvs << bvzeroext(bvc_in),
-        bvt << bvadd(bvx, bvy),
-        bva << bvadd(bvt, bvs),
-        bc_out << bvult(bva, bvx),
-        bvc_out << bool2bv(bc_out),
-        a << prim_from_bv(bva),
-        c_out << prim_from_bv(bvc_out)
-    ))
-
-bextend.set_semantics(
-    a << bextend(x),
-    (Rtl(
-        bvx << prim_to_bv(x),
-        bvy << bvsignext(bvx),
-        a << prim_from_bv(bvy)
-    ), [InTypeset(x.get_typevar(), ScalarTS)]),
-    Rtl(
-        (xlo, xhi) << vsplit(x),
-        alo << bextend(xlo),
-        ahi << bextend(xhi),
-        a << vconcat(alo, ahi)
-    ))
-
-
-def create_comp_xform(cc, bvcmp_func):
-    # type: (Enumerator, Instruction) -> XForm
-    ba = Var('ba')
-    return XForm(
-        Rtl(
-            a << icmp(cc, x, y)
-        ),
-        Rtl(
-            bvx << prim_to_bv(x),
-            bvy << prim_to_bv(y),
-            ba << bvcmp_func(bvx, bvy),
-            bva << bool2bv(ba),
-            bva_wide << bvzeroext(bva),
-            a << prim_from_bv(bva_wide),
-        ),
-        constraints=InTypeset(x.get_typevar(), ScalarTS))
-
-
-icmp.set_semantics(
-    a << icmp(CC, x, y),
-    Rtl(
-        (xlo, xhi) << vsplit(x),
-        (ylo, yhi) << vsplit(y),
-        alo << icmp(CC, xlo, ylo),
-        ahi << icmp(CC, xhi, yhi),
-        b << vconcat(alo, ahi),
-        a << bextend(b)
-    ),
-    create_comp_xform(intcc.eq, bveq),
-    create_comp_xform(intcc.ne, bvne),
-    create_comp_xform(intcc.sge, bvsge),
-    create_comp_xform(intcc.sgt, bvsgt),
-    create_comp_xform(intcc.sle, bvsle),
-    create_comp_xform(intcc.slt, bvslt),
-    create_comp_xform(intcc.uge, bvuge),
-    create_comp_xform(intcc.ugt, bvugt),
-    create_comp_xform(intcc.ule, bvule),
-    create_comp_xform(intcc.ult, bvult))
-
-#
-# Legalization helper instructions.
-#
-
-isplit.set_semantics(
-    (xlo, xhi) << isplit(x),
-    (Rtl(
-        bvx << prim_to_bv(x),
-        (bvlo, bvhi) << bvsplit(bvx),
-        xlo << prim_from_bv(bvlo),
-        xhi << prim_from_bv(bvhi)
-    ), [InTypeset(x.get_typevar(), ScalarTS)]),
-    Rtl(
-        (a, b) << vsplit(x),
-        (alo, ahi) << isplit(a),
-        (blo, bhi) << isplit(b),
-        xlo << vconcat(alo, blo),
-        xhi << vconcat(ahi, bhi)
-    ))
-
-iconcat.set_semantics(
-    x << iconcat(xlo, xhi),
-    (Rtl(
-        bvlo << prim_to_bv(xlo),
-        bvhi << prim_to_bv(xhi),
-        bvx << bvconcat(bvlo, bvhi),
-        x << prim_from_bv(bvx)
-    ), [InTypeset(x.get_typevar(), ScalarTS)]),
-    Rtl(
-        (alo, ahi) << vsplit(xlo),
-        (blo, bhi) << vsplit(xhi),
-        a << iconcat(alo, blo),
-        b << iconcat(ahi, bhi),
-        x << vconcat(a, b),
-    ))
diff --git a/cranelift-codegen/meta-python/base/settings.py b/cranelift-codegen/meta-python/base/settings.py
deleted file mode 100644
index 534ef2883..000000000
--- a/cranelift-codegen/meta-python/base/settings.py
+++ /dev/null
@@ -1,144 +0,0 @@
-"""
-Cranelift shared settings.
-
-This module defines settings relevant for all code generators.
-"""
-from __future__ import absolute_import
-from cdsl.settings import SettingGroup, BoolSetting, EnumSetting, NumSetting
-
-group = SettingGroup('shared')
-
-opt_level = EnumSetting(
    """
-    Optimization level:
-
-    - default: Very profitable optimizations enabled, none slow.
-        - best: Enable all optimizations.
-        - fastest: Optimize for compile time by disabling most optimizations.
-        """,
-        'default', 'best', 'fastest')
-
-enable_verifier = BoolSetting(
-        """
-        Run the Cranelift IR verifier at strategic times during compilation.
-
-        This makes compilation slower but catches many bugs. The verifier is
-        always enabled by default, which is useful during development.
-        """,
-        default=True)
-
-# Note that Cranelift doesn't currently need an is_pie flag, because PIE is
-# just PIC where symbols can't be pre-empted, which can be expressed with the
-# `colocated` flag on external functions and global values.
-is_pic = BoolSetting("Enable Position-Independent Code generation")
-
-colocated_libcalls = BoolSetting(
-        """
-        Use colocated libcalls.
-
-        Generate code that assumes that libcalls can be declared "colocated",
-        meaning they will be defined along with the current function, such that
-        they can use more efficient addressing.
-        """)
-
-avoid_div_traps = BoolSetting(
-        """
-        Generate explicit checks around native division instructions to avoid
-        their trapping.
-
-        This is primarily used by SpiderMonkey which doesn't install a signal
-        handler for SIGFPE, but expects a SIGILL trap for division by zero.
-
-        On ISAs like ARM where the native division instructions don't trap,
-        this setting has no effect - explicit checks are always inserted.
-        """)
-
-enable_float = BoolSetting(
-        """
-        Enable the use of floating-point instructions.
-
-        Disabling use of floating-point instructions is not yet implemented.
-        """,
-        default=True)
-
-enable_nan_canonicalization = BoolSetting(
-        """
-        Enable NaN canonicalization.
-
-        This replaces NaNs with a single canonical value, for users requiring
-        entirely deterministic WebAssembly computation. This is not required
-        by the WebAssembly spec, so it is not enabled by default.
-        """,
-        default=False)
-
-enable_simd = BoolSetting(
-        """Enable the use of SIMD instructions.""",
-        default=True)
-
-enable_atomics = BoolSetting(
-        """Enable the use of atomic instructions.""",
-        default=True)
-
-#
-# Settings specific to the `baldrdash` calling convention.
-#
-baldrdash_prologue_words = NumSetting(
-        """
-        Number of pointer-sized words pushed by the baldrdash prologue.
-
-        Functions with the `baldrdash` calling convention don't generate their
-        own prologue and epilogue. They depend on externally generated code
-        that pushes a fixed number of words in the prologue and restores them
-        in the epilogue.
-
-        This setting configures the number of pointer-sized words pushed on the
-        stack when the Cranelift-generated code is entered. This includes the
-        pushed return address on x86.
-        """)
-
-#
-# BaldrMonkey requires that not-yet-relocated function addresses be encoded
-# as all-ones bitpatterns.
-#
-allones_funcaddrs = BoolSetting(
-        """
-        Emit not-yet-relocated function addresses as all-ones bit patterns.
-        """)
-
-#
-# Stack probing options.
-#
-probestack_enabled = BoolSetting(
-        """
-        Enable the use of stack probes, for calling conventions which support
-        this functionality.
-        """,
-        default=True)
-
-probestack_func_adjusts_sp = BoolSetting(
-        """
-        Set this to true if the stack probe function modifies the stack pointer
-        itself.
-        """)
-
-probestack_size_log2 = NumSetting(
-        """
-        The log2 of the size of the stack guard region.
-
-        Stack frames larger than this size will have stack overflow checked
-        by calling the probestack function.
-
-        The default is 12, which translates to a size of 4096.
- """, - default=12) - -# -# Jump table options. -# -jump_tables_enabled = BoolSetting( - """ - Enable the use of jump tables in generated machine code. - """, - default=True) - -group.close(globals()) diff --git a/cranelift-codegen/meta-python/base/types.py b/cranelift-codegen/meta-python/base/types.py deleted file mode 100644 index 9141f4fbc..000000000 --- a/cranelift-codegen/meta-python/base/types.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -The base.types module predefines all the Cranelift scalar types. -""" -from __future__ import absolute_import -from cdsl.types import IntType, FloatType, BoolType, FlagsType - -#: Abstract boolean (can't be stored in memory, use bint to convert to 0 or 1). -b1 = BoolType(1) #: 1-bit bool. - -#: Booleans used as SIMD elements (can be stored in memory, true is all-ones). -b8 = BoolType(8) #: 8-bit bool. -b16 = BoolType(16) #: 16-bit bool. -b32 = BoolType(32) #: 32-bit bool. -b64 = BoolType(64) #: 64-bit bool. - -# Integers. -i8 = IntType(8) #: 8-bit int. -i16 = IntType(16) #: 16-bit int. -i32 = IntType(32) #: 32-bit int. -i64 = IntType(64) #: 64-bit int. - -#: IEEE single precision. -f32 = FloatType( - 32, """ - A 32-bit floating point type represented in the IEEE 754-2008 - *binary32* interchange format. This corresponds to the :c:type:`float` - type in most C implementations. - """) - -#: IEEE double precision. -f64 = FloatType( - 64, """ - A 64-bit floating point type represented in the IEEE 754-2008 - *binary64* interchange format. This corresponds to the :c:type:`double` - type in most C implementations. - """) -#: CPU flags from an integer comparison. -iflags = FlagsType( - 'iflags', """ - CPU flags representing the result of an integer comparison. These flags - can be tested with an :type:`intcc` condition code. - """) - -#: CPU flags from a floating point comparison. -fflags = FlagsType( - 'fflags', """ - CPU flags representing the result of a floating point comparison. These - flags can be tested with a :type:`floatcc` condition code. - """) diff --git a/cranelift-codegen/meta-python/build.py b/cranelift-codegen/meta-python/build.py deleted file mode 100644 index 90a0cf254..000000000 --- a/cranelift-codegen/meta-python/build.py +++ /dev/null @@ -1,51 +0,0 @@ -# Second-level build script. -# -# This script is run from cranelift-codegen/build.rs to generate Rust files. 
-
-from __future__ import absolute_import
-import argparse
-import isa
-import gen_build_deps
-import gen_encoding
-import gen_binemit
-
-try:
-    from typing import List, Set  # noqa
-    from cdsl.isa import TargetISA  # noqa
-    from cdsl.instructions import InstructionGroup  # noqa
-except ImportError:
-    pass
-
-
-def number_all_instructions(isas):
-    # type: (List[TargetISA]) -> None
-    seen = set()  # type: Set[InstructionGroup]
-    num_inst = 1
-    for target_isa in isas:
-        for g in target_isa.instruction_groups:
-            if g not in seen:
-                for i in g.instructions:
-                    i.number = num_inst
-                    num_inst += 1
-                seen.add(g)
-
-
-def main():
-    # type: () -> None
-    parser = argparse.ArgumentParser(
-            description='Generate sources for Cranelift.')
-    parser.add_argument('--out-dir', help='set output directory')
-
-    args = parser.parse_args()
-    out_dir = args.out_dir
-
-    isas = isa.all_isas()
-    number_all_instructions(isas)
-
-    gen_encoding.generate(isas, out_dir)
-    gen_binemit.generate(isas, out_dir)
-    gen_build_deps.generate()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/cranelift-codegen/meta-python/cdsl/__init__.py b/cranelift-codegen/meta-python/cdsl/__init__.py
deleted file mode 100644
index a0b5d4c13..000000000
--- a/cranelift-codegen/meta-python/cdsl/__init__.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""
-Cranelift DSL classes.

-This module defines the classes that are used to define Cranelift instructions
-and other entities.
-"""
-from __future__ import absolute_import
-import re
-
-
-camel_re = re.compile('(^|_)([a-z])')
-
-
-def camel_case(s):
-    # type: (str) -> str
-    """Convert the string s to CamelCase:
-    >>> camel_case('x')
-    'X'
-    >>> camel_case('camel_case')
-    'CamelCase'
-    """
-    return camel_re.sub(lambda m: m.group(2).upper(), s)
-
-
-def is_power_of_two(x):
-    # type: (int) -> bool
-    """Check if `x` is a power of two:
-    >>> is_power_of_two(0)
-    False
-    >>> is_power_of_two(1)
-    True
-    >>> is_power_of_two(2)
-    True
-    >>> is_power_of_two(3)
-    False
-    """
-    return x > 0 and x & (x-1) == 0
-
-
-def next_power_of_two(x):
-    # type: (int) -> int
-    """
-    Compute the next power of two that is greater than `x`:
-    >>> next_power_of_two(0)
-    1
-    >>> next_power_of_two(1)
-    2
-    >>> next_power_of_two(2)
-    4
-    >>> next_power_of_two(3)
-    4
-    >>> next_power_of_two(4)
-    8
-    """
-    s = 1
-    while x & (x + 1) != 0:
-        x |= x >> s
-        s *= 2
-    return x + 1
diff --git a/cranelift-codegen/meta-python/cdsl/ast.py b/cranelift-codegen/meta-python/cdsl/ast.py
deleted file mode 100644
index 4a72fc486..000000000
--- a/cranelift-codegen/meta-python/cdsl/ast.py
+++ /dev/null
@@ -1,581 +0,0 @@
-"""
-Abstract syntax trees.
-
-This module defines classes that can be used to create abstract syntax trees
-for pattern matching and rewriting of Cranelift instructions.
-"""
-from __future__ import absolute_import
-from . import instructions
-from .typevar import TypeVar
-from .predicates import IsEqual, And, TypePredicate, CtrlTypePredicate
-
-try:
-    from typing import Union, Tuple, Sequence, TYPE_CHECKING, Dict, List  # noqa
-    from typing import Optional, Set, Any  # noqa
-    if TYPE_CHECKING:
-        from .operands import ImmediateKind  # noqa
-        from .predicates import PredNode  # noqa
-        VarAtomMap = Dict["Var", "Atom"]
-except ImportError:
-    pass
-
-
-def replace_var(arg, m):
-    # type: (Expr, VarAtomMap) -> Expr
-    """
-    Given a var v, return either m[v] or a new variable v' (and remember
-    m[v]=v').
Otherwise return the argument unchanged - """ - if isinstance(arg, Var): - new_arg = m.get(arg, Var(arg.name)) # type: Atom - m[arg] = new_arg - return new_arg - return arg - - -class Def(object): - """ - An AST definition associates a set of variables with the values produced by - an expression. - - Example: - - >>> from base.instructions import iadd_cout, iconst - >>> x = Var('x') - >>> y = Var('y') - >>> x << iconst(4) - (Var(x),) << Apply(iconst, (4,)) - >>> (x, y) << iadd_cout(4, 5) - (Var(x), Var(y)) << Apply(iadd_cout, (4, 5)) - - The `<<` operator is used to create variable definitions. - - :param defs: Single variable or tuple of variables to be defined. - :param expr: Expression generating the values. - """ - - def __init__(self, defs, expr): - # type: (Union[Var, Tuple[Var, ...]], Apply) -> None - if not isinstance(defs, tuple): - self.defs = (defs,) # type: Tuple[Var, ...] - else: - self.defs = defs - assert isinstance(expr, Apply) - self.expr = expr - - def __repr__(self): - # type: () -> str - return "{} << {!r}".format(self.defs, self.expr) - - def __str__(self): - # type: () -> str - if len(self.defs) == 1: - return "{!s} << {!s}".format(self.defs[0], self.expr) - else: - return "({}) << {!s}".format( - ', '.join(map(str, self.defs)), self.expr) - - def copy(self, m): - # type: (VarAtomMap) -> Def - """ - Return a copy of this Def with vars replaced with fresh variables, - in accordance with the map m. Update m as necessary. - """ - new_expr = self.expr.copy(m) - new_defs = [] # type: List[Var] - for v in self.defs: - new_v = replace_var(v, m) - assert(isinstance(new_v, Var)) - new_defs.append(new_v) - - return Def(tuple(new_defs), new_expr) - - def definitions(self): - # type: () -> Set[Var] - """ Return the set of all Vars that are defined by self""" - return set(self.defs) - - def uses(self): - # type: () -> Set[Var] - """ Return the set of all Vars that are used(read) by self""" - return set(self.expr.vars()) - - def vars(self): - # type: () -> Set[Var] - """Return the set of all Vars in self that correspond to SSA values""" - return self.definitions().union(self.uses()) - - def substitution(self, other, s): - # type: (Def, VarAtomMap) -> Optional[VarAtomMap] - """ - If the Defs self and other agree structurally, return a variable - substitution to transform self to other. Otherwise return None. Two - Defs agree structurally if there exists a Var substitution, that can - transform one into the other. See Apply.substitution() for more - details. - """ - s = self.expr.substitution(other.expr, s) - - if (s is None): - return s - - assert len(self.defs) == len(other.defs) - for (self_d, other_d) in zip(self.defs, other.defs): - assert self_d not in s # Guaranteed by SSA form - s[self_d] = other_d - - return s - - -class Expr(object): - """ - An AST expression. - """ - - -class Atom(Expr): - """ - An Atom in the DSL is either a literal or a Var - """ - - -class Var(Atom): - """ - A free variable. - - When variables are used in `XForms` with source and destination patterns, - they are classified as follows: - - Input values - Uses in the source pattern with no preceding def. These may appear as - inputs in the destination pattern too, but no new inputs can be - introduced. - Output values - Variables that are defined in both the source and destination pattern. - These values may have uses outside the source pattern, and the - destination pattern must compute the same value. 
- Intermediate values - Values that are defined in the source pattern, but not in the - destination pattern. These may have uses outside the source pattern, so - the defining instruction can't be deleted immediately. - Temporary values - Values that are defined only in the destination pattern. - """ - - def __init__(self, name): - # type: (str) -> None - self.name = name - # The `Def` defining this variable in a source pattern. - self.src_def = None # type: Def - # The `Def` defining this variable in a destination pattern. - self.dst_def = None # type: Def - # TypeVar representing the type of this variable. - self.typevar = None # type: TypeVar - # The original 'typeof(x)' type variable that was created for this Var. - # This one doesn't change. `self.typevar` above may be changed to - # another typevar by type inference. - self.original_typevar = None # type: TypeVar - - def __str__(self): - # type: () -> str - return self.name - - def __repr__(self): - # type: () -> str - s = self.name - if self.src_def: - s += ", src" - if self.dst_def: - s += ", dst" - return "Var({})".format(s) - - # Context bits for `set_def` indicating which pattern has defines of this - # var. - SRCCTX = 1 - DSTCTX = 2 - - def set_def(self, context, d): - # type: (int, Def) -> None - """ - Set the `Def` that defines this variable in the given context. - - The `context` must be one of `SRCCTX` or `DSTCTX` - """ - if context == self.SRCCTX: - self.src_def = d - else: - self.dst_def = d - - def get_def(self, context): - # type: (int) -> Def - """ - Get the def of this variable in context. - - The `context` must be one of `SRCCTX` or `DSTCTX` - """ - if context == self.SRCCTX: - return self.src_def - else: - return self.dst_def - - def is_input(self): - # type: () -> bool - """Is this an input value to the src pattern?""" - return self.src_def is None and self.dst_def is None - - def is_output(self): - # type: () -> bool - """Is this an output value, defined in both src and dst patterns?""" - return self.src_def is not None and self.dst_def is not None - - def is_intermediate(self): - # type: () -> bool - """Is this an intermediate value, defined only in the src pattern?""" - return self.src_def is not None and self.dst_def is None - - def is_temp(self): - # type: () -> bool - """Is this a temp value, defined only in the dst pattern?""" - return self.src_def is None and self.dst_def is not None - - def get_typevar(self): - # type: () -> TypeVar - """Get the type variable representing the type of this variable.""" - if not self.typevar: - # Create a TypeVar allowing all types. - tv = TypeVar( - 'typeof_{}'.format(self), - 'Type of the pattern variable `{}`'.format(self), - ints=True, floats=True, bools=True, - scalars=True, simd=True, bitvecs=True, - specials=True) - self.original_typevar = tv - self.typevar = tv - return self.typevar - - def set_typevar(self, tv): - # type: (TypeVar) -> None - self.typevar = tv - - def has_free_typevar(self): - # type: () -> bool - """ - Check if this variable has a free type variable. - - If not, the type of this variable is computed from the type of another - variable. - """ - if not self.typevar or self.typevar.is_derived: - return False - return self.typevar is self.original_typevar - - def rust_type(self): - # type: () -> str - """ - Get a Rust expression that computes the type of this variable. - - It is assumed that local variables exist corresponding to the free type - variables. 
- """ - return self.typevar.rust_expr() - - -class Apply(Expr): - """ - Apply an instruction to arguments. - - An `Apply` AST expression is created by using function call syntax on - instructions. This applies to both bound and unbound polymorphic - instructions: - - >>> from base.instructions import jump, iadd - >>> jump('next', ()) - Apply(jump, ('next', ())) - >>> iadd.i32('x', 'y') - Apply(iadd.i32, ('x', 'y')) - - :param inst: The instruction being applied, an `Instruction` or - `BoundInstruction` instance. - :param args: Tuple of arguments. - """ - - def __init__(self, inst, args): - # type: (instructions.MaybeBoundInst, Tuple[Expr, ...]) -> None # noqa - if isinstance(inst, instructions.BoundInstruction): - self.inst = inst.inst - self.typevars = inst.typevars - else: - assert isinstance(inst, instructions.Instruction) - self.inst = inst - self.typevars = () - self.args = args - assert len(self.inst.ins) == len(args) - - # Check that the kinds of Literals arguments match the expected Operand - for op_idx in self.inst.imm_opnums: - arg = self.args[op_idx] - op = self.inst.ins[op_idx] - - if isinstance(arg, Literal): - assert arg.kind == op.kind, \ - "Passing literal {} to field of wrong kind {}."\ - .format(arg, op.kind) - - def __rlshift__(self, other): - # type: (Union[Var, Tuple[Var, ...]]) -> Def - """ - Define variables using `var << expr` or `(v1, v2) << expr`. - """ - return Def(other, self) - - def instname(self): - # type: () -> str - i = self.inst.name - for t in self.typevars: - i += '.{}'.format(t) - return i - - def __repr__(self): - # type: () -> str - return "Apply({}, {})".format(self.instname(), self.args) - - def __str__(self): - # type: () -> str - args = ', '.join(map(str, self.args)) - return '{}({})'.format(self.instname(), args) - - def rust_builder(self, defs=None): - # type: (Sequence[Var]) -> str - """ - Return a Rust Builder method call for instantiating this instruction - application. - - The `defs` argument should be a list of variables defined by this - instruction. It is used to construct a result type if necessary. - """ - args = ', '.join(map(str, self.args)) - # Do we need to pass an explicit type argument? - if self.inst.is_polymorphic and not self.inst.use_typevar_operand: - args = defs[0].rust_type() + ', ' + args - method = self.inst.snake_name() - return '{}({})'.format(method, args) - - def inst_predicate(self): - # type: () -> PredNode - """ - Construct an instruction predicate that verifies the immediate operands - on this instruction. - - Immediate operands in a source pattern can be either free variables or - constants like `ConstantInt` and `Enumerator`. We don't currently - support constraints on free variables, but we may in the future. - """ - pred = None # type: PredNode - iform = self.inst.format - - # Examine all of the immediate operands. - for ffield, opnum in zip(iform.imm_fields, self.inst.imm_opnums): - arg = self.args[opnum] - - # Ignore free variables for now. We may add variable predicates - # later. - if isinstance(arg, Var): - continue - - pred = And.combine(pred, IsEqual(ffield, arg)) - - # Add checks for any bound secondary type variables. - # We can't check the controlling type variable this way since it may - # not appear as the type of an operand. 
- if len(self.typevars) > 1: - for bound_ty, tv in zip(self.typevars[1:], - self.inst.other_typevars): - if bound_ty is None: - continue - type_chk = TypePredicate.typevar_check(self.inst, tv, bound_ty) - pred = And.combine(pred, type_chk) - - return pred - - def inst_predicate_with_ctrl_typevar(self): - # type: () -> PredNode - """ - Same as `inst_predicate()`, but also check the controlling type - variable. - """ - pred = self.inst_predicate() - - if len(self.typevars) > 0: - bound_ty = self.typevars[0] - type_chk = None # type: PredNode - if bound_ty is not None: - # Prefer to look at the types of input operands. - if self.inst.use_typevar_operand: - type_chk = TypePredicate.typevar_check( - self.inst, self.inst.ctrl_typevar, bound_ty) - else: - type_chk = CtrlTypePredicate(bound_ty) - pred = And.combine(pred, type_chk) - - return pred - - def copy(self, m): - # type: (VarAtomMap) -> Apply - """ - Return a copy of this Expr with vars replaced with fresh variables, - in accordance with the map m. Update m as necessary. - """ - return Apply(self.inst, tuple(map(lambda e: replace_var(e, m), - self.args))) - - def vars(self): - # type: () -> Set[Var] - """Return the set of all Vars in self that correspond to SSA values""" - res = set() - for i in self.inst.value_opnums: - arg = self.args[i] - assert isinstance(arg, Var) - res.add(arg) - return res - - def substitution(self, other, s): - # type: (Apply, VarAtomMap) -> Optional[VarAtomMap] - """ - If there is a substitution from Var->Atom that converts self to other, - return it, otherwise return None. Note that this is strictly weaker - than unification (see TestXForm.test_subst_enum_bad_var_const for - example). - """ - if self.inst != other.inst: - return None - - # Guaranteed by self.inst == other.inst - assert (len(self.args) == len(other.args)) - - for (self_a, other_a) in zip(self.args, other.args): - assert isinstance(self_a, Atom) and isinstance(other_a, Atom) - - if (isinstance(self_a, Var)): - if (self_a not in s): - s[self_a] = other_a - else: - if (s[self_a] != other_a): - return None - elif isinstance(other_a, Var): - assert isinstance(self_a, Literal) - if (other_a not in s): - s[other_a] = self_a - else: - if s[other_a] != self_a: - return None - else: - assert (isinstance(self_a, Literal) and - isinstance(other_a, Literal)) - # Guaranteed by self.inst == other.inst - assert self_a.kind == other_a.kind - if (self_a.value != other_a.value): - return None - - return s - - -class Literal(Atom): - """ - Base Class for all literal expressions in the DSL. - """ - def __init__(self, kind, value): - # type: (ImmediateKind, Any) -> None - self.kind = kind - self.value = value - - def __eq__(self, other): - # type: (Any) -> bool - if not isinstance(other, Literal): - return False - - if self.kind != other.kind: - return False - - # Can't just compare value here, as comparison Any <> Any returns Any - return repr(self) == repr(other) - - def __ne__(self, other): - # type: (Any) -> bool - return not self.__eq__(other) - - def __repr__(self): - # type: () -> str - return '{}.{}'.format(self.kind, self.value) - - -class ConstantInt(Literal): - """ - A value of an integer immediate operand. - - Immediate operands like `imm64` or `offset32` can be specified in AST - expressions using the call syntax: `imm64(5)` which creates a `ConstantInt` - node. 
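A worked model of the printing rule implemented just below may help (plain Python mirroring `__str__`; illustrative only): values already in the signed imm64 range print unchanged, while values in the unsigned range print as their two's-complement counterpart.

    def imm64_str(value):
        # Signed range: print as-is.
        if -(2**63) <= value < 2**63:
            return str(value)
        # Unsigned range: print the bitwise-equivalent signed value.
        if 2**63 <= value < 2**64:
            return str(value - 2**64)
        raise AssertionError('immediate value not in signed or unsigned imm64 range')

    assert imm64_str(5) == '5'
    assert imm64_str(2**64 - 1) == '-1'  # all-ones bit pattern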
- """ - - def __init__(self, kind, value): - # type: (ImmediateKind, int) -> None - super(ConstantInt, self).__init__(kind, value) - - def __str__(self): - # type: () -> str - # If the value is in the signed imm64 range, print it as-is. - if self.value >= -(2**63) and self.value < (2**63): - return str(self.value) - # Otherwise if the value is in the unsigned imm64 range, print its - # bitwise counterpart in the signed imm64 range. - if self.value >= (2**63) and self.value < (2**64): - return str(self.value - (2**64)) - assert False, "immediate value not in signed or unsigned imm64 range" - - -class ConstantBits(Literal): - """ - A bitwise value of an immediate operand. - - This is used to create bitwise exact floating point constants using - `ieee32.bits(0x80000000)`. - """ - - def __init__(self, kind, bits): - # type: (ImmediateKind, int) -> None - v = '{}::with_bits({:#x})'.format(kind.rust_type, bits) - super(ConstantBits, self).__init__(kind, v) - - def __str__(self): - # type: () -> str - """ - Get the Rust expression form of this constant. - """ - return str(self.value) - - -class Enumerator(Literal): - """ - A value of an enumerated immediate operand. - - Some immediate operand kinds like `intcc` and `floatcc` have an enumerated - range of values corresponding to a Rust enum type. An `Enumerator` object - is an AST leaf node representing one of the values. - - :param kind: The enumerated `ImmediateKind` containing the value. - :param value: The textual IR representation of the value. - - `Enumerator` nodes are not usually created directly. They are created by - using the dot syntax on immediate kinds: `intcc.ult`. - """ - - def __init__(self, kind, value): - # type: (ImmediateKind, str) -> None - super(Enumerator, self).__init__(kind, value) - - def __str__(self): - # type: () -> str - """ - Get the Rust expression form of this enumerator. - """ - return self.kind.rust_enumerator(self.value) diff --git a/cranelift-codegen/meta-python/cdsl/formats.py b/cranelift-codegen/meta-python/cdsl/formats.py deleted file mode 100644 index 3eee94d24..000000000 --- a/cranelift-codegen/meta-python/cdsl/formats.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Classes for describing instruction formats.""" -from __future__ import absolute_import -from .operands import OperandKind, VALUE, VARIABLE_ARGS -from .operands import Operand # noqa - -# The typing module is only required by mypy, and we don't use these imports -# outside type comments. -try: - from typing import Dict, List, Tuple, Union, Any, Sequence, Iterable # noqa -except ImportError: - pass - - -class InstructionContext(object): - """ - Most instruction predicates refer to immediate fields of a specific - instruction format, so their `predicate_context()` method returns the - specific instruction format. - - Predicates that only care about the types of SSA values are independent of - the instruction format. They can be evaluated in the context of any - instruction. - - The singleton `InstructionContext` class serves as the predicate context - for these predicates. - """ - - def __init__(self): - # type: () -> None - self.name = 'inst' - - -# Singleton instance. -instruction_context = InstructionContext() - - -class InstructionFormat(object): - """ - Every instruction opcode has a corresponding instruction format which - determines the number of operands and their kinds. Instruction formats are - identified structurally, i.e., the format of an instruction is derived from - the kinds of operands used in its declaration. 
- - The instruction format stores two separate lists of operands: Immediates - and values. Immediate operands (including entity references) are - represented as explicit members in the `InstructionData` variants. The - value operands are stored differently, depending on how many there are. - Beyond a certain point, instruction formats switch to an external value - list for storing value arguments. Value lists can hold an arbitrary number - of values. - - All instruction formats must be predefined in the - :py:mod:`cranelift.formats` module. - - :param kinds: List of `OperandKind` objects describing the operands. - :param name: Instruction format name in CamelCase. This is used as a Rust - variant name in both the `InstructionData` and `InstructionFormat` - enums. - :param typevar_operand: Index of the value input operand that is used to - infer the controlling type variable. By default, this is `0`, the first - `value` operand. The index is relative to the values only, ignoring - immediate operands. - """ - - # Map (imm_kinds, num_value_operands) -> format - _registry = dict() # type: Dict[Tuple[Tuple[OperandKind, ...], int, bool], InstructionFormat] # noqa - - # All existing formats. - all_formats = list() # type: List[InstructionFormat] - - def __init__(self, *kinds, **kwargs): - # type: (*Union[OperandKind, Tuple[str, OperandKind]], **Any) -> None # noqa - self.name = kwargs.get('name', None) # type: str - self.parent = instruction_context - - # The number of value operands stored in the format, or `None` when - # `has_value_list` is set. - self.num_value_operands = 0 - # Does this format use a value list for storing value operands? - self.has_value_list = False - # Operand fields for the immediate operands. All other instruction - # operands are values or variable argument lists. They are all handled - # specially. - self.imm_fields = tuple(self._process_member_names(kinds)) - - # The typevar_operand argument must point to a 'value' operand. - self.typevar_operand = kwargs.get('typevar_operand', None) # type: int - if self.typevar_operand is not None: - if not self.has_value_list: - assert self.typevar_operand < self.num_value_operands, \ - "typevar_operand must indicate a 'value' operand" - elif self.has_value_list or self.num_value_operands > 0: - # Default to the first 'value' operand, if there is one. - self.typevar_operand = 0 - - # Compute a signature for the global registry. - imm_kinds = tuple(f.kind for f in self.imm_fields) - sig = (imm_kinds, self.num_value_operands, self.has_value_list) - if sig in InstructionFormat._registry: - raise RuntimeError( - "Format '{}' has the same signature as existing format '{}'" - .format(self.name, InstructionFormat._registry[sig])) - InstructionFormat._registry[sig] = self - InstructionFormat.all_formats.append(self) - - def args(self): - # type: () -> FormatField - """ - Provides a ValueListField, which is derived from FormatField, - corresponding to the full ValueList of the instruction format. This - is useful for creating predicates for instructions which use variadic - arguments. - """ - - if self.has_value_list: - return ValueListField(self) - return None - - def _process_member_names(self, kinds): - # type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField] # noqa - """ - Extract names of all the immediate operands in the kinds tuple. - - Each entry is either an `OperandKind` instance, or a `(member, kind)` - pair. The member names correspond to members in the Rust - `InstructionData` data structure. 
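As an aside, this means a format declaration can mix bare kinds with explicitly named members. A hypothetical declaration (kind names as used elsewhere in this tree; illustrative only, not actual format definitions):

    # `imm64` falls back to its default_member name; the (name, kind) pair
    # names the intcc member 'cond' in the generated InstructionData variant.
    UnaryImm = InstructionFormat(imm64)
    Compare = InstructionFormat(('cond', intcc), VALUE, VALUE)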
- - Updates the fields `self.num_value_operands` and `self.has_value_list`. - - Yields the immediate operand fields. - """ - inum = 0 - for arg in kinds: - if isinstance(arg, OperandKind): - member = arg.default_member - k = arg - else: - member, k = arg - - # We define 'immediate' as not a value or variable arguments. - if k is VALUE: - self.num_value_operands += 1 - elif k is VARIABLE_ARGS: - self.has_value_list = True - else: - yield FormatField(self, inum, k, member) - inum += 1 - - def __str__(self): - # type: () -> str - args = ', '.join( - '{}: {}'.format(f.member, f.kind) for f in self.imm_fields) - return '{}(imms=({}), vals={})'.format( - self.name, args, self.num_value_operands) - - def __getattr__(self, attr): - # type: (str) -> FormatField - """ - Make immediate instruction format members available as attributes. - - Each non-value format member becomes a corresponding `FormatField` - attribute. - """ - for f in self.imm_fields: - if f.member == attr: - # Cache this field attribute so we won't have to search again. - setattr(self, attr, f) - return f - - raise AttributeError( - '{} is neither a {} member or a ' - .format(attr, self.name) + - 'normal InstructionFormat attribute') - - @staticmethod - def lookup(ins, outs): - # type: (Sequence[Operand], Sequence[Operand]) -> InstructionFormat - """ - Find an existing instruction format that matches the given lists of - instruction inputs and outputs. - - The `ins` and `outs` arguments correspond to the - :py:class:`Instruction` arguments of the same name, except they must be - tuples of :py:`Operand` objects. - """ - # Construct a signature. - imm_kinds = tuple(op.kind for op in ins if op.is_immediate()) - num_values = sum(1 for op in ins if op.is_value()) - has_varargs = (VARIABLE_ARGS in tuple(op.kind for op in ins)) - - sig = (imm_kinds, num_values, has_varargs) - if sig in InstructionFormat._registry: - return InstructionFormat._registry[sig] - - # Try another value list format as an alternative. - sig = (imm_kinds, 0, True) - if sig in InstructionFormat._registry: - return InstructionFormat._registry[sig] - - raise RuntimeError( - 'No instruction format matches ' - 'imms={}, vals={}, varargs={}'.format( - imm_kinds, num_values, has_varargs)) - - @staticmethod - def extract_names(globs): - # type: (Dict[str, Any]) -> None - """ - Given a dict mapping name -> object as returned by `globals()`, find - all the InstructionFormat objects and set their name from the dict key. - This is used to name a bunch of global values in a module. - """ - for name, obj in globs.items(): - if isinstance(obj, InstructionFormat): - assert obj.name is None - obj.name = name - - -class FormatField(object): - """ - An immediate field in an instruction format. - - This corresponds to a single member of a variant of the `InstructionData` - data type. - - :param iform: Parent `InstructionFormat`. - :param immnum: Immediate operand number in parent. - :param kind: Immediate Operand kind. - :param member: Member name in `InstructionData` variant. 
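Tying `__getattr__` above to this class: each immediate member becomes an attribute of its format, which is how instruction predicates name concrete fields. Hypothetical usage, assuming the `Compare` declaration sketched earlier:

    f = Compare.cond                   # __getattr__ finds and caches the FormatField
    assert f.member == 'cond'
    assert str(f) == 'Compare.cond'    # once extract_names() has named the format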
- """ - - def __init__(self, iform, immnum, kind, member): - # type: (InstructionFormat, int, OperandKind, str) -> None - self.format = iform - self.immnum = immnum - self.kind = kind - self.member = member - - def __str__(self): - # type: () -> str - return '{}.{}'.format(self.format.name, self.member) - - def rust_destructuring_name(self): - # type: () -> str - return self.member - - def rust_name(self): - # type: () -> str - return self.member - - -class ValueListField(FormatField): - """ - The full value list field of an instruction format. - - This corresponds to all Value-type members of a variant of the - `InstructionData` format, which contains a ValueList. - - :param iform: Parent `InstructionFormat`. - """ - def __init__(self, iform): - # type: (InstructionFormat) -> None - self.format = iform - self.member = "args" - - def rust_destructuring_name(self): - # type: () -> str - return 'ref {}'.format(self.member) diff --git a/cranelift-codegen/meta-python/cdsl/instructions.py b/cranelift-codegen/meta-python/cdsl/instructions.py deleted file mode 100644 index 34741991d..000000000 --- a/cranelift-codegen/meta-python/cdsl/instructions.py +++ /dev/null @@ -1,446 +0,0 @@ -"""Classes for defining instructions.""" -from __future__ import absolute_import -from . import camel_case -from .types import ValueType -from .operands import Operand -from .formats import InstructionFormat - -try: - from typing import Union, Sequence, List, Tuple, Any, TYPE_CHECKING # noqa - from typing import Dict # noqa - if TYPE_CHECKING: - from .ast import Expr, Apply, Var, Def, VarAtomMap # noqa - from .typevar import TypeVar # noqa - from .ti import TypeConstraint # noqa - from .xform import XForm, Rtl - # List of operands for ins/outs: - OpList = Union[Sequence[Operand], Operand] - ConstrList = Union[Sequence[TypeConstraint], TypeConstraint] - MaybeBoundInst = Union['Instruction', 'BoundInstruction'] - InstructionSemantics = Sequence[XForm] - SemDefCase = Union[Rtl, Tuple[Rtl, Sequence[TypeConstraint]], XForm] -except ImportError: - pass - - -class InstructionGroup(object): - """ - Every instruction must belong to exactly one instruction group. A given - target architecture can support instructions from multiple groups, and it - does not necessarily support all instructions in a group. - - New instructions are automatically added to the currently open instruction - group. - """ - - # The currently open instruction group. - _current = None # type: InstructionGroup - - def open(self): - # type: () -> None - """ - Open this instruction group such that future new instructions are - added to this group. - """ - assert InstructionGroup._current is None, ( - "Can't open {} since {} is already open" - .format(self, InstructionGroup._current)) - InstructionGroup._current = self - - def close(self): - # type: () -> None - """ - Close this instruction group. This function should be called before - opening another instruction group. - """ - assert InstructionGroup._current is self, ( - "Can't close {}, the open instuction group is {}" - .format(self, InstructionGroup._current)) - InstructionGroup._current = None - - def __init__(self, name, doc): - # type: (str, str) -> None - self.name = name - self.__doc__ = doc - self.instructions = [] # type: List[Instruction] - self.open() - - @staticmethod - def append(inst): - # type: (Instruction) -> None - assert InstructionGroup._current, \ - "Open an instruction group before defining instructions." 
- InstructionGroup._current.instructions.append(inst) - - -class Instruction(object): - """ - The operands to the instruction are specified as two tuples: ``ins`` and - ``outs``. Since the Python singleton tuple syntax is a bit awkward, it is - allowed to specify a singleton as just the operand itself, i.e., `ins=x` - and `ins=(x,)` are both allowed and mean the same thing. - - :param name: Instruction mnemonic, also becomes opcode name. - :param doc: Documentation string. - :param ins: Tuple of input operands. This can be a mix of SSA value - operands and other operand kinds. - :param outs: Tuple of output operands. The output operands must be SSA - values or `variable_args`. - :param constraints: Tuple of instruction-specific TypeConstraints. - :param is_terminator: This is a terminator instruction. - :param is_branch: This is a branch instruction. - :param is_indirect_branch: This is an indirect branch instruction. - :param is_call: This is a call instruction. - :param is_return: This is a return instruction. - :param is_ghost: This is a ghost instruction, which has no encoding and no - other register allocation constraints. - :param can_trap: This instruction can trap. - :param can_load: This instruction can load from memory. - :param can_store: This instruction can store to memory. - :param other_side_effects: Instruction has other side effects. - """ - - # Boolean instruction attributes that can be passed as keyword arguments to - # the constructor. Map attribute name to doc comment for generated Rust - # code. - ATTRIBS = { - 'is_terminator': 'True for instructions that terminate the EBB.', - 'is_branch': 'True for all branch or jump instructions.', - 'is_indirect_branch': - 'True for all indirect branch or jump instructions.', - 'is_call': 'Is this a call instruction?', - 'is_return': 'Is this a return instruction?', - 'is_ghost': 'Is this a ghost instruction?', - 'can_load': 'Can this instruction read from memory?', - 'can_store': 'Can this instruction write to memory?', - 'can_trap': 'Can this instruction cause a trap?', - 'other_side_effects': - 'Does this instruction have other side effects besides can_*', - 'writes_cpu_flags': 'Does this instruction write to CPU flags?', - } - - def __init__(self, name, doc, ins=(), outs=(), constraints=(), **kwargs): - # type: (str, str, OpList, OpList, ConstrList, **Any) -> None - self.name = name - self.camel_name = camel_case(name) - self.__doc__ = doc - self.ins = self._to_operand_tuple(ins) - self.outs = self._to_operand_tuple(outs) - self.constraints = self._to_constraint_tuple(constraints) - self.format = InstructionFormat.lookup(self.ins, self.outs) - self.semantics = None # type: InstructionSemantics - - # Opcode number, assigned by gen_instr.py. - self.number = None # type: int - - # Indexes into `self.outs` for value results. - # Other results are `variable_args`. - self.value_results = tuple( - i for i, o in enumerate(self.outs) if o.is_value()) - # Indexes into `self.ins` for value operands. - self.value_opnums = tuple( - i for i, o in enumerate(self.ins) if o.is_value()) - # Indexes into `self.ins` for non-value operands. - self.imm_opnums = tuple( - i for i, o in enumerate(self.ins) if o.is_immediate()) - - self._verify_polymorphic() - for attr in kwargs: - if attr not in Instruction.ATTRIBS: - raise AssertionError( - "unknown instruction attribute '" + attr + "'") - for attr in Instruction.ATTRIBS: - setattr(self, attr, not not kwargs.get(attr, False)) - - # Infer the 'writes_cpu_flags' field value. 
-        if 'writes_cpu_flags' not in kwargs:
-            self.writes_cpu_flags = any(
-                    out.is_cpu_flags() for out in self.outs)
-
-        InstructionGroup.append(self)
-
-    def __str__(self):
-        # type: () -> str
-        prefix = ', '.join(o.name for o in self.outs)
-        if prefix:
-            prefix = prefix + ' = '
-        suffix = ', '.join(o.name for o in self.ins)
-        return '{}{} {}'.format(prefix, self.name, suffix)
-
-    def snake_name(self):
-        # type: () -> str
-        """
-        Get the snake_case name of this instruction.
-
-        Keywords in Rust and Python are altered by appending a '_'
-        """
-        if self.name == 'return':
-            return 'return_'
-        else:
-            return self.name
-
-    def blurb(self):
-        # type: () -> str
-        """Get the first line of the doc comment"""
-        for line in self.__doc__.split('\n'):
-            line = line.strip()
-            if line:
-                return line
-        return ""
-
-    def _verify_polymorphic(self):
-        # type: () -> None
-        """
-        Check if this instruction is polymorphic, and verify its use of type
-        variables.
-        """
-        poly_ins = [
-                i for i in self.value_opnums
-                if self.ins[i].typevar.free_typevar()]
-        poly_outs = [
-                i for i, o in enumerate(self.outs)
-                if o.is_value() and o.typevar.free_typevar()]
-        self.is_polymorphic = len(poly_ins) > 0 or len(poly_outs) > 0
-        if not self.is_polymorphic:
-            return
-
-        # Prefer to use the typevar_operand to infer the controlling typevar.
-        self.use_typevar_operand = False
-        typevar_error = None
-        tv_op = self.format.typevar_operand
-        if tv_op is not None and tv_op < len(self.value_opnums):
-            try:
-                opnum = self.value_opnums[tv_op]
-                tv = self.ins[opnum].typevar
-                if tv is tv.free_typevar() or tv.singleton_type() is not None:
-                    self.other_typevars = self._verify_ctrl_typevar(tv)
-                    self.ctrl_typevar = tv
-                    self.use_typevar_operand = True
-            except RuntimeError as e:
-                typevar_error = e
-
-        if not self.use_typevar_operand:
-            # The typevar_operand argument doesn't work. Can we infer from the
-            # first result instead?
-            if len(self.outs) == 0:
-                if typevar_error:
-                    raise typevar_error
-                else:
-                    raise RuntimeError(
-                            "typevar_operand must be a free type variable")
-            tv = self.outs[0].typevar
-            if tv is not tv.free_typevar():
-                raise RuntimeError("first result must be a free type variable")
-            self.other_typevars = self._verify_ctrl_typevar(tv)
-            self.ctrl_typevar = tv
-
-    def _verify_ctrl_typevar(self, ctrl_typevar):
-        # type: (TypeVar) -> List[TypeVar]
-        """
-        Verify that the use of TypeVars is consistent with `ctrl_typevar` as
-        the controlling type variable.
-
-        All polymorphic inputs must either be derived from `ctrl_typevar` or be
-        independent free type variables only used once.
-
-        All polymorphic results must be derived from `ctrl_typevar`.
-
-        Return list of other type variables used, or raise an error.
-        """
-        other_tvs = []  # type: List[TypeVar]
-        # Check value inputs.
-        for opnum in self.value_opnums:
-            typ = self.ins[opnum].typevar
-            tv = typ.free_typevar()
-            # Non-polymorphic or derived from ctrl_typevar is OK.
-            if tv is None or tv is ctrl_typevar:
-                continue
-            # No other derived typevars allowed.
-            if typ is not tv:
-                raise RuntimeError(
-                        "{}: type variable {} must be derived from {}"
-                        .format(self.ins[opnum], typ.name, ctrl_typevar))
-            # Other free type variables can only be used once each.
-            if tv in other_tvs:
-                raise RuntimeError(
-                        "type variable {} can't be used more than once"
-                        .format(tv.name))
-            other_tvs.append(tv)
-
-        # Check outputs.
-        for result in self.outs:
-            if not result.is_value():
-                continue
-            typ = result.typevar
-            tv = typ.free_typevar()
-            # Non-polymorphic or derived from ctrl_typevar is OK.
- if tv is None or tv is ctrl_typevar: - continue - raise RuntimeError( - "type variable in output not derived from ctrl_typevar") - - return other_tvs - - def all_typevars(self): - # type: () -> List[TypeVar] - """ - Get a list of all type variables in the instruction. - """ - if self.is_polymorphic: - return [self.ctrl_typevar] + self.other_typevars - else: - return [] - - @staticmethod - def _to_operand_tuple(x): - # type: (Union[Sequence[Operand], Operand]) -> Tuple[Operand, ...] - # Allow a single Operand instance instead of the awkward singleton - # tuple syntax. - if isinstance(x, Operand): - y = (x,) # type: Tuple[Operand, ...] - else: - y = tuple(x) - for op in y: - assert isinstance(op, Operand) - return y - - @staticmethod - def _to_constraint_tuple(x): - # type: (ConstrList) -> Tuple[TypeConstraint, ...] - """ - Allow a single TypeConstraint instance instead of the awkward singleton - tuple syntax. - """ - # import placed here to avoid circular dependency - from .ti import TypeConstraint # noqa - if isinstance(x, TypeConstraint): - y = (x,) # type: Tuple[TypeConstraint, ...] - else: - y = tuple(x) - for op in y: - assert isinstance(op, TypeConstraint) - return y - - def bind(self, *args): - # type: (*ValueType) -> BoundInstruction - """ - Bind a polymorphic instruction to a concrete list of type variable - values. - """ - assert self.is_polymorphic - return BoundInstruction(self, args) - - def __getattr__(self, name): - # type: (str) -> BoundInstruction - """ - Bind a polymorphic instruction to a single type variable with dot - syntax: - - >>> iadd.i32 - """ - assert name != 'any', 'Wildcard not allowed for ctrl_typevar' - return self.bind(ValueType.by_name(name)) - - def fully_bound(self): - # type: () -> Tuple[Instruction, Tuple[ValueType, ...]] - """ - Verify that all typevars have been bound, and return a - `(inst, typevars)` pair. - - This version in `Instruction` itself allows non-polymorphic - instructions to duck-type as `BoundInstruction`\\s. - """ - assert not self.is_polymorphic, self - return (self, ()) - - def __call__(self, *args): - # type: (*Expr) -> Apply - """ - Create an `ast.Apply` AST node representing the application of this - instruction to the arguments. - """ - from .ast import Apply # noqa - return Apply(self, args) - - def set_semantics(self, src, *dsts): - # type: (Union[Def, Apply], *SemDefCase) -> None - """Set our semantics.""" - from semantics import verify_semantics - from .xform import XForm, Rtl - - sem = [] # type: List[XForm] - for dst in dsts: - if isinstance(dst, Rtl): - sem.append(XForm(Rtl(src).copy({}), dst)) - elif isinstance(dst, XForm): - sem.append(XForm( - dst.src.copy({}), - dst.dst.copy({}), - dst.constraints)) - else: - assert isinstance(dst, tuple) - sem.append(XForm(Rtl(src).copy({}), dst[0], - constraints=dst[1])) - - verify_semantics(self, Rtl(src), sem) - - self.semantics = sem - - -class BoundInstruction(object): - """ - A polymorphic `Instruction` bound to concrete type variables. - """ - - def __init__(self, inst, typevars): - # type: (Instruction, Tuple[ValueType, ...]) -> None - self.inst = inst - self.typevars = typevars - assert len(typevars) <= 1 + len(inst.other_typevars) - - def __str__(self): - # type: () -> str - return '.'.join([self.inst.name, ] + list(map(str, self.typevars))) - - def bind(self, *args): - # type: (*ValueType) -> BoundInstruction - """ - Bind additional typevars. 
- """ - return BoundInstruction(self.inst, self.typevars + args) - - def __getattr__(self, name): - # type: (str) -> BoundInstruction - """ - Bind an additional typevar dot syntax: - - >>> uext.i32.i8 - """ - if name == 'any': - # This is a wild card bind represented as a None type variable. - return self.bind(None) - - return self.bind(ValueType.by_name(name)) - - def fully_bound(self): - # type: () -> Tuple[Instruction, Tuple[ValueType, ...]] - """ - Verify that all typevars have been bound, and return a - `(inst, typevars)` pair. - """ - if len(self.typevars) < 1 + len(self.inst.other_typevars): - unb = ', '.join( - str(tv) for tv in - self.inst.other_typevars[len(self.typevars) - 1:]) - raise AssertionError("Unbound typevar {} in {}".format(unb, self)) - assert len(self.typevars) == 1 + len(self.inst.other_typevars) - return (self.inst, self.typevars) - - def __call__(self, *args): - # type: (*Expr) -> Apply - """ - Create an `ast.Apply` AST node representing the application of this - instruction to the arguments. - """ - from .ast import Apply # noqa - return Apply(self, args) diff --git a/cranelift-codegen/meta-python/cdsl/isa.py b/cranelift-codegen/meta-python/cdsl/isa.py deleted file mode 100644 index 3e3ac0b4f..000000000 --- a/cranelift-codegen/meta-python/cdsl/isa.py +++ /dev/null @@ -1,455 +0,0 @@ -"""Defining instruction set architectures.""" -from __future__ import absolute_import -from collections import OrderedDict -from .predicates import And, TypePredicate -from .registers import RegClass, Register, Stack -from .ast import Apply -from .types import ValueType -from .instructions import InstructionGroup - -# The typing module is only required by mypy, and we don't use these imports -# outside type comments. -try: - from typing import Tuple, Union, Any, Iterable, Sequence, List, Set, Dict, TYPE_CHECKING # noqa - if TYPE_CHECKING: - from .instructions import MaybeBoundInst, InstructionFormat # noqa - from .predicates import PredNode, PredKey # noqa - from .settings import SettingGroup # noqa - from .registers import RegBank # noqa - from .xform import XFormGroup # noqa - OperandConstraint = Union[RegClass, Register, int, Stack] - ConstraintSeq = Union[OperandConstraint, Tuple[OperandConstraint, ...]] - # Instruction specification for encodings. Allows for predicated - # instructions. - InstSpec = Union[MaybeBoundInst, Apply] - BranchRange = Sequence[int] - # A recipe predicate consisting of an ISA predicate and an instruction - # predicate. - RecipePred = Tuple[PredNode, PredNode] -except ImportError: - pass - - -class TargetISA(object): - """ - A target instruction set architecture. - - The `TargetISA` class collects everything known about a target ISA. - - :param name: Short mnemonic name for the ISA. - :param instruction_groups: List of `InstructionGroup` instances that are - relevant for this ISA. - """ - - def __init__(self, name, instruction_groups): - # type: (str, Sequence[InstructionGroup]) -> None - self.name = name - self.settings = None # type: SettingGroup - self.instruction_groups = instruction_groups - self.cpumodes = list() # type: List[CPUMode] - self.regbanks = list() # type: List[RegBank] - self.legalize_codes = OrderedDict() # type: OrderedDict[XFormGroup, int] # noqa - # Unique copies of all predicates. 
- self._predicates = dict() # type: Dict[PredKey, PredNode] - - assert InstructionGroup._current is None,\ - "InstructionGroup {} is still open"\ - .format(InstructionGroup._current.name) - - def __str__(self): - # type: () -> str - return self.name - - def finish(self): - # type: () -> TargetISA - """ - Finish the definition of a target ISA after adding all CPU modes and - settings. - - This computes some derived properties that are used in multiple - places. - - :returns self: - """ - self._collect_encoding_recipes() - self._collect_predicates() - self._collect_legalize_codes() - return self - - def _collect_encoding_recipes(self): - # type: () -> None - """ - Collect and number all encoding recipes in use. - """ - self.all_recipes = list() # type: List[EncRecipe] - rcps = set() # type: Set[EncRecipe] - for cpumode in self.cpumodes: - for enc in cpumode.encodings: - recipe = enc.recipe - if recipe not in rcps: - assert recipe.number is None - recipe.number = len(rcps) - rcps.add(recipe) - self.all_recipes.append(recipe) - # Make sure ISA predicates are registered. - if recipe.isap: - recipe.isap = self.unique_pred(recipe.isap) - self.settings.number_predicate(recipe.isap) - recipe.instp = self.unique_pred(recipe.instp) - - def _collect_predicates(self): - # type: () -> None - """ - Collect and number all predicates in use. - - Ensures that all ISA predicates have an assigned bit number in - `self.settings`. - """ - self.instp_number = OrderedDict() # type: OrderedDict[PredNode, int] - for cpumode in self.cpumodes: - for enc in cpumode.encodings: - instp = enc.instp - if instp and instp not in self.instp_number: - # assign predicate number starting from 0. - n = len(self.instp_number) - self.instp_number[instp] = n - - # All referenced ISA predicates must have a number in - # `self.settings`. This may cause some parent predicates to be - # replicated here, which is OK. - if enc.isap: - self.settings.number_predicate(enc.isap) - - def _collect_legalize_codes(self): - # type: () -> None - """ - Make sure all legalization transforms have been assigned a code. - """ - for cpumode in self.cpumodes: - self.legalize_code(cpumode.default_legalize) - for x in cpumode.type_legalize.values(): - self.legalize_code(x) - - def legalize_code(self, xgrp): - # type: (XFormGroup) -> int - """ - Get the legalization code for the transform group `xgrp`. Assign one if - necessary. - - Each target ISA has its own list of legalization actions with - associated legalize codes that appear in the encoding tables. - - This method is used to maintain the registry of legalization actions - and their table codes. - """ - if xgrp in self.legalize_codes: - code = self.legalize_codes[xgrp] - else: - code = len(self.legalize_codes) - self.legalize_codes[xgrp] = code - return code - - def unique_pred(self, pred): - # type: (PredNode) -> PredNode - """ - Get a unique predicate that is equivalent to `pred`. - """ - if pred is None: - return pred - # TODO: We could actually perform some algebraic simplifications. It's - # not clear if it is worthwhile. - k = pred.predicate_key() - if k in self._predicates: - return self._predicates[k] - self._predicates[k] = pred - return pred - - -class CPUMode(object): - """ - A CPU mode determines which instruction encodings are active. - - All instruction encodings are associated with exactly one `CPUMode`, and - all CPU modes are associated with exactly one `TargetISA`. - - :param name: Short mnemonic name for the CPU mode. - :param target: Associated `TargetISA`. 
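From the constructors above, the intended wiring looks roughly like this (ISA and group names hypothetical; a sketch, not a real target definition):

    isa = TargetISA('example', [example_group])  # all groups must be closed first
    mode32 = CPUMode('M32', isa)                 # appends itself to isa.cpumodes
    mode64 = CPUMode('M64', isa)
    assert isa.cpumodes == [mode32, mode64]
    # Encodings are then registered per mode via mode32.enc(...) below.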
- """ - - def __init__(self, name, isa): - # type: (str, TargetISA) -> None - self.name = name - self.isa = isa - self.encodings = [] # type: List[Encoding] - isa.cpumodes.append(self) - - # Tables for configuring legalization actions when no valid encoding - # exists for an instruction. - self.default_legalize = None # type: XFormGroup - self.type_legalize = OrderedDict() # type: OrderedDict[ValueType, XFormGroup] # noqa - - def __str__(self): - # type: () -> str - return self.name - - def enc(self, *args, **kwargs): - # type: (*Any, **Any) -> None - """ - Add a new encoding to this CPU mode. - - Arguments are the `Encoding constructor arguments, except for the first - `CPUMode argument which is implied. - """ - self.encodings.append(Encoding(self, *args, **kwargs)) - - def legalize_type(self, default=None, **kwargs): - # type: (XFormGroup, **XFormGroup) -> None - """ - Configure the legalization action per controlling type variable. - - Instructions that have a controlling type variable mentioned in one of - the arguments will be legalized according to the action specified here - instead of using the `legalize_default` action. - - The keyword arguments are value type names: - - mode.legalize_type(i8=widen, i16=widen, i32=expand) - - The `default` argument specifies the action to take for controlling - type variables that don't have an explicitly configured action. - """ - if default is not None: - self.default_legalize = default - - for name, xgrp in kwargs.items(): - ty = ValueType.by_name(name) - self.type_legalize[ty] = xgrp - - def legalize_monomorphic(self, xgrp): - # type: (XFormGroup) -> None - """ - Configure the legalization action to take for monomorphic instructions - which don't have a controlling type variable. - - See also `legalize_type()` for polymorphic instructions. - """ - self.type_legalize[None] = xgrp - - def get_legalize_action(self, ty): - # type: (ValueType) -> XFormGroup - """ - Get the legalization action to use for `ty`. - """ - return self.type_legalize.get(ty, self.default_legalize) - - -class EncRecipe(object): - """ - A recipe for encoding instructions with a given format. - - Many different instructions can be encoded by the same recipe, but they - must all have the same instruction format. - - The `ins` and `outs` arguments are tuples specifying the register - allocation constraints for the value operands and results respectively. The - possible constraints for an operand are: - - - A `RegClass` specifying the set of allowed registers. - - A `Register` specifying a fixed-register operand. - - An integer indicating that this result is tied to a value operand, so - they must use the same register. - - A `Stack` specifying a value in a stack slot. - - The `branch_range` argument must be provided for recipes that can encode - branch instructions. It is an `(origin, bits)` tuple describing the exact - range that can be encoded in a branch instruction. - - For ISAs that use CPU flags in `iflags` and `fflags` value types, the - `clobbers_flags` is used to indicate instruction encodings that clobbers - the CPU flags, so they can't be used where a flag value is live. - - :param name: Short mnemonic name for this recipe. - :param format: All encoded instructions must have this - :py:class:`InstructionFormat`. - :param base_size: Base number of bytes in the binary encoded instruction. - :param compute_size: Function name to use when computing actual size. - :param ins: Tuple of register constraints for value operands. 
- :param outs: Tuple of register constraints for results. - :param branch_range: `(origin, bits)` range for branches. - :param clobbers_flags: This instruction clobbers `iflags` and `fflags`. - :param instp: Instruction predicate. - :param isap: ISA predicate. - :param emit: Rust code for binary emission. - """ - - def __init__( - self, - name, # type: str - format, # type: InstructionFormat - base_size, # type: int - ins, # type: ConstraintSeq - outs, # type: ConstraintSeq - compute_size=None, # type: str - branch_range=None, # type: BranchRange - clobbers_flags=True, # type: bool - instp=None, # type: PredNode - isap=None, # type: PredNode - emit=None # type: str - ): - # type: (...) -> None - self.name = name - self.format = format - assert base_size >= 0 - self.base_size = base_size - self.compute_size = compute_size if compute_size is not None \ - else 'base_size' - self.branch_range = branch_range - self.clobbers_flags = clobbers_flags - self.instp = instp - self.isap = isap - self.emit = emit - if instp: - assert instp.predicate_context() == format - self.number = None # type: int - - self.ins = self._verify_constraints(ins) - if not format.has_value_list: - assert len(self.ins) == format.num_value_operands - self.outs = self._verify_constraints(outs) - - def __str__(self): - # type: () -> str - return self.name - - def _verify_constraints(self, seq): - # type: (ConstraintSeq) -> Sequence[OperandConstraint] - if not isinstance(seq, tuple): - seq = (seq,) - for c in seq: - if isinstance(c, int): - # An integer constraint is bound to a value operand. - # Check that it is in range. - assert c >= 0 and c < len(self.ins) - else: - assert (isinstance(c, RegClass) - or isinstance(c, Register) - or isinstance(c, Stack)) - return seq - - def ties(self): - # type: () -> Tuple[Dict[int, int], Dict[int, int]] - """ - Return two dictionaries representing the tied operands. - - The first maps input number to tied output number, the second maps - output number to tied input number. - """ - i2o = dict() # type: Dict[int, int] - o2i = dict() # type: Dict[int, int] - for o, i in enumerate(self.outs): - if isinstance(i, int): - i2o[i] = o - o2i[o] = i - return (i2o, o2i) - - def fixed_ops(self): - # type: () -> Tuple[Set[Register], Set[Register]] - """ - Return two sets of registers representing the fixed input and output - operands. - """ - i = set(r for r in self.ins if isinstance(r, Register)) - o = set(r for r in self.outs if isinstance(r, Register)) - return (i, o) - - def recipe_pred(self): - # type: () -> RecipePred - """ - Get the combined recipe predicate which includes both the ISA predicate - and the instruction predicate. - - Return `None` if this recipe has neither predicate. - """ - if self.isap is None and self.instp is None: - return None - else: - return (self.isap, self.instp) - - -class Encoding(object): - """ - Encoding for a concrete instruction. - - An `Encoding` object ties an instruction opcode with concrete type - variables together with and encoding recipe and encoding bits. - - The concrete instruction can be in three different forms: - - 1. A naked opcode: `trap` for non-polymorphic instructions. - 2. With bound type variables: `iadd.i32` for polymorphic instructions. - 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. - - If the instruction is polymorphic, all type variables must be provided. - - :param cpumode: The CPU mode where the encoding is active. - :param inst: The :py:class:`Instruction` or :py:class:`BoundInstruction` - being encoded. 
- :param recipe: The :py:class:`EncRecipe` to use. - :param encbits: Additional encoding bits to be interpreted by `recipe`. - :param instp: Instruction predicate, or `None`. - :param isap: ISA predicate, or `None`. - """ - - def __init__(self, cpumode, inst, recipe, encbits, instp=None, isap=None): - # type: (CPUMode, InstSpec, EncRecipe, int, PredNode, PredNode) -> None # noqa - assert isinstance(cpumode, CPUMode) - assert isinstance(recipe, EncRecipe) - - # Check for possible instruction predicates in `inst`. - if isinstance(inst, Apply): - instp = And.combine(instp, inst.inst_predicate()) - self.inst = inst.inst - self.typevars = inst.typevars - else: - self.inst, self.typevars = inst.fully_bound() - - # Add secondary type variables to the instruction predicate. - # This is already included by Apply.inst_predicate() above. - if len(self.typevars) > 1: - for tv, vt in zip(self.inst.other_typevars, self.typevars[1:]): - # A None tv is an 'any' wild card: `ishl.i32.any`. - if vt is None: - continue - typred = TypePredicate.typevar_check(self.inst, tv, vt) - instp = And.combine(instp, typred) - - self.cpumode = cpumode - assert self.inst.format == recipe.format, ( - "Format {} must match recipe: {}".format( - self.inst.format, recipe.format)) - - if self.inst.is_branch and not self.inst.is_indirect_branch: - assert recipe.branch_range, ( - 'Recipe {} for {} must have a branch_range' - .format(recipe, self.inst.name)) - - self.recipe = recipe - self.encbits = encbits - - # Record specific predicates. Note that the recipe also has predicates. - self.instp = self.cpumode.isa.unique_pred(instp) - self.isap = self.cpumode.isa.unique_pred(isap) - - def __str__(self): - # type: () -> str - return '[{}#{:02x}]'.format(self.recipe, self.encbits) - - def ctrl_typevar(self): - # type: () -> ValueType - """ - Get the controlling type variable for this encoding or `None`. - """ - if self.typevars: - return self.typevars[0] - else: - return None diff --git a/cranelift-codegen/meta-python/cdsl/operands.py b/cranelift-codegen/meta-python/cdsl/operands.py deleted file mode 100644 index cf99645df..000000000 --- a/cranelift-codegen/meta-python/cdsl/operands.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Classes for describing instruction operands.""" -from __future__ import absolute_import -from . import camel_case -from .types import ValueType -from .typevar import TypeVar - -try: - from typing import Union, Dict, TYPE_CHECKING, Iterable # noqa - OperandSpec = Union['OperandKind', ValueType, TypeVar] - if TYPE_CHECKING: - from .ast import Enumerator, ConstantInt, ConstantBits, Literal # noqa -except ImportError: - pass - - -# Kinds of operands. -# -# Each instruction has an opcode and a number of operands. The opcode -# determines the instruction format, and the format determines the number of -# operands and the kind of each operand. -class OperandKind(object): - """ - An instance of the `OperandKind` class corresponds to a kind of operand. - Each operand kind has a corresponding type in the Rust representation of an - instruction. - """ - - def __init__(self, name, doc, default_member=None, rust_type=None): - # type: (str, str, str, str) -> None - self.name = name - self.__doc__ = doc - self.default_member = default_member - # The camel-cased name of an operand kind is also the Rust type used to - # represent it. 
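The `ties()` method of `EncRecipe` shown earlier derives both directions of the tied-operand relation from integer entries in `outs`. A tiny model of that derivation (standalone, with a hypothetical one-output recipe as the usage example):

    def ties(outs):
        i2o, o2i = {}, {}
        for o, c in enumerate(outs):
            if isinstance(c, int):
                # Output `o` must use the same register as input `c`.
                i2o[c] = o
                o2i[o] = c
        return i2o, o2i

    # A single result tied to value operand 0, as in two-address ISAs.
    assert ties([0]) == ({0: 0}, {0: 0})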
- self.rust_type = rust_type or ('ir::' + camel_case(name)) - - def __str__(self): - # type: () -> str - return self.name - - def __repr__(self): - # type: () -> str - return 'OperandKind({})'.format(self.name) - - -#: An SSA value operand. This is a value defined by another instruction. -VALUE = OperandKind( - 'value', """ - An SSA value defined by another instruction. - - This kind of operand can represent any SSA value type, but the - instruction format may restrict the valid value types for a given - operand. - """) - -#: A variable-sized list of value operands. Use for Ebb and function call -#: arguments. -VARIABLE_ARGS = OperandKind( - 'variable_args', """ - A variable size list of `value` operands. - - Use this to represent arguments passed to a function call, arguments - passed to an extended basic block, or a variable number of results - returned from an instruction. - """, - rust_type='&[Value]') - - -# Instances of immediate operand types are provided in the -# `cranelift.immediates` module. -class ImmediateKind(OperandKind): - """ - The kind of an immediate instruction operand. - - :param default_member: The default member name of this kind the - `InstructionData` data structure. - """ - - def __init__( - self, name, doc, - default_member='imm', - rust_type=None, - values=None): - # type: (str, str, str, str, Dict[str, str]) -> None - if rust_type is None: - rust_type = 'ir::immediates::' + camel_case(name) - super(ImmediateKind, self).__init__( - name, doc, default_member, rust_type) - self.values = values - - def __repr__(self): - # type: () -> str - return 'ImmediateKind({})'.format(self.name) - - def __getattr__(self, value): - # type: (str) -> Enumerator - """ - Enumerated immediate kinds allow the use of dot syntax to produce - `Enumerator` AST nodes: `icmp.i32(intcc.ult, a, b)`. - """ - from .ast import Enumerator # noqa - if not self.values: - raise AssertionError( - '{n} is not an enumerated operand kind: {n}.{a}'.format( - n=self.name, a=value)) - if value not in self.values: - raise AssertionError( - 'No such {n} enumerator: {n}.{a}'.format( - n=self.name, a=value)) - return Enumerator(self, value) - - def __call__(self, value): - # type: (int) -> ConstantInt - """ - Create an AST node representing a constant integer: - - iconst(imm64(0)) - """ - from .ast import ConstantInt # noqa - if self.values: - raise AssertionError( - "{}({}): Can't make a constant numeric value for an enum" - .format(self.name, value)) - return ConstantInt(self, value) - - def bits(self, bits): - # type: (int) -> ConstantBits - """ - Create an AST literal node for the given bitwise representation of this - immediate operand kind. - """ - from .ast import ConstantBits # noqa - return ConstantBits(self, bits) - - def rust_enumerator(self, value): - # type: (str) -> str - """ - Get the qualified Rust name of the enumerator value `value`. - """ - return '{}::{}'.format(self.rust_type, self.values[value]) - - def is_enumerable(self): - # type: () -> bool - return self.values is not None - - def possible_values(self): - # type: () -> Iterable[Literal] - from cdsl.ast import Enumerator # noqa - assert self.is_enumerable() - for v in self.values.keys(): - yield Enumerator(self, v) - - -# Instances of entity reference operand types are provided in the -# `cranelift.entities` module. -class EntityRefKind(OperandKind): - """ - The kind of an entity reference instruction operand. 
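The `__getattr__` hook above is what makes the dot syntax `icmp.i32(intcc.eq, a, b)` work: attribute access on an enumerated immediate kind yields an AST node rather than raising `AttributeError`. A condensed, runnable sketch of the trick (the `IntCC` class is illustrative; the real `intcc` is an `ImmediateKind` instance and returns an `Enumerator`):

    class IntCC(object):
        values = {'eq': 'Equal', 'ult': 'UnsignedLessThan'}

        def __getattr__(self, name):
            # Only called for names not found by normal lookup.
            if name not in self.values:
                raise AttributeError('no such enumerator: ' + name)
            return ('enum', name)

    intcc = IntCC()
    assert intcc.eq == ('enum', 'eq')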
- """ - - def __init__(self, name, doc, default_member=None, rust_type=None): - # type: (str, str, str, str) -> None - super(EntityRefKind, self).__init__( - name, doc, default_member or name, rust_type) - - def __repr__(self): - # type: () -> str - return 'EntityRefKind({})'.format(self.name) - - -class Operand(object): - """ - An instruction operand can be an *immediate*, an *SSA value*, or an *entity - reference*. The type of the operand is one of: - - 1. A :py:class:`ValueType` instance indicates an SSA value operand with a - concrete type. - - 2. A :py:class:`TypeVar` instance indicates an SSA value operand, and the - instruction is polymorphic over the possible concrete types that the - type variable can assume. - - 3. An :py:class:`ImmediateKind` instance indicates an immediate operand - whose value is encoded in the instruction itself rather than being - passed as an SSA value. - - 4. An :py:class:`EntityRefKind` instance indicates an operand that - references another entity in the function, typically something declared - in the function preamble. - - """ - def __init__(self, name, typ, doc=''): - # type: (str, OperandSpec, str) -> None - self.name = name - self.__doc__ = doc - - # Decode the operand spec and set self.kind. - # Only VALUE operands have a typevar member. - if isinstance(typ, ValueType): - self.kind = VALUE - self.typevar = TypeVar.singleton(typ) - elif isinstance(typ, TypeVar): - self.kind = VALUE - self.typevar = typ - else: - assert isinstance(typ, OperandKind) - self.kind = typ - - def get_doc(self): - # type: () -> str - if self.__doc__: - return self.__doc__ - if self.kind is VALUE: - return self.typevar.__doc__ - return self.kind.__doc__ - - def __str__(self): - # type: () -> str - return "`{}`".format(self.name) - - def is_value(self): - # type: () -> bool - """ - Is this an SSA value operand? - """ - return self.kind is VALUE - - def is_varargs(self): - # type: () -> bool - """ - Is this a VARIABLE_ARGS operand? - """ - return self.kind is VARIABLE_ARGS - - def is_immediate(self): - # type: () -> bool - """ - Is this an immediate operand? - - Note that this includes both `ImmediateKind` operands *and* entity - references. It is any operand that doesn't represent a value - dependency. - """ - return self.kind is not VALUE and self.kind is not VARIABLE_ARGS - - def is_cpu_flags(self): - # type: () -> bool - """ - Is this a CPU flags operand? - """ - return self.kind is VALUE and self.typevar.name in ['iflags', 'fflags'] diff --git a/cranelift-codegen/meta-python/cdsl/predicates.py b/cranelift-codegen/meta-python/cdsl/predicates.py deleted file mode 100644 index 0177c09ff..000000000 --- a/cranelift-codegen/meta-python/cdsl/predicates.py +++ /dev/null @@ -1,448 +0,0 @@ -""" -Cranelift predicates. - -A *predicate* is a function that computes a boolean result. The inputs to the -function determine the kind of predicate: - -- An *ISA predicate* is evaluated on the current ISA settings together with the - shared settings defined in the :py:mod:`settings` module. Once a target ISA - has been configured, the value of all ISA predicates is known. - -- An *Instruction predicate* is evaluated on an instruction instance, so it can - inspect all the immediate fields and type variables of the instruction. - Instruction predicates can be evaluated before register allocation, so they - can not depend on specific register assignments to the value operands or - outputs. 
- -Predicates can also be computed from other predicates using the `And`, `Or`, -and `Not` combinators defined in this module. - -All predicates have a *context* which determines where they can be evaluated. -For an ISA predicate, the context is the ISA settings group. For an instruction -predicate, the context is the instruction format. -""" -from __future__ import absolute_import -from functools import reduce -from .formats import instruction_context - -try: - from typing import Sequence, Tuple, Set, Any, Union, TYPE_CHECKING # noqa - if TYPE_CHECKING: - from .formats import InstructionFormat, InstructionContext, FormatField # noqa - from .instructions import Instruction # noqa - from .settings import BoolSetting, SettingGroup # noqa - from .types import ValueType # noqa - from .typevar import TypeVar # noqa - PredContext = Union[SettingGroup, InstructionFormat, - InstructionContext] - PredLeaf = Union[BoolSetting, 'FieldPredicate', 'TypePredicate', - 'CtrlTypePredicate'] - PredNode = Union[PredLeaf, 'Predicate'] - # A predicate key is a (recursive) tuple of primitive types that - # uniquely describes a predicate. It is used for interning. - PredKey = Tuple[Any, ...] -except ImportError: - pass - - -def _is_parent(a, b): - # type: (PredContext, PredContext) -> bool - """ - Return true if a is a parent of b, or equal to it. - """ - while b and a is not b: - b = getattr(b, 'parent', None) - return a is b - - -def _descendant(a, b): - # type: (PredContext, PredContext) -> PredContext - """ - If a is a parent of b or b is a parent of a, return the descendant of the - two. - - If neither is a parent of the other, return None. - """ - if _is_parent(a, b): - return b - if _is_parent(b, a): - return a - return None - - -class Predicate(object): - """ - Superclass for all computed predicates. - - Leaf predicates can have other types, such as `Setting`. - - :param parts: Tuple of components in the predicate expression. - """ - - def __init__(self, parts): - # type: (Sequence[PredNode]) -> None - self.parts = parts - self.context = reduce( - _descendant, - (p.predicate_context() for p in parts)) - assert self.context, "Incompatible predicate parts" - self.predkey = None # type: PredKey - - def __str__(self): - # type: () -> str - return '{}({})'.format(type(self).__name__, - ', '.join(map(str, self.parts))) - - def predicate_context(self): - # type: () -> PredContext - return self.context - - def predicate_leafs(self, leafs): - # type: (Set[PredLeaf]) -> None - """ - Collect all leaf predicates into the `leafs` set. - """ - for part in self.parts: - part.predicate_leafs(leafs) - - def rust_predicate(self, prec): - # type: (int) -> str - raise NotImplementedError("rust_predicate is an abstract method") - - def predicate_key(self): - # type: () -> PredKey - """Tuple uniquely identifying a predicate.""" - if not self.predkey: - p = tuple(p.predicate_key() for p in self.parts) # type: PredKey - self.predkey = (type(self).__name__,) + p - return self.predkey - - -class And(Predicate): - """ - Computed predicate that is true if all parts are true. - """ - - precedence = 2 - - def __init__(self, *args): - # type: (*PredNode) -> None - super(And, self).__init__(args) - - def rust_predicate(self, prec): - # type: (int) -> str - """ - Return a Rust expression computing the value of this predicate. - - The surrounding precedence determines whether parentheses are needed: - - 0. An `if` statement. - 1. An `||` expression. - 2. An `&&` expression. - 3. A `!` expression. 
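The precedence levels listed above drive parenthesization when a predicate tree is rendered to Rust: parentheses are emitted only when the surrounding precedence exceeds the operator's own. A minimal standalone model of that scheme (not the cdsl classes; `Not` would render at precedence 3 the same way):

    AND, OR = 2, 1

    def emit_and(parts, prec):
        s = ' && '.join(p(AND) for p in parts)
        return '({})'.format(s) if prec > AND else s

    def emit_or(parts, prec):
        s = ' || '.join(p(OR) for p in parts)
        return '({})'.format(s) if prec > OR else s

    leaf = lambda name: lambda prec: name
    inner = lambda prec: emit_or([leaf('a'), leaf('b')], prec)
    # The `||` nested under `&&` gets parenthesized; the outer `&&` does not.
    assert emit_and([inner, leaf('c')], 0) == '(a || b) && c'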
- """ - s = ' && '.join(p.rust_predicate(And.precedence) for p in self.parts) - if prec > And.precedence: - s = '({})'.format(s) - return s - - @staticmethod - def combine(*args): - # type: (*PredNode) -> PredNode - """ - Combine a sequence of predicates, allowing for `None` members. - - Return a predicate that is true when all non-`None` arguments are true, - or `None` if all of the arguments are `None`. - """ - args = tuple(p for p in args if p) - if args == (): - return None - if len(args) == 1: - return args[0] - # We have multiple predicate args. Combine with `And`. - return And(*args) - - -class Or(Predicate): - """ - Computed predicate that is true if any parts are true. - """ - - precedence = 1 - - def __init__(self, *args): - # type: (*PredNode) -> None - super(Or, self).__init__(args) - - def rust_predicate(self, prec): - # type: (int) -> str - s = ' || '.join(p.rust_predicate(Or.precedence) for p in self.parts) - if prec > Or.precedence: - s = '({})'.format(s) - return s - - -class Not(Predicate): - """ - Computed predicate that is true if its single part is false. - """ - - precedence = 3 - - def __init__(self, part): - # type: (PredNode) -> None - super(Not, self).__init__((part,)) - - def rust_predicate(self, prec): - # type: (int) -> str - return '!' + self.parts[0].rust_predicate(Not.precedence) - - -class FieldPredicate(object): - """ - An instruction predicate that performs a test on a single `FormatField`. - - :param field: The `FormatField` to be tested. - :param function: Boolean predicate function to call. - :param args: Additional arguments for the predicate function. - """ - - def __init__(self, field, function, args): - # type: (FormatField, str, Sequence[Any]) -> None - self.field = field - self.function = function - self.args = args - - def __str__(self): - # type: () -> str - args = (self.field.rust_name(),) + tuple(map(str, self.args)) - return '{}({})'.format(self.function, ', '.join(args)) - - def predicate_context(self): - # type: () -> PredContext - """ - This predicate can be evaluated in the context of an instruction - format. - """ - iform = self.field.format # type: InstructionFormat - return iform - - def predicate_key(self): - # type: () -> PredKey - a = tuple(map(str, self.args)) - return (self.function, str(self.field)) + a - - def predicate_leafs(self, leafs): - # type: (Set[PredLeaf]) -> None - leafs.add(self) - - def rust_predicate(self, prec): - # type: (int) -> str - """ - Return a string of Rust code that evaluates this predicate. - """ - # Prepend `field` to the predicate function arguments. - args = (self.field.rust_name(),) + tuple(map(str, self.args)) - return 'crate::predicates::{}({})'\ - .format(self.function, ', '.join(args)) - - -class IsEqual(FieldPredicate): - """ - Instruction predicate that checks if an immediate instruction format field - is equal to a constant value. - - :param field: `FormatField` to be checked. - :param value: The constant value to compare against. - """ - - def __init__(self, field, value): - # type: (FormatField, Any) -> None - super(IsEqual, self).__init__(field, 'is_equal', (value,)) - self.value = value - - -class IsZero32BitFloat(FieldPredicate): - """ - Instruction predicate that checks if an immediate instruction format field - is equal to zero. - - :param field: `FormatField` to be checked. - :param value: The constant value to check. 
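`And.combine` above is a small but load-bearing helper used by `Encoding`: it drops `None` members, never builds a one-element `And`, and returns `None` when nothing remains. A direct model of that contract:

    def combine(*args):
        args = tuple(p for p in args if p)
        if not args:
            return None
        if len(args) == 1:
            return args[0]
        # Multiple live predicates: conjoin them.
        return ('And',) + args

    assert combine(None, None) is None
    assert combine(None, 'p') == 'p'
    assert combine('p', 'q') == ('And', 'p', 'q')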
-    """
-
-    def __init__(self, field):
-        # type: (FormatField) -> None
-        super(IsZero32BitFloat, self).__init__(field,
-                                               'is_zero_32_bit_float',
-                                               ())
-
-
-class IsZero64BitFloat(FieldPredicate):
-    """
-    Instruction predicate that checks if an immediate instruction format field
-    is equal to zero.
-
-    :param field: `FormatField` to be checked.
-    """
-
-    def __init__(self, field):
-        # type: (FormatField) -> None
-        super(IsZero64BitFloat, self).__init__(field,
-                                               'is_zero_64_bit_float',
-                                               ())
-
-
-class IsSignedInt(FieldPredicate):
-    """
-    Instruction predicate that checks if an immediate instruction format field
-    is representable as an n-bit two's complement integer.
-
-    :param field: `FormatField` to be checked.
-    :param width: Number of bits in the allowed range.
-    :param scale: Number of low bits that must be 0.
-
-    The predicate is true if the field is in the range:
-    `-2^(width-1) -- 2^(width-1)-1`
-    and a multiple of `2^scale`.
-    """
-
-    def __init__(self, field, width, scale=0):
-        # type: (FormatField, int, int) -> None
-        super(IsSignedInt, self).__init__(
-            field, 'is_signed_int', (width, scale))
-        self.width = width
-        self.scale = scale
-        assert width >= 0 and width <= 64
-        assert scale >= 0 and scale < width
-
-
-class IsUnsignedInt(FieldPredicate):
-    """
-    Instruction predicate that checks if an immediate instruction format field
-    is representable as an n-bit unsigned integer.
-
-    :param field: `FormatField` to be checked.
-    :param width: Number of bits in the allowed range.
-    :param scale: Number of low bits that must be 0.
-
-    The predicate is true if the field is in the range:
-    `0 -- 2^width - 1` and a multiple of `2^scale`.
-    """
-
-    def __init__(self, field, width, scale=0):
-        # type: (FormatField, int, int) -> None
-        super(IsUnsignedInt, self).__init__(
-            field, 'is_unsigned_int', (width, scale))
-        self.width = width
-        self.scale = scale
-        assert width >= 0 and width <= 64
-        assert scale >= 0 and scale < width
-
-
-class TypePredicate(object):
-    """
-    An instruction predicate that checks the type of an SSA argument value.
-
-    Type predicates are used to implement encodings for instructions with
-    multiple type variables. The encoding tables are keyed by the controlling
-    type variable; type predicates check any secondary type variables.
-
-    A type predicate is not bound to any specific instruction format.
-
-    :param value_arg: Index of the value argument to type check.
-    :param value_type: The required value type.
-    """
-
-    def __init__(self, value_arg, value_type):
-        # type: (int, ValueType) -> None
-        assert value_arg >= 0
-        assert value_type is not None
-        self.value_arg = value_arg
-        self.value_type = value_type
-
-    def __str__(self):
-        # type: () -> str
-        return 'args[{}]:{}'.format(self.value_arg, self.value_type)
-
-    def predicate_context(self):
-        # type: () -> PredContext
-        return instruction_context
-
-    def predicate_key(self):
-        # type: () -> PredKey
-        return ('typecheck', self.value_arg, self.value_type.name)
-
-    def predicate_leafs(self, leafs):
-        # type: (Set[PredLeaf]) -> None
-        leafs.add(self)
-
-    @staticmethod
-    def typevar_check(inst, typevar, value_type):
-        # type: (Instruction, TypeVar, ValueType) -> TypePredicate
-        """
-        Return a type check predicate for the given type variable in `inst`.
-
-        The type variable must appear directly as the type of one of the
-        operands to `inst`, so this is only guaranteed to work for secondary
-        type variables.
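The range described for `IsSignedInt` (checked at runtime by the generated call to `crate::predicates::is_signed_int`) can be modeled directly. A runnable sketch of the documented semantics, not the Rust implementation itself:

    def is_signed_int(x, width, scale=0):
        # x must fit in `width`-bit two's complement and be a
        # multiple of 2^scale.
        lo = -(1 << (width - 1))
        hi = (1 << (width - 1)) - 1
        return lo <= x <= hi and x % (1 << scale) == 0

    assert is_signed_int(-128, 8)
    assert not is_signed_int(128, 8)        # out of range for 8 bits
    assert is_signed_int(12, 8, scale=2)    # multiple of 4
    assert not is_signed_int(13, 8, scale=2)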
- - Find an `inst` value operand whose type is determined by `typevar` and - create a `TypePredicate` that checks that the type variable has the - value `value_type`. - """ - # Find the first value operand whose type is `typevar`. - value_arg = next(i for i, opnum in enumerate(inst.value_opnums) - if inst.ins[opnum].typevar == typevar) - return TypePredicate(value_arg, value_type) - - def rust_predicate(self, prec): - # type: (int) -> str - """ - Return Rust code for evaluating this predicate. - - It is assumed that the context has `func` and `args` variables. - """ - return 'func.dfg.value_type(args[{}]) == {}'.format( - self.value_arg, self.value_type.rust_name()) - - -class CtrlTypePredicate(object): - """ - An instruction predicate that checks the controlling type variable - - :param value_type: The required value type. - """ - - def __init__(self, value_type): - # type: (ValueType) -> None - assert value_type is not None - self.value_type = value_type - - def __str__(self): - # type: () -> str - return 'ctrl_typevar:{}'.format(self.value_type) - - def predicate_context(self): - # type: () -> PredContext - return instruction_context - - def predicate_key(self): - # type: () -> PredKey - return ('ctrltypecheck', self.value_type.name) - - def predicate_leafs(self, leafs): - # type: (Set[PredLeaf]) -> None - leafs.add(self) - - def rust_predicate(self, prec): - # type: (int) -> str - """ - Return Rust code for evaluating this predicate. - - It is assumed that the context has `func` and `inst` variables. - """ - return 'func.dfg.ctrl_typevar(inst) == {}'.format( - self.value_type.rust_name()) diff --git a/cranelift-codegen/meta-python/cdsl/registers.py b/cranelift-codegen/meta-python/cdsl/registers.py deleted file mode 100644 index 1e4ffe75b..000000000 --- a/cranelift-codegen/meta-python/cdsl/registers.py +++ /dev/null @@ -1,413 +0,0 @@ -""" -Register set definitions ------------------------- - -Each ISA defines a separate register set that is used by the register allocator -and the final binary encoding of machine code. - -The CPU registers are first divided into disjoint register banks, represented -by a `RegBank` instance. Registers in different register banks never interfere -with each other. A typical CPU will have a general purpose and a floating point -register bank. - -A register bank consists of a number of *register units* which are the smallest -indivisible units of allocation and interference. A register unit doesn't -necessarily correspond to a particular number of bits in a register, it is more -like a placeholder that can be used to determine of a register is taken or not. - -The register allocator works with *register classes* which can allocate one or -more register units at a time. A register class allocates more than one -register unit at a time when its registers are composed of smaller allocatable -units. For example, the ARM double precision floating point registers are -composed of two single precision registers. -""" -from __future__ import absolute_import -from . import is_power_of_two, next_power_of_two - - -try: - from typing import Sequence, Tuple, List, Dict, Any, Optional, TYPE_CHECKING # noqa - if TYPE_CHECKING: - from .isa import TargetISA # noqa - # A tuple uniquely identifying a register class inside a register bank. - # (width, bitmask) - RCTup = Tuple[int, int] -except ImportError: - pass - - -# The number of 32-bit elements in a register unit mask -MASK_LEN = 3 - -# The maximum total number of register units allowed. 
-# This limit can be raised by also adjusting the RegUnitMask type in
-# src/isa/registers.rs.
-MAX_UNITS = MASK_LEN * 32
-
-
-class RegBank(object):
-    """
-    A register bank belonging to an ISA.
-
-    A register bank controls a set of *register units* disjoint from all the
-    other register banks in the ISA. The register units are numbered uniquely
-    within the target ISA, and the units in a register bank form a contiguous
-    sequence starting from a sufficiently aligned point that their low bits can
-    be used directly when encoding machine code instructions.
-
-    Register units can be given generated names like `r0`, `r1`, ..., or a
-    tuple of special register unit names can be provided.
-
-    :param name: Name of this register bank.
-    :param doc: Documentation string.
-    :param units: Number of register units.
-    :param pressure_tracking: Enable tracking of register pressure.
-    :param prefix: Prefix for generated unit names.
-    :param names: Special names for the first units. May be shorter than
-                  `units`; the remaining units are named using `prefix`.
-    """
-
-    def __init__(
-            self,
-            name,                    # type: str
-            isa,                     # type: TargetISA
-            doc,                     # type: str
-            units,                   # type: int
-            pressure_tracking=True,  # type: bool
-            prefix='r',              # type: str
-            names=()                 # type: Sequence[str]
-            ):
-        # type: (...) -> None
-        self.name = name
-        self.isa = isa
-        self.first_unit = 0
-        self.units = units
-        self.pressure_tracking = pressure_tracking
-        self.prefix = prefix
-        self.names = names
-        self.classes = list()  # type: List[RegClass]
-        self.toprcs = list()  # type: List[RegClass]
-
-        assert len(names) <= units
-
-        if isa.regbanks:
-            # Get the next free unit number.
-            last = isa.regbanks[-1]
-            u = last.first_unit + last.units
-            align = units
-            if not is_power_of_two(align):
-                align = next_power_of_two(align)
-            self.first_unit = (u + align - 1) & -align
-
-        self.index = len(isa.regbanks)
-        isa.regbanks.append(self)
-
-    def __repr__(self):
-        # type: () -> str
-        return ('RegBank({}, units={}, first_unit={})'
-                .format(self.name, self.units, self.first_unit))
-
-    def finish_regclasses(self):
-        # type: () -> None
-        """
-        Compute subclasses and the top-level register class.
-
-        Verify that the set of register classes satisfies:
-
-        1. Closed under intersection: The intersection of any two register
-           classes in the set is either empty or identical to a member of the
-           set.
-        2. There are no identical classes under different names.
-        3. Classes are sorted topologically such that all subclasses have a
-           higher index than the superclass.
-
-        We could reorder classes topologically here instead of just enforcing
-        the order, but the ordering tends to fall out naturally anyway.
-        """
-        cmap = dict()  # type: Dict[RCTup, RegClass]
-
-        for rc in self.classes:
-            # All register classes must be given a name.
-            assert rc.name, "Anonymous register class found"
-
-            # Check for duplicates.
-            tup = rc.rctup()
-            if tup in cmap:
-                raise AssertionError(
-                    '{} and {} are identical register classes'
-                    .format(rc, cmap[tup]))
-            cmap[tup] = rc
-
-        # Check intersections and topological order.
-        for idx, rc1 in enumerate(self.classes):
-            rc1.toprc = rc1
-            for rc2 in self.classes[0:idx]:
-                itup = rc1.intersect(rc2)
-                if itup is None:
-                    continue
-                if itup not in cmap:
-                    raise AssertionError(
-                        'intersection of {} and {} missing'
-                        .format(rc1, rc2))
-                irc = cmap[itup]
-                # rc1 > rc2, so rc2 can't be the sub-class.
-                if irc is rc2:
-                    raise AssertionError(
-                        'Bad topological order: {}/{}'
-                        .format(rc1, rc2))
-                if irc is rc1:
-                    # The intersection of rc1 and rc2 is rc1, so it must be a
-                    # sub-class.
-                    rc2.subclasses.append(rc1)
-                    rc1.toprc = rc2.toprc
-
-            if rc1.is_toprc():
-                self.toprcs.append(rc1)
-
-    def unit_by_name(self, name):
-        # type: (str) -> int
-        """
-        Get a register unit in this bank by name.
-        """
-        if name in self.names:
-            r = self.names.index(name)
-        elif name.startswith(self.prefix):
-            r = int(name[len(self.prefix):])
-        assert r < self.units, 'Invalid register name: ' + name
-        return self.first_unit + r
-
-
-class RegClass(object):
-    """
-    A register class is a subset of register units in a RegBank along with a
-    strategy for allocating registers.
-
-    The *width* parameter determines how many register units are allocated at a
-    time. Usually it is one, but for example the ARM D registers are
-    allocated two units at a time. When multiple units are allocated, it is
-    always a contiguous set of unit numbers.
-
-    :param bank: The register bank we're allocating from.
-    :param count: The maximum number of allocations in this register class. By
-                  default, the whole register bank can be allocated.
-    :param width: How many units to allocate at a time.
-    :param start: The first unit to allocate, relative to `bank.first_unit`.
-    """
-
-    def __init__(self, bank, count=0, width=1, start=0, bitmask=None):
-        # type: (RegBank, int, int, int, Optional[int]) -> None
-        self.name = None  # type: str
-        self.index = None  # type: int
-        self.bank = bank
-        self.width = width
-        self.bitmask = 0
-
-        # This is computed later in `finish_regclasses()`.
-        self.subclasses = list()  # type: List[RegClass]
-        self.toprc = None  # type: RegClass
-
-        assert width > 0
-
-        if bitmask:
-            self.bitmask = bitmask
-        else:
-            assert start >= 0 and start < bank.units
-            if count == 0:
-                count = bank.units // width
-            for a in range(count):
-                u = start + a * self.width
-                self.bitmask |= 1 << u
-
-        bank.classes.append(self)
-
-    def __str__(self):
-        # type: () -> str
-        return self.name
-
-    def is_toprc(self):
-        # type: () -> bool
-        """
-        Is this a top-level register class?
-
-        A top-level register class has no sub-classes. This can only be
-        answered after running `finish_regclasses()`.
-        """
-        return self.toprc is self
-
-    def rctup(self):
-        # type: () -> RCTup
-        """
-        Get a tuple that uniquely identifies the registers in this class.
-
-        The tuple can be used as a dictionary key to ensure that there are no
-        duplicate register classes.
-        """
-        return (self.width, self.bitmask)
-
-    def intersect(self, other):
-        # type: (RegClass) -> RCTup
-        """
-        Get a tuple representing the intersection of two register classes.
-
-        Returns `None` if the two classes are disjoint.
-        """
-        if self.width != other.width:
-            return None
-        intersection = self.bitmask & other.bitmask
-        if intersection == 0:
-            return None
-
-        return (self.width, intersection)
-
-    def __getitem__(self, sliced):
-        # type: (slice) -> RegClass
-        """
-        Create a sub-class of a register class using slice notation. The slice
-        indexes refer to allocations in the parent register class, not register
-        units.
-        """
-        assert isinstance(sliced, slice), "RegClass slicing can't be 1 reg"
-        # We could add strided sub-classes if needed.
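The unit-numbering arithmetic in `RegBank.__init__` rounds each bank's first unit up to the bank size rounded to a power of two, so that a unit's low bits can encode the register number directly. A worked example of that computation (the two-bank scenario is illustrative):

    def next_power_of_two(n):
        p = 1
        while p < n:
            p <<= 1
        return p

    def first_unit(prev_first, prev_units, units):
        u = prev_first + prev_units
        align = next_power_of_two(units)
        # Round u up to the next multiple of `align`.
        return (u + align - 1) & -align

    # A 16-unit bank followed by a 20-unit bank: the second bank
    # starts at the next multiple of 32.
    assert first_unit(0, 16, 20) == 32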
- assert sliced.step is None, 'Subclass striding not supported' - # Can't slice a non-contiguous class - assert self.is_contiguous(), 'Cannot slice non-contiguous RegClass' - - w = self.width - s = self.start() + sliced.start * w - c = sliced.stop - sliced.start - assert c > 1, "Can't have single-register classes" - - return RegClass(self.bank, count=c, width=w, start=s) - - def without(self, *registers): - # type: (*Register) -> RegClass - """ - Create a sub-class of a register class excluding a specific set of - registers. - - For example: GPR.without(GPR.r9) - """ - bm = self.bitmask - w = self.width - fmask = (1 << self.width) - 1 - for reg in registers: - bm &= ~(fmask << (reg.unit * w)) - - return RegClass(self.bank, bitmask=bm) - - def is_contiguous(self): - # type: () -> bool - """ - Returns boolean indicating whether a register class is a contiguous set - of register units. - """ - x = self.bitmask | (self.bitmask-1) - return self.bitmask != 0 and ((x+1) & x) == 0 - - def start(self): - # type: () -> int - """ - Returns the first valid register unit in this class. - """ - start = 0 - bm = self.bitmask - fmask = (1 << self.width) - 1 - while True: - if bm & fmask > 0: - break - start += 1 - bm >>= self.width - - return start - - def __getattr__(self, attr): - # type: (str) -> Register - """ - Get a specific register in the class by name. - - For example: `GPR.r5`. - """ - reg = Register(self, self.bank.unit_by_name(attr)) - # Save this register so we won't have to create it again. - setattr(self, attr, reg) - return reg - - def mask(self): - # type: () -> List[int] - """ - Compute a bit-mask of the register units allocated by this register - class. - - Return as a list of 32-bit integers. - """ - out_mask = [] - mask32 = (1 << 32) - 1 - bitmask = self.bitmask << self.bank.first_unit - for i in range(MASK_LEN): - out_mask.append((bitmask >> (i * 32)) & mask32) - - return out_mask - - def subclass_mask(self): - # type: () -> int - """ - Compute a bit-mask of subclasses, including self. - """ - m = 1 << self.index - for rc in self.subclasses: - m |= 1 << rc.index - return m - - @staticmethod - def extract_names(globs): - # type: (Dict[str, Any]) -> None - """ - Given a dict mapping name -> object as returned by `globals()`, find - all the RegClass objects and set their name from the dict key. - This is used to name a bunch of global values in a module. - """ - for name, obj in globs.items(): - if isinstance(obj, RegClass): - assert obj.name is None - obj.name = name - - -class Register(object): - """ - A specific register in a register class. - - A register is identified by the top-level register class it belongs to and - its first register unit. - - Specific registers are used to describe constraints on instructions where - some operands must use a fixed register. - - Register instances can be created with the constructor, or accessed as - attributes on the register class: `GPR.rcx`. - """ - def __init__(self, rc, unit): - # type: (RegClass, int) -> None - self.regclass = rc - self.unit = unit - - -class Stack(object): - """ - An operand that must be in a stack slot. - - A `Stack` object can be used to indicate an operand constraint for a value - operand that must live in a stack slot. - """ - def __init__(self, rc): - # type: (RegClass) -> None - self.regclass = rc - - def stack_base_mask(self): - # type: () -> str - """ - Get the StackBaseMask to use for this operand. - - This is a mask of base registers that can be supported by this operand. 
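`is_contiguous()` and `mask()` both lean on bitmask arithmetic; a self-contained model with worked checks (using MASK_LEN = 3 as defined earlier):

    def is_contiguous(bitmask):
        # Fill the trailing zeros with ones; a single run of ones then
        # becomes 2^k - 1, so adding one clears every bit.
        x = bitmask | (bitmask - 1)
        return bitmask != 0 and ((x + 1) & x) == 0

    def mask32(bitmask, first_unit, mask_len=3):
        # Shift into the ISA-wide unit numbering, then split into
        # 32-bit words.
        bitmask <<= first_unit
        return [(bitmask >> (32 * i)) & 0xffffffff for i in range(mask_len)]

    assert is_contiguous(0b00111100)
    assert not is_contiguous(0b0101)
    assert mask32(0xff, first_unit=32) == [0, 0xff, 0]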
- """ - # TODO: Make this configurable instead of just using the SP. - return 'StackBaseMask(1)' diff --git a/cranelift-codegen/meta-python/cdsl/settings.py b/cranelift-codegen/meta-python/cdsl/settings.py deleted file mode 100644 index fe7902422..000000000 --- a/cranelift-codegen/meta-python/cdsl/settings.py +++ /dev/null @@ -1,416 +0,0 @@ -"""Classes for describing settings and groups of settings.""" -from __future__ import absolute_import -from collections import OrderedDict -from .predicates import Predicate - -try: - from typing import Tuple, Set, List, Dict, Any, Union, TYPE_CHECKING # noqa - BoolOrPresetOrDict = Union['BoolSetting', 'Preset', Dict['Setting', Any]] - if TYPE_CHECKING: - from .predicates import PredLeaf, PredNode, PredKey # noqa -except ImportError: - pass - - -class Setting(object): - """ - A named setting variable that can be configured externally to Cranelift. - - Settings are normally not named when they are created. They get their name - from the `extract_names` method. - """ - - def __init__(self, doc): - # type: (str) -> None - self.name = None # type: str # Assigned later by `extract_names()`. - self.__doc__ = doc - # Offset of byte in settings vector containing this setting. - self.byte_offset = None # type: int - # Index into the generated DESCRIPTORS table. - self.descriptor_index = None # type: int - - self.group = SettingGroup.append(self) - - def __str__(self): - # type: () -> str - return '{}.{}'.format(self.group.name, self.name) - - def default_byte(self): - # type: () -> int - raise NotImplementedError("default_byte is an abstract method") - - def byte_for_value(self, value): - # type: (Any) -> int - """Get the setting byte value that corresponds to `value`""" - raise NotImplementedError("byte_for_value is an abstract method") - - def byte_mask(self): - # type: () -> int - """Get a mask of bits in our byte that are relevant to this setting.""" - # Only BoolSetting has a different mask. - return 0xff - - -class BoolSetting(Setting): - """ - A named setting with a boolean on/off value. - - :param doc: Documentation string. - :param default: The default value of this setting. - """ - - def __init__(self, doc, default=False): - # type: (str, bool) -> None - super(BoolSetting, self).__init__(doc) - self.default = default - self.bit_offset = None # type: int - - def default_byte(self): - # type: () -> int - """ - Get the default value of this setting, as a byte that can be bitwise - or'ed with the other booleans sharing the same byte. - """ - if self.default: - return 1 << self.bit_offset - else: - return 0 - - def byte_for_value(self, value): - # type: (Any) -> int - if value: - return 1 << self.bit_offset - else: - return 0 - - def byte_mask(self): - # type: () -> int - return 1 << self.bit_offset - - def predicate_context(self): - # type: () -> SettingGroup - """ - Return the context where this setting can be evaluated as a (leaf) - predicate. - """ - return self.group - - def predicate_key(self): - # type: () -> PredKey - assert self.name, "Can't compute key before setting is named" - return ('setting', self.group.name, self.name) - - def predicate_leafs(self, leafs): - # type: (Set[PredLeaf]) -> None - leafs.add(self) - - def rust_predicate(self, prec): - # type: (int) -> str - """ - Return the Rust code to compute the value of this setting. - - The emitted code assumes that the setting group exists as a local - variable. 
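Booleans that share a byte combine their `default_byte()` contributions with bitwise OR, each at its own `bit_offset`. A minimal illustration of that accumulation:

    def packed_default(defaults):
        # defaults[i] is the default of the boolean at bit i.
        b = 0
        for bit, on in enumerate(defaults):
            if on:
                b |= 1 << bit
        return b

    assert packed_default([True, False, True]) == 0b101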
- """ - return '{}.{}()'.format(self.group.name, self.name) - - -class NumSetting(Setting): - """ - A named setting with an integral value in the range 0--255. - - :param doc: Documentation string. - :param default: The default value of this setting. - """ - - def __init__(self, doc, default=0): - # type: (str, int) -> None - super(NumSetting, self).__init__(doc) - assert default == int(default) - assert default >= 0 and default <= 255 - self.default = default - - def default_byte(self): - # type: () -> int - return self.default - - def byte_for_value(self, value): - # type: (Any) -> int - assert isinstance(value, int), "NumSetting must be set to an int" - assert value >= 0 and value <= 255 - return value - - -class EnumSetting(Setting): - """ - A named setting with an enumerated set of possible values. - - The default value is always the first enumerator. - - :param doc: Documentation string. - :param args: Tuple of unique strings representing the possible values. - """ - - def __init__(self, doc, *args): - # type: (str, *str) -> None - super(EnumSetting, self).__init__(doc) - assert len(args) > 0, "EnumSetting must have at least one value" - self.values = tuple(str(x) for x in args) - self.default = self.values[0] - - def default_byte(self): - # type: () -> int - return 0 - - def byte_for_value(self, value): - # type: (Any) -> int - return self.values.index(value) - - -class SettingGroup(object): - """ - A group of settings. - - Whenever a :class:`Setting` object is created, it is added to the currently - open group. A setting group must be closed explicitly before another can be - opened. - - :param name: Short mnemonic name for setting group. - :param parent: Parent settings group. - """ - - # The currently open setting group. - _current = None # type: SettingGroup - - def __init__(self, name, parent=None): - # type: (str, SettingGroup) -> None - self.name = name - self.parent = parent - self.settings = [] # type: List[Setting] - # Named predicates computed from settings in this group or its - # parents. - self.named_predicates = OrderedDict() # type: OrderedDict[str, Predicate] # noqa - # All boolean predicates that can be accessed by number. This includes: - # - All boolean settings in this group. - # - All named predicates. - # - Added anonymous predicates, see `number_predicate()`. - # - Added parent predicates that are replicated in this group. - # Maps predicate -> number. - self.predicate_number = OrderedDict() # type: OrderedDict[PredNode, int] # noqa - self.presets = [] # type: List[Preset] - - # Fully qualified Rust module name. See gen_settings.py. - self.qual_mod = None # type: str - - self.open() - - def open(self): - # type: () -> None - """ - Open this setting group such that future new settings are added to this - group. - """ - assert SettingGroup._current is None, ( - "Can't open {} since {} is already open" - .format(self, SettingGroup._current)) - SettingGroup._current = self - - def close(self, globs=None): - # type: (Dict[str, Any]) -> None - """ - Close this setting group. This function must be called before opening - another setting group. - - :param globs: Pass in `globals()` to run `extract_names` on all - settings defined in the module. 
- """ - assert SettingGroup._current is self, ( - "Can't close {}, the open setting group is {}" - .format(self, SettingGroup._current)) - SettingGroup._current = None - if globs: - # Ensure that named predicates are ordered in a deterministic way - # that the Rust crate may simply reproduce, by pushing entries into - # a vector that we'll sort by name later. - named_predicates = [] - - for name, obj in globs.items(): - if isinstance(obj, Setting): - assert obj.name is None, obj.name - obj.name = name - if isinstance(obj, Predicate): - named_predicates.append((name, obj)) - if isinstance(obj, Preset): - assert obj.name is None, obj.name - obj.name = name - - named_predicates.sort(key=lambda x: x[0]) - for (name, obj) in named_predicates: - self.named_predicates[name] = obj - - self.layout() - - @staticmethod - def append(setting): - # type: (Setting) -> SettingGroup - g = SettingGroup._current - assert g, "Open a setting group before defining settings." - g.settings.append(setting) - return g - - @staticmethod - def append_preset(preset): - # type: (Preset) -> SettingGroup - g = SettingGroup._current - assert g, "Open a setting group before defining presets." - g.presets.append(preset) - return g - - def number_predicate(self, pred): - # type: (PredNode) -> int - """ - Make sure that `pred` has an assigned number, and will be included in - this group's bit vector. - - The numbered predicates include: - - `BoolSetting` settings that belong to this group. - - `Predicate` instances in `named_predicates`. - - `Predicate` instances without a name. - - Settings or computed predicates that belong to the parent group, but - need to be accessible by number in this group. - - The numbered predicates are referenced by the encoding tables as ISA - predicates. See the `isap` field on `Encoding`. - - :returns: The assigned predicate number in this group. - """ - if pred in self.predicate_number: - return self.predicate_number[pred] - else: - number = len(self.predicate_number) - self.predicate_number[pred] = number - return number - - def layout(self): - # type: () -> None - """ - Compute the layout of the byte vector used to represent this settings - group. - - The byte vector contains the following entries in order: - - 1. Byte-sized settings like `NumSetting` and `EnumSetting`. - 2. `BoolSetting` settings. - 3. Precomputed named predicates. - 4. Other numbered predicates, including anonymous predicates and parent - predicates that need to be accessible by number. - - Set `self.settings_size` to the length of the byte vector prefix that - contains the settings. All bytes after that are computed, not - configured. - - Set `self.boolean_offset` to the beginning of the numbered predicates, - 2. in the list above. - - Assign `byte_offset` and `bit_offset` fields in all settings. - - After calling this method, no more settings can be added, but - additional predicates can be made accessible with `number_predicate()`. - """ - assert len(self.predicate_number) == 0, "Too late for layout" - - # Assign the non-boolean settings. - byte_offset = 0 - for s in self.settings: - if not isinstance(s, BoolSetting): - s.byte_offset = byte_offset - byte_offset += 1 - - # Then the boolean settings. - self.boolean_offset = byte_offset - for s in self.settings: - if isinstance(s, BoolSetting): - number = self.number_predicate(s) - s.byte_offset = byte_offset + number // 8 - s.bit_offset = number % 8 - - # This is the end of the settings. Round up to a whole number of bytes. 
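The byte-vector layout that `layout()` computes can be modeled compactly: byte-sized settings come first, then booleans packed eight to a byte starting at `boolean_offset`. A sketch under those assumptions (the setting names are illustrative):

    def layout(settings):
        # Each setting is a (kind, name) pair; kind is 'num' or 'bool'.
        offsets, byte = {}, 0
        for kind, name in settings:
            if kind != 'bool':
                offsets[name] = (byte, None)   # whole-byte setting
                byte += 1
        boolean_offset, bit = byte, 0
        for kind, name in settings:
            if kind == 'bool':
                offsets[name] = (boolean_offset + bit // 8, bit % 8)
                bit += 1
        return offsets

    s = [('num', 'opt_level'), ('bool', 'is_pic'), ('bool', 'avoid_div_traps')]
    assert layout(s) == {'opt_level': (0, None),
                         'is_pic': (1, 0),
                         'avoid_div_traps': (1, 1)}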
- self.boolean_settings = len(self.predicate_number) - self.settings_size = self.byte_size() - - # Now assign numbers to all our named predicates. - for name, pred in self.named_predicates.items(): - self.number_predicate(pred) - - def byte_size(self): - # type: () -> int - """ - Compute the number of bytes required to hold all settings and - precomputed predicates. - - This is the size of the byte-sized settings plus all the numbered - predicate bits rounded up to a whole number of bytes. - """ - return self.boolean_offset + (len(self.predicate_number) + 7) // 8 - - -class Preset(object): - """ - A collection of setting values that are applied at once. - - A `Preset` represents a shorthand notation for applying a number of - settings at once. Example: - - nehalem = Preset(has_sse41, has_cmov, has_avx=0) - - Enabling the `nehalem` setting is equivalent to enabling `has_sse41` and - `has_cmov` while disabling the `has_avx` setting. - """ - - def __init__(self, *args): - # type: (*BoolOrPresetOrDict) -> None - self.name = None # type: str # Assigned later by `SettingGroup`. - # Each tuple provides the value for a setting. - self.values = list() # type: List[Tuple[Setting, Any]] - - for arg in args: - if isinstance(arg, Preset): - # Any presets in args are immediately expanded. - self.values.extend(arg.values) - elif isinstance(arg, dict): - # A dictionary of key: value pairs. - self.values.extend(arg.items()) - else: - # A BoolSetting to enable. - assert isinstance(arg, BoolSetting) - self.values.append((arg, True)) - - self.group = SettingGroup.append_preset(self) - # Index into the generated DESCRIPTORS table. - self.descriptor_index = None # type: int - - def layout(self): - # type: () -> List[Tuple[int, int]] - """ - Compute a list of (mask, byte) pairs that incorporate all values in - this preset. - - The list will have an entry for each setting byte in the settings - group. - """ - lst = [(0, 0)] * self.group.settings_size - - # Apply setting values in order. - for s, v in self.values: - ofs = s.byte_offset - s_mask = s.byte_mask() - s_val = s.byte_for_value(v) - assert (s_val & ~s_mask) == 0 - l_mask, l_val = lst[ofs] - # Accumulated mask of modified bits. - l_mask |= s_mask - # Overwrite the relevant bits with the new value. - l_val = (l_val & ~s_mask) | s_val - lst[ofs] = (l_mask, l_val) - - return lst diff --git a/cranelift-codegen/meta-python/cdsl/test_ast.py b/cranelift-codegen/meta-python/cdsl/test_ast.py deleted file mode 100644 index 750142af0..000000000 --- a/cranelift-codegen/meta-python/cdsl/test_ast.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import absolute_import -from unittest import TestCase -from doctest import DocTestSuite -from . 
import ast -from base.instructions import jump, iadd - - -def load_tests(loader, tests, ignore): - tests.addTests(DocTestSuite(ast)) - return tests - - -x = 'x' -y = 'y' -a = 'a' - - -class TestPatterns(TestCase): - def test_apply(self): - i = jump(x, y) - self.assertEqual(repr(i), "Apply(jump, ('x', 'y'))") - - i = iadd.i32(x, y) - self.assertEqual(repr(i), "Apply(iadd.i32, ('x', 'y'))") - - def test_single_ins(self): - pat = a << iadd.i32(x, y) - self.assertEqual(repr(pat), "('a',) << Apply(iadd.i32, ('x', 'y'))") diff --git a/cranelift-codegen/meta-python/cdsl/test_package.py b/cranelift-codegen/meta-python/cdsl/test_package.py deleted file mode 100644 index b66d60d69..000000000 --- a/cranelift-codegen/meta-python/cdsl/test_package.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -import doctest -import cdsl - - -def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(cdsl)) - return tests diff --git a/cranelift-codegen/meta-python/cdsl/test_ti.py b/cranelift-codegen/meta-python/cdsl/test_ti.py deleted file mode 100644 index b88113a20..000000000 --- a/cranelift-codegen/meta-python/cdsl/test_ti.py +++ /dev/null @@ -1,605 +0,0 @@ -from __future__ import absolute_import -from base.instructions import vselect, vsplit, vconcat, iconst, iadd, bint,\ - b1, icmp, iadd_cout, iadd_cin, uextend, sextend, ireduce, fpromote, \ - fdemote -from base.legalize import narrow, expand -from base.immediates import intcc -from base.types import i32, i8 -from .typevar import TypeVar -from .ast import Var, Def -from .xform import Rtl, XForm -from .ti import ti_rtl, subst, TypeEnv, get_type_env, TypesEqual, WiderOrEq -from unittest import TestCase -from functools import reduce - -try: - from .ti import TypeMap, ConstraintList, VarTyping, TypingOrError # noqa - from typing import List, Dict, Tuple, TYPE_CHECKING, cast # noqa -except ImportError: - TYPE_CHECKING = False - - -def agree(me, other): - # type: (TypeEnv, TypeEnv) -> bool - """ - Given TypeEnvs me and other, check if they agree. As part of that build - a map m from TVs in me to their corresponding TVs in other. - Specifically: - - 1. Check that all TVs that are keys in me.type_map are also defined - in other.type_map - - 2. For any tv in me.type_map check that: - me[tv].get_typeset() == other[tv].get_typeset() - - 3. Set m[me[tv]] = other[tv] in the substitution m - - 4. If we find another tv1 such that me[tv1] == me[tv], assert that - other[tv1] == m[me[tv1]] == m[me[tv]] = other[tv] - - 5. Check that me and other have the same constraints under the - substitution m - """ - m = {} # type: TypeMap - # Check that our type map and other's agree and built substitution m - for tv in me.type_map: - if (me[tv] not in m): - m[me[tv]] = other[tv] - if me[tv].get_typeset() != other[tv].get_typeset(): - return False - else: - if m[me[tv]] != other[tv]: - return False - - # Translate our constraints using m, and sort - me_equiv_constr = sorted([constr.translate(m) - for constr in me.constraints], key=repr) - # Sort other's constraints - other_equiv_constr = sorted([constr.translate(other) - for constr in other.constraints], key=repr) - return me_equiv_constr == other_equiv_constr - - -def check_typing(got_or_err, expected, symtab=None): - # type: (TypingOrError, Tuple[VarTyping, ConstraintList], Dict[str, Var]) -> None # noqa - """ - Check that a the typing we received (got_or_err) complies with the - expected typing (expected). 
If symtab is specified, substitute the Vars in - expected using symtab first (used when checking type inference on XForms) - """ - (m, c) = expected - got = get_type_env(got_or_err) - - if (symtab is not None): - # For xforms we first need to re-write our TVs in terms of the tvs - # stored internally in the XForm. Use the symtab passed - subst_m = {k.get_typevar(): symtab[str(k)].get_typevar() - for k in m.keys()} - # Convert m from a Var->TypeVar map to TypeVar->TypeVar map where - # the key TypeVar is re-written to its XForm internal version - tv_m = {subst(k.get_typevar(), subst_m): v for (k, v) in m.items()} - # Rewrite the TVs in the input constraints to their XForm internal - # versions - c = [constr.translate(subst_m) for constr in c] - else: - # If no symtab, just convert m from Var->TypeVar map to a - # TypeVar->TypeVar map - tv_m = {k.get_typevar(): v for (k, v) in m.items()} - - expected_typ = TypeEnv((tv_m, c)) - assert agree(expected_typ, got), \ - "typings disagree:\n {} \n {}".format(got.dot(), - expected_typ.dot()) - - -def check_concrete_typing_rtl(var_types, rtl): - # type: (VarTyping, Rtl) -> None - """ - Check that a concrete type assignment var_types (Dict[Var, TypeVar]) is - valid for an Rtl rtl. Specifically check that: - - 1) For each Var v \\in rtl, v is defined in var_types - - 2) For all v, var_types[v] is a singleton type - - 3) For each v, and each location u, where v is used with expected type - tv_u, var_types[v].get_typeset() is a subset of - subst(tv_u, m).get_typeset() where m is the substitution of - formals->actuals we are building so far. - - 4) If tv_u is non-derived and not in m, set m[tv_u]= var_types[v] - """ - for d in rtl.rtl: - assert isinstance(d, Def) - inst = d.expr.inst - # Accumulate all actual TVs for value defs/opnums in actual_tvs - actual_tvs = [var_types[d.defs[i]] for i in inst.value_results] - for v in [d.expr.args[i] for i in inst.value_opnums]: - assert isinstance(v, Var) - actual_tvs.append(var_types[v]) - - # Accumulate all formal TVs for value defs/opnums in actual_tvs - formal_tvs = [inst.outs[i].typevar for i in inst.value_results] +\ - [inst.ins[i].typevar for i in inst.value_opnums] - m = {} # type: TypeMap - - # For each actual/formal pair check that they agree - for (actual_tv, formal_tv) in zip(actual_tvs, formal_tvs): - # actual should be a singleton - assert actual_tv.singleton_type() is not None - formal_tv = subst(formal_tv, m) - # actual should agree with the concretized formal - assert actual_tv.get_typeset().issubset(formal_tv.get_typeset()) - - if formal_tv not in m and not formal_tv.is_derived: - m[formal_tv] = actual_tv - - -def check_concrete_typing_xform(var_types, xform): - # type: (VarTyping, XForm) -> None - """ - Check a concrete type assignment var_types for an XForm xform - """ - check_concrete_typing_rtl(var_types, xform.src) - check_concrete_typing_rtl(var_types, xform.dst) - - -class TypeCheckingBaseTest(TestCase): - def setUp(self): - # type: () -> None - self.v0 = Var("v0") - self.v1 = Var("v1") - self.v2 = Var("v2") - self.v3 = Var("v3") - self.v4 = Var("v4") - self.v5 = Var("v5") - self.v6 = Var("v6") - self.v7 = Var("v7") - self.v8 = Var("v8") - self.v9 = Var("v9") - self.imm0 = Var("imm0") - self.IxN_nonscalar = TypeVar("IxN", "", ints=True, scalars=False, - simd=True) - self.TxN = TypeVar("TxN", "", ints=True, bools=True, floats=True, - scalars=False, simd=True) - self.b1 = TypeVar.singleton(b1) - - -class TestRTL(TypeCheckingBaseTest): - def test_bad_rtl1(self): - # type: () -> None - r = Rtl( 
- (self.v0, self.v1) << vsplit(self.v2), - self.v3 << vconcat(self.v0, self.v2), - ) - ti = TypeEnv() - self.assertEqual(ti_rtl(r, ti), - "On line 1: fail ti on `typeof_v2` <: `1`: " + - "Error: empty type created when unifying " + - "`typeof_v2` and `half_vector(typeof_v2)`") - - def test_vselect(self): - # type: () -> None - r = Rtl( - self.v0 << vselect(self.v1, self.v2, self.v3), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - txn = self.TxN.get_fresh_copy("TxN1") - check_typing(typing, ({ - self.v0: txn, - self.v1: txn.as_bool(), - self.v2: txn, - self.v3: txn - }, [])) - - def test_vselect_icmpimm(self): - # type: () -> None - r = Rtl( - self.v0 << iconst(self.imm0), - self.v1 << icmp(intcc.eq, self.v2, self.v0), - self.v5 << vselect(self.v1, self.v3, self.v4), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - ixn = self.IxN_nonscalar.get_fresh_copy("IxN1") - txn = self.TxN.get_fresh_copy("TxN1") - check_typing(typing, ({ - self.v0: ixn, - self.v1: ixn.as_bool(), - self.v2: ixn, - self.v3: txn, - self.v4: txn, - self.v5: txn, - }, [TypesEqual(ixn.as_bool(), txn.as_bool())])) - - def test_vselect_vsplits(self): - # type: () -> None - r = Rtl( - self.v3 << vselect(self.v0, self.v1, self.v2), - (self.v4, self.v5) << vsplit(self.v3), - (self.v6, self.v7) << vsplit(self.v4), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - t = TypeVar("t", "", ints=True, bools=True, floats=True, - simd=(4, 256)) - check_typing(typing, ({ - self.v0: t.as_bool(), - self.v1: t, - self.v2: t, - self.v3: t, - self.v4: t.half_vector(), - self.v5: t.half_vector(), - self.v6: t.half_vector().half_vector(), - self.v7: t.half_vector().half_vector(), - }, [])) - - def test_vselect_vconcats(self): - # type: () -> None - r = Rtl( - self.v3 << vselect(self.v0, self.v1, self.v2), - self.v8 << vconcat(self.v3, self.v3), - self.v9 << vconcat(self.v8, self.v8), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - t = TypeVar("t", "", ints=True, bools=True, floats=True, - simd=(2, 64)) - check_typing(typing, ({ - self.v0: t.as_bool(), - self.v1: t, - self.v2: t, - self.v3: t, - self.v8: t.double_vector(), - self.v9: t.double_vector().double_vector(), - }, [])) - - def test_vselect_vsplits_vconcats(self): - # type: () -> None - r = Rtl( - self.v3 << vselect(self.v0, self.v1, self.v2), - (self.v4, self.v5) << vsplit(self.v3), - (self.v6, self.v7) << vsplit(self.v4), - self.v8 << vconcat(self.v3, self.v3), - self.v9 << vconcat(self.v8, self.v8), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - t = TypeVar("t", "", ints=True, bools=True, floats=True, - simd=(4, 64)) - check_typing(typing, ({ - self.v0: t.as_bool(), - self.v1: t, - self.v2: t, - self.v3: t, - self.v4: t.half_vector(), - self.v5: t.half_vector(), - self.v6: t.half_vector().half_vector(), - self.v7: t.half_vector().half_vector(), - self.v8: t.double_vector(), - self.v9: t.double_vector().double_vector(), - }, [])) - - def test_bint(self): - # type: () -> None - r = Rtl( - self.v4 << iadd(self.v1, self.v2), - self.v5 << bint(self.v3), - self.v0 << iadd(self.v4, self.v5) - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - itype = TypeVar("t", "", ints=True, simd=(1, 256)) - btype = TypeVar("b", "", bools=True, simd=True) - - # Check that self.v5 gets the same integer type as - # the rest of them - # TODO: Add constraint nlanes(v3) == nlanes(v1) when we - # add that type constraint to bint - check_typing(typing, ({ - self.v1: itype, - self.v2: itype, - self.v4: itype, - self.v5: itype, - self.v3: btype, - self.v0: itype, - }, [])) - - def 
test_fully_bound_inst_inference_bad(self): - # Incompatible bound instructions fail accordingly - r = Rtl( - self.v3 << uextend.i32(self.v1), - self.v4 << uextend.i16(self.v2), - self.v5 << iadd(self.v3, self.v4), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - - self.assertEqual(typing, - "On line 2: fail ti on `typeof_v4` <: `4`: " + - "Error: empty type created when unifying " + - "`i16` and `i32`") - - def test_extend_reduce(self): - # type: () -> None - r = Rtl( - self.v1 << uextend(self.v0), - self.v2 << ireduce(self.v1), - self.v3 << sextend(self.v2), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - typing = typing.extract() - - itype0 = TypeVar("t", "", ints=True, simd=(1, 256)) - itype1 = TypeVar("t1", "", ints=True, simd=(1, 256)) - itype2 = TypeVar("t2", "", ints=True, simd=(1, 256)) - itype3 = TypeVar("t3", "", ints=True, simd=(1, 256)) - - check_typing(typing, ({ - self.v0: itype0, - self.v1: itype1, - self.v2: itype2, - self.v3: itype3, - }, [WiderOrEq(itype1, itype0), - WiderOrEq(itype1, itype2), - WiderOrEq(itype3, itype2)])) - - def test_extend_reduce_enumeration(self): - # type: () -> None - for op in (uextend, sextend, ireduce): - r = Rtl( - self.v1 << op(self.v0), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti).extract() - - # The number of possible typings is 9 * (3+ 2*2 + 3) = 90 - lst = [(t[self.v0], t[self.v1]) for t in typing.concrete_typings()] - assert (len(lst) == len(set(lst)) and len(lst) == 90) - for (tv0, tv1) in lst: - typ0, typ1 = (tv0.singleton_type(), tv1.singleton_type()) - if (op == ireduce): - assert typ0.wider_or_equal(typ1) - else: - assert typ1.wider_or_equal(typ0) - - def test_fpromote_fdemote(self): - # type: () -> None - r = Rtl( - self.v1 << fpromote(self.v0), - self.v2 << fdemote(self.v1), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti) - typing = typing.extract() - - ftype0 = TypeVar("t", "", floats=True, simd=(1, 256)) - ftype1 = TypeVar("t1", "", floats=True, simd=(1, 256)) - ftype2 = TypeVar("t2", "", floats=True, simd=(1, 256)) - - check_typing(typing, ({ - self.v0: ftype0, - self.v1: ftype1, - self.v2: ftype2, - }, [WiderOrEq(ftype1, ftype0), - WiderOrEq(ftype1, ftype2)])) - - def test_fpromote_fdemote_enumeration(self): - # type: () -> None - for op in (fpromote, fdemote): - r = Rtl( - self.v1 << op(self.v0), - ) - ti = TypeEnv() - typing = ti_rtl(r, ti).extract() - - # The number of possible typings is 9*(2 + 1) = 27 - lst = [(t[self.v0], t[self.v1]) for t in typing.concrete_typings()] - assert (len(lst) == len(set(lst)) and len(lst) == 27) - for (tv0, tv1) in lst: - (typ0, typ1) = (tv0.singleton_type(), tv1.singleton_type()) - if (op == fdemote): - assert typ0.wider_or_equal(typ1) - else: - assert typ1.wider_or_equal(typ0) - - -class TestXForm(TypeCheckingBaseTest): - def test_iadd_cout(self): - # type: () -> None - x = XForm(Rtl((self.v0, self.v1) << iadd_cout(self.v2, self.v3),), - Rtl( - self.v0 << iadd(self.v2, self.v3), - self.v1 << icmp(intcc.ult, self.v0, self.v2) - )) - itype = TypeVar("t", "", ints=True, simd=(1, 1)) - - check_typing(x.ti, ({ - self.v0: itype, - self.v2: itype, - self.v3: itype, - self.v1: itype.as_bool(), - }, []), x.symtab) - - def test_iadd_cin(self): - # type: () -> None - x = XForm(Rtl(self.v0 << iadd_cin(self.v1, self.v2, self.v3)), - Rtl( - self.v4 << iadd(self.v1, self.v2), - self.v5 << bint(self.v3), - self.v0 << iadd(self.v4, self.v5) - )) - itype = TypeVar("t", "", ints=True, simd=(1, 1)) - - check_typing(x.ti, ({ - self.v0: itype, - self.v1: itype, - self.v2: itype, - self.v3: self.b1, - self.v4: 
itype, - self.v5: itype, - }, []), x.symtab) - - def test_enumeration_with_constraints(self): - # type: () -> None - xform = XForm( - Rtl( - self.v0 << iconst(self.imm0), - self.v1 << icmp(intcc.eq, self.v2, self.v0), - self.v5 << vselect(self.v1, self.v3, self.v4) - ), - Rtl( - self.v0 << iconst(self.imm0), - self.v1 << icmp(intcc.eq, self.v2, self.v0), - self.v5 << vselect(self.v1, self.v3, self.v4) - )) - - # Check all var assigns are correct - assert len(xform.ti.constraints) > 0 - concrete_var_assigns = list(xform.ti.concrete_typings()) - - v0 = xform.symtab[str(self.v0)] - v1 = xform.symtab[str(self.v1)] - v2 = xform.symtab[str(self.v2)] - v3 = xform.symtab[str(self.v3)] - v4 = xform.symtab[str(self.v4)] - v5 = xform.symtab[str(self.v5)] - - for var_m in concrete_var_assigns: - assert var_m[v0] == var_m[v2] and \ - var_m[v3] == var_m[v4] and\ - var_m[v5] == var_m[v3] and\ - var_m[v1] == var_m[v2].as_bool() and\ - var_m[v1].get_typeset() == var_m[v3].as_bool().get_typeset() - check_concrete_typing_xform(var_m, xform) - - # The number of possible typings here is: - # 8 cases for v0 = i8xN times 2 options for v3 - i8, b8 = 16 - # 8 cases for v0 = i16xN times 2 options for v3 - i16, b16 = 16 - # 8 cases for v0 = i32xN times 3 options for v3 - i32, b32, f32 = 24 - # 8 cases for v0 = i64xN times 3 options for v3 - i64, b64, f64 = 24 - # - # (Note we have 8 cases for lanes since vselect prevents scalars) - # Total: 2*16 + 2*24 = 80 - assert len(concrete_var_assigns) == 80 - - def test_base_legalizations_enumeration(self): - # type: () -> None - for xform in narrow.xforms + expand.xforms: - # Any legalization patterns we defined should have at least 1 - # concrete typing - concrete_typings_list = list(xform.ti.concrete_typings()) - assert len(concrete_typings_list) > 0 - - # If there are no free_typevars, this is a non-polymorphic pattern. - # There should be only one possible concrete typing. - if (len(xform.ti.free_typevars()) == 0): - assert len(concrete_typings_list) == 1 - continue - - # For any patterns where the type env includes constraints, at - # least one of the "theoretically possible" concrete typings must - # be prevented by the constraints. (i.e. we are not emitting - # unnecessary constraints). 
- # We check that by asserting that the number of concrete typings is - # less than the number of all possible free typevar assignments - if (len(xform.ti.constraints) > 0): - theoretical_num_typings =\ - reduce(lambda x, y: x*y, - [tv.get_typeset().size() - for tv in xform.ti.free_typevars()], 1) - assert len(concrete_typings_list) < theoretical_num_typings - - # Check the validity of each individual concrete typing against the - # xform - for concrete_typing in concrete_typings_list: - check_concrete_typing_xform(concrete_typing, xform) - - def test_bound_inst_inference(self): - # First example from issue #26 - x = XForm( - Rtl( - self.v0 << iadd(self.v1, self.v2), - ), - Rtl( - self.v3 << uextend.i32(self.v1), - self.v4 << uextend.i32(self.v2), - self.v5 << iadd(self.v3, self.v4), - self.v0 << ireduce(self.v5) - )) - itype = TypeVar("t", "", ints=True, simd=True) - i32t = TypeVar.singleton(i32) - - check_typing(x.ti, ({ - self.v0: itype, - self.v1: itype, - self.v2: itype, - self.v3: i32t, - self.v4: i32t, - self.v5: i32t, - }, [WiderOrEq(i32t, itype)]), x.symtab) - - def test_bound_inst_inference1(self): - # Second example taken from issue #26 - x = XForm( - Rtl( - self.v0 << iadd(self.v1, self.v2), - ), - Rtl( - self.v3 << uextend(self.v1), - self.v4 << uextend(self.v2), - self.v5 << iadd.i32(self.v3, self.v4), - self.v0 << ireduce(self.v5) - )) - itype = TypeVar("t", "", ints=True, simd=True) - i32t = TypeVar.singleton(i32) - - check_typing(x.ti, ({ - self.v0: itype, - self.v1: itype, - self.v2: itype, - self.v3: i32t, - self.v4: i32t, - self.v5: i32t, - }, [WiderOrEq(i32t, itype)]), x.symtab) - - def test_fully_bound_inst_inference(self): - # Second example taken from issue #26 with complete bounds - x = XForm( - Rtl( - self.v0 << iadd(self.v1, self.v2), - ), - Rtl( - self.v3 << uextend.i32.i8(self.v1), - self.v4 << uextend.i32.i8(self.v2), - self.v5 << iadd(self.v3, self.v4), - self.v0 << ireduce(self.v5) - )) - i8t = TypeVar.singleton(i8) - i32t = TypeVar.singleton(i32) - - # Note no constraints here since they are all trivial - check_typing(x.ti, ({ - self.v0: i8t, - self.v1: i8t, - self.v2: i8t, - self.v3: i32t, - self.v4: i32t, - self.v5: i32t, - }, []), x.symtab) - - def test_fully_bound_inst_inference_bad(self): - # Can't force a mistyped XForm using bound instructions - with self.assertRaises(AssertionError): - XForm( - Rtl( - self.v0 << iadd(self.v1, self.v2), - ), - Rtl( - self.v3 << uextend.i32.i8(self.v1), - self.v4 << uextend.i32.i16(self.v2), - self.v5 << iadd(self.v3, self.v4), - self.v0 << ireduce(self.v5) - )) diff --git a/cranelift-codegen/meta-python/cdsl/test_typevar.py b/cranelift-codegen/meta-python/cdsl/test_typevar.py deleted file mode 100644 index 48806cc4a..000000000 --- a/cranelift-codegen/meta-python/cdsl/test_typevar.py +++ /dev/null @@ -1,266 +0,0 @@ -from __future__ import absolute_import -from unittest import TestCase -from doctest import DocTestSuite -from . 
import typevar -from .typevar import TypeSet, TypeVar -from base.types import i32, i16, b1, f64 -from itertools import product -from functools import reduce - - -def load_tests(loader, tests, ignore): - tests.addTests(DocTestSuite(typevar)) - return tests - - -class TestTypeSet(TestCase): - def test_invalid(self): - with self.assertRaises(AssertionError): - TypeSet(lanes=(2, 1)) - with self.assertRaises(AssertionError): - TypeSet(ints=(32, 16)) - with self.assertRaises(AssertionError): - TypeSet(floats=(32, 16)) - with self.assertRaises(AssertionError): - TypeSet(bools=(32, 16)) - with self.assertRaises(AssertionError): - TypeSet(ints=(32, 33)) - - def test_hash(self): - a = TypeSet(lanes=True, ints=True, floats=True) - b = TypeSet(lanes=True, ints=True, floats=True) - c = TypeSet(lanes=True, ints=(8, 16), floats=True) - self.assertEqual(a, b) - self.assertNotEqual(a, c) - s = set() - s.add(a) - self.assertTrue(a in s) - self.assertTrue(b in s) - self.assertFalse(c in s) - - def test_hash_modified(self): - a = TypeSet(lanes=True, ints=True, floats=True) - s = set() - s.add(a) - a.ints.remove(64) - # Can't rehash after modification. - with self.assertRaises(AssertionError): - a in s - - def test_forward_images(self): - a = TypeSet(lanes=(2, 8), ints=(8, 8), floats=(32, 32)) - b = TypeSet(lanes=(1, 8), ints=(8, 8), floats=(32, 32)) - self.assertEqual(a.lane_of(), TypeSet(ints=(8, 8), floats=(32, 32))) - - c = TypeSet(lanes=(2, 8)) - c.bools = set([8, 32]) - - # Test case with disjoint intervals - self.assertEqual(a.as_bool(), c) - - # For as_bool check b1 is present when 1 \in lanes - d = TypeSet(lanes=(1, 8)) - d.bools = set([1, 8, 32]) - self.assertEqual(b.as_bool(), d) - - self.assertEqual(TypeSet(lanes=(1, 32)).half_vector(), - TypeSet(lanes=(1, 16))) - - self.assertEqual(TypeSet(lanes=(1, 32)).double_vector(), - TypeSet(lanes=(2, 64))) - - self.assertEqual(TypeSet(lanes=(128, 256)).double_vector(), - TypeSet(lanes=(256, 256))) - - self.assertEqual(TypeSet(ints=(8, 32)).half_width(), - TypeSet(ints=(8, 16))) - - self.assertEqual(TypeSet(ints=(8, 32)).double_width(), - TypeSet(ints=(16, 64))) - - self.assertEqual(TypeSet(ints=(32, 64)).double_width(), - TypeSet(ints=(64, 64))) - - # Should produce an empty ts - self.assertEqual(TypeSet(floats=(32, 32)).half_width(), - TypeSet()) - - self.assertEqual(TypeSet(floats=(32, 64)).half_width(), - TypeSet(floats=(32, 32))) - - self.assertEqual(TypeSet(floats=(32, 32)).double_width(), - TypeSet(floats=(64, 64))) - - self.assertEqual(TypeSet(floats=(32, 64)).double_width(), - TypeSet(floats=(64, 64))) - - # Bools have trickier behavior around b1 (since b2, b4 don't exist) - self.assertEqual(TypeSet(bools=(1, 8)).half_width(), - TypeSet()) - - t = TypeSet() - t.bools = set([8, 16]) - self.assertEqual(TypeSet(bools=(1, 32)).half_width(), t) - - # double_width() of bools={1, 8, 16} must not include 2 or 8 - t.bools = set([16, 32]) - self.assertEqual(TypeSet(bools=(1, 16)).double_width(), t) - - self.assertEqual(TypeSet(bools=(32, 64)).double_width(), - TypeSet(bools=(64, 64))) - - def test_get_singleton(self): - # Raise error when calling get_singleton() on non-singleton TS - t = TypeSet(lanes=(1, 1), ints=(8, 8), floats=(32, 32)) - with self.assertRaises(AssertionError): - t.get_singleton() - t = TypeSet(lanes=(1, 2), floats=(32, 32)) - - with self.assertRaises(AssertionError): - t.get_singleton() - - self.assertEqual(TypeSet(ints=(16, 16)).get_singleton(), i16) - self.assertEqual(TypeSet(floats=(64, 64)).get_singleton(), f64) - 
self.assertEqual(TypeSet(bools=(1, 1)).get_singleton(), b1) - self.assertEqual(TypeSet(lanes=(4, 4), ints=(32, 32)).get_singleton(), - i32.by(4)) - - def test_preimage(self): - t = TypeSet(lanes=(1, 1), ints=(8, 8), floats=(32, 32)) - - # LANEOF - self.assertEqual(TypeSet(lanes=True, ints=(8, 8), floats=(32, 32)), - t.preimage(TypeVar.LANEOF)) - # Inverse of empty set is still empty across LANEOF - self.assertEqual(TypeSet(), - TypeSet().preimage(TypeVar.LANEOF)) - - # ASBOOL - t = TypeSet(lanes=(1, 4), bools=(1, 64)) - self.assertEqual(t.preimage(TypeVar.ASBOOL), - TypeSet(lanes=(1, 4), ints=True, bools=True, - floats=True)) - - # Half/Double Vector - t = TypeSet(lanes=(1, 1), ints=(8, 8)) - t1 = TypeSet(lanes=(256, 256), ints=(8, 8)) - self.assertEqual(t.preimage(TypeVar.DOUBLEVECTOR).size(), 0) - self.assertEqual(t1.preimage(TypeVar.HALFVECTOR).size(), 0) - - t = TypeSet(lanes=(1, 16), ints=(8, 16), floats=(32, 32)) - t1 = TypeSet(lanes=(64, 256), bools=(1, 32)) - - self.assertEqual(t.preimage(TypeVar.DOUBLEVECTOR), - TypeSet(lanes=(1, 8), ints=(8, 16), floats=(32, 32))) - self.assertEqual(t1.preimage(TypeVar.HALFVECTOR), - TypeSet(lanes=(128, 256), bools=(1, 32))) - - # Half/Double Width - t = TypeSet(ints=(8, 8), floats=(32, 32), bools=(1, 8)) - t1 = TypeSet(ints=(64, 64), floats=(64, 64), bools=(64, 64)) - self.assertEqual(t.preimage(TypeVar.DOUBLEWIDTH).size(), 0) - self.assertEqual(t1.preimage(TypeVar.HALFWIDTH).size(), 0) - - t = TypeSet(lanes=(1, 16), ints=(8, 16), floats=(32, 64)) - t1 = TypeSet(lanes=(64, 256), bools=(1, 64)) - - self.assertEqual(t.preimage(TypeVar.DOUBLEWIDTH), - TypeSet(lanes=(1, 16), ints=(8, 8), floats=(32, 32))) - self.assertEqual(t1.preimage(TypeVar.HALFWIDTH), - TypeSet(lanes=(64, 256), bools=(16, 64))) - - -def has_non_bijective_derived_f(iterable): - return any(not TypeVar.is_bijection(x) for x in iterable) - - -class TestTypeVar(TestCase): - def test_functions(self): - x = TypeVar('x', 'all ints', ints=True) - with self.assertRaises(AssertionError): - x.double_width() - with self.assertRaises(AssertionError): - x.half_width() - - x2 = TypeVar('x2', 'i16 and up', ints=(16, 64)) - with self.assertRaises(AssertionError): - x2.double_width() - self.assertEqual(str(x2.half_width()), '`half_width(x2)`') - self.assertEqual(x2.half_width().rust_expr(), 'x2.half_width()') - self.assertEqual( - x2.half_width().double_width().rust_expr(), - 'x2.half_width().double_width()') - - x3 = TypeVar('x3', 'up to i32', ints=(8, 32)) - self.assertEqual(str(x3.double_width()), '`double_width(x3)`') - with self.assertRaises(AssertionError): - x3.half_width() - - def test_singleton(self): - x = TypeVar.singleton(i32) - self.assertEqual(str(x), '`i32`') - self.assertEqual(min(x.type_set.ints), 32) - self.assertEqual(max(x.type_set.ints), 32) - self.assertEqual(min(x.type_set.lanes), 1) - self.assertEqual(max(x.type_set.lanes), 1) - self.assertEqual(len(x.type_set.floats), 0) - self.assertEqual(len(x.type_set.bools), 0) - - x = TypeVar.singleton(i32.by(4)) - self.assertEqual(str(x), '`i32x4`') - self.assertEqual(min(x.type_set.ints), 32) - self.assertEqual(max(x.type_set.ints), 32) - self.assertEqual(min(x.type_set.lanes), 4) - self.assertEqual(max(x.type_set.lanes), 4) - self.assertEqual(len(x.type_set.floats), 0) - self.assertEqual(len(x.type_set.bools), 0) - - def test_stress_constrain_types(self): - # Get all 43 possible derived vars of length up to 2 - funcs = [TypeVar.LANEOF, - TypeVar.ASBOOL, TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR, - TypeVar.HALFWIDTH, 
TypeVar.DOUBLEWIDTH] - v = [()] + [(x,) for x in funcs] + list(product(*[funcs, funcs])) - - # For each pair of derived variables - for (i1, i2) in product(v, v): - # Compute the derived sets for each starting with a full typeset - full_ts = TypeSet(lanes=True, floats=True, ints=True, bools=True) - ts1 = reduce(lambda ts, func: ts.image(func), i1, full_ts) - ts2 = reduce(lambda ts, func: ts.image(func), i2, full_ts) - - # Compute intersection - intersect = ts1.copy() - intersect &= ts2 - - # Propagate intersections backward - ts1_src = reduce(lambda ts, func: ts.preimage(func), - reversed(i1), - intersect) - ts2_src = reduce(lambda ts, func: ts.preimage(func), - reversed(i2), - intersect) - - # If the intersection or its propagated forms are empty, then these - # two variables can never overlap. For example x.double_vector and - # x.lane_of. - if (intersect.size() == 0 or ts1_src.size() == 0 or - ts2_src.size() == 0): - continue - - # Should be safe to create derived tvs from ts1_src and ts2_src - tv1 = reduce(lambda tv, func: TypeVar.derived(tv, func), - i1, - TypeVar.from_typeset(ts1_src)) - - tv2 = reduce(lambda tv, func: TypeVar.derived(tv, func), - i2, - TypeVar.from_typeset(ts2_src)) - - # In the absence of AS_BOOL image(preimage(f)) == f so the - # typesets of tv1 and tv2 should be exactly intersection - assert tv1.get_typeset() == intersect or\ - has_non_bijective_derived_f(i1) - - assert tv2.get_typeset() == intersect or\ - has_non_bijective_derived_f(i2) diff --git a/cranelift-codegen/meta-python/cdsl/test_xform.py b/cranelift-codegen/meta-python/cdsl/test_xform.py deleted file mode 100644 index 424a7c824..000000000 --- a/cranelift-codegen/meta-python/cdsl/test_xform.py +++ /dev/null @@ -1,131 +0,0 @@ -from __future__ import absolute_import -from unittest import TestCase -from doctest import DocTestSuite -from base.instructions import iadd, iadd_imm, iconst, icmp -from base.immediates import intcc -from . import xform -from .ast import Var -from .xform import Rtl, XForm - - -def load_tests(loader, tests, ignore): - tests.addTests(DocTestSuite(xform)) - return tests - - -x = Var('x') -y = Var('y') -z = Var('z') -u = Var('u') -a = Var('a') -b = Var('b') -c = Var('c') - -CC1 = Var('CC1') -CC2 = Var('CC2') - - -class TestXForm(TestCase): - def test_macro_pattern(self): - src = Rtl(a << iadd_imm(x, y)) - dst = Rtl( - c << iconst(y), - a << iadd(x, c)) - XForm(src, dst) - - def test_def_input(self): - # Src pattern has a def which is an input in dst. - src = Rtl(a << iadd_imm(x, 1)) - dst = Rtl(y << iadd_imm(a, 1)) - with self.assertRaisesRegexp( - AssertionError, - "'a' used as both input and def"): - XForm(src, dst) - - def test_input_def(self): - # Converse of the above. 
- src = Rtl(y << iadd_imm(a, 1)) - dst = Rtl(a << iadd_imm(x, 1)) - with self.assertRaisesRegexp( - AssertionError, - "'a' used as both input and def"): - XForm(src, dst) - - def test_extra_input(self): - src = Rtl(a << iadd_imm(x, 1)) - dst = Rtl(a << iadd(x, y)) - with self.assertRaisesRegexp(AssertionError, "extra inputs in dst"): - XForm(src, dst) - - def test_double_def(self): - src = Rtl( - a << iadd_imm(x, 1), - a << iadd(x, y)) - dst = Rtl(a << iadd(x, y)) - with self.assertRaisesRegexp(AssertionError, "'a' multiply defined"): - XForm(src, dst) - - def test_subst_imm(self): - src = Rtl(a << iconst(x)) - dst = Rtl(c << iconst(y)) - assert src.substitution(dst, {}) == {a: c, x: y} - - def test_subst_enum_var(self): - src = Rtl(a << icmp(CC1, x, y)) - dst = Rtl(b << icmp(CC2, z, u)) - assert src.substitution(dst, {}) == {a: b, CC1: CC2, x: z, y: u} - - def test_subst_enum_const(self): - src = Rtl(a << icmp(intcc.eq, x, y)) - dst = Rtl(b << icmp(intcc.eq, z, u)) - assert src.substitution(dst, {}) == {a: b, x: z, y: u} - - def test_subst_enum_var_const(self): - src = Rtl(a << icmp(CC1, x, y)) - dst = Rtl(b << icmp(intcc.eq, z, u)) - assert src.substitution(dst, {}) == {CC1: intcc.eq, x: z, y: u, a: b},\ - "{} != {}".format(src.substitution(dst, {}), - {CC1: intcc.eq, x: z, y: u, a: b}) - - src = Rtl(a << icmp(intcc.eq, x, y)) - dst = Rtl(b << icmp(CC1, z, u)) - assert src.substitution(dst, {}) == {CC1: intcc.eq, x: z, y: u, a: b} - - def test_subst_enum_bad(self): - src = Rtl(a << icmp(intcc.eq, x, y)) - dst = Rtl(b << icmp(intcc.sge, z, u)) - assert src.substitution(dst, {}) is None - - def test_subst_enum_bad_var_const(self): - a1 = Var('a1') - x1 = Var('x1') - y1 = Var('y1') - - b1 = Var('b1') - z1 = Var('z1') - u1 = Var('u1') - - # Var mapping to 2 different constants - src = Rtl(a << icmp(CC1, x, y), - a1 << icmp(CC1, x1, y1)) - dst = Rtl(b << icmp(intcc.eq, z, u), - b1 << icmp(intcc.sge, z1, u1)) - - assert src.substitution(dst, {}) is None - - # 2 different constants mapping to the same var - src = Rtl(a << icmp(intcc.eq, x, y), - a1 << icmp(intcc.sge, x1, y1)) - dst = Rtl(b << icmp(CC1, z, u), - b1 << icmp(CC1, z1, u1)) - - assert src.substitution(dst, {}) is None - - # Var mapping to var and constant - note that full unification would - # have allowed this. - src = Rtl(a << icmp(CC1, x, y), - a1 << icmp(CC1, x1, y1)) - dst = Rtl(b << icmp(CC2, z, u), - b1 << icmp(intcc.sge, z1, u1)) - - assert src.substitution(dst, {}) is None diff --git a/cranelift-codegen/meta-python/cdsl/ti.py b/cranelift-codegen/meta-python/cdsl/ti.py deleted file mode 100644 index 26f01f9e6..000000000 --- a/cranelift-codegen/meta-python/cdsl/ti.py +++ /dev/null @@ -1,894 +0,0 @@ -""" -Type Inference -""" -from .typevar import TypeVar -from .ast import Def, Var -from copy import copy -from itertools import product - -try: - from typing import Dict, TYPE_CHECKING, Union, Tuple, Optional, Set # noqa - from typing import Iterable, List, Any, TypeVar as MTypeVar # noqa - from typing import cast - from .xform import Rtl, XForm # noqa - from .ast import Expr # noqa - from .typevar import TypeSet # noqa - if TYPE_CHECKING: - T = MTypeVar('T') - TypeMap = Dict[TypeVar, TypeVar] - VarTyping = Dict[Var, TypeVar] -except ImportError: - TYPE_CHECKING = False - pass - - -class TypeConstraint(object): - """ - Base class for all runtime-emittable type constraints. 
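(Editor's aside, not part of the deleted file: the lifecycle sketched below is how ti_def() and TypeEnv.extract() further down in this module use TypeConstraint objects; WiderOrEq is one concrete subclass defined later in the file.)

    c = WiderOrEq(tv1, tv2)      # tv1 must be at least as wide as tv2
    c = c.translate(type_env)    # rewrite TVs to canonical representatives
    if c.is_trivial():           # statically decidable; no need to emit it
        pass
    elif c.is_concrete():        # every TV is a singleton
        satisfied = c.eval()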
- """ - - def __init__(self, tv, tc): - # type: (TypeVar, Union[TypeVar, TypeSet]) -> None - """ - Abstract "constructor" for linters - """ - assert False, "Abstract" - - def translate(self, m): - # type: (Union[TypeEnv, TypeMap]) -> TypeConstraint - """ - Translate any TypeVars in the constraint according to the map or - TypeEnv m - """ - def translate_one(a): - # type: (Any) -> Any - if (isinstance(a, TypeVar)): - return m[a] if isinstance(m, TypeEnv) else subst(a, m) - return a - - res = None # type: TypeConstraint - res = self.__class__(*tuple(map(translate_one, self._args()))) - return res - - def __eq__(self, other): - # type: (object) -> bool - if (not isinstance(other, self.__class__)): - return False - - assert isinstance(other, TypeConstraint) # help MyPy figure out other - return self._args() == other._args() - - def is_concrete(self): - # type: () -> bool - """ - Return true iff all typevars in the constraint are singletons. - """ - return [] == list(filter(lambda x: x.singleton_type() is None, - self.tvs())) - - def __hash__(self): - # type: () -> int - return hash(self._args()) - - def _args(self): - # type: () -> Tuple[Any,...] - """ - Return a tuple with the exact arguments passed to __init__ to create - this object. - """ - assert False, "Abstract" - - def tvs(self): - # type: () -> Iterable[TypeVar] - """ - Return the typevars contained in this constraint. - """ - return list(filter(lambda x: isinstance(x, TypeVar), self._args())) - - def is_trivial(self): - # type: () -> bool - """ - Return true if this constrain is statically decidable. - """ - assert False, "Abstract" - - def eval(self): - # type: () -> bool - """ - Evaluate this constraint. Should only be called when the constraint has - been translated to concrete types. - """ - assert False, "Abstract" - - def __repr__(self): - # type: () -> str - return (self.__class__.__name__ + '(' + - ', '.join(map(str, self._args())) + ')') - - -class TypesEqual(TypeConstraint): - """ - Constraint specifying that two derived type vars must have the same runtime - type. - """ - def __init__(self, tv1, tv2): - # type: (TypeVar, TypeVar) -> None - (self.tv1, self.tv2) = sorted([tv1, tv2], key=repr) - - def _args(self): - # type: () -> Tuple[Any,...] - """ See TypeConstraint._args() """ - return (self.tv1, self.tv2) - - def is_trivial(self): - # type: () -> bool - """ See TypeConstraint.is_trivial() """ - return self.tv1 == self.tv2 or self.is_concrete() - - def eval(self): - # type: () -> bool - """ See TypeConstraint.eval() """ - assert self.is_concrete() - return self.tv1.singleton_type() == self.tv2.singleton_type() - - -class InTypeset(TypeConstraint): - """ - Constraint specifying that a type var must belong to some typeset. - """ - def __init__(self, tv, ts): - # type: (TypeVar, TypeSet) -> None - assert not tv.is_derived and tv.name.startswith("typeof_") - self.tv = tv - self.ts = ts - - def _args(self): - # type: () -> Tuple[Any,...] 
- """ See TypeConstraint._args() """ - return (self.tv, self.ts) - - def is_trivial(self): - # type: () -> bool - """ See TypeConstraint.is_trivial() """ - tv_ts = self.tv.get_typeset().copy() - - # Trivially True - if (tv_ts.issubset(self.ts)): - return True - - # Trivially false - tv_ts &= self.ts - if (tv_ts.size() == 0): - return True - - return self.is_concrete() - - def eval(self): - # type: () -> bool - """ See TypeConstraint.eval() """ - assert self.is_concrete() - return self.tv.get_typeset().issubset(self.ts) - - -class WiderOrEq(TypeConstraint): - """ - Constraint specifying that a type var tv1 must be wider than or equal to - type var tv2 at runtime. This requires that: - 1) They have the same number of lanes - 2) In a lane tv1 has at least as many bits as tv2. - """ - def __init__(self, tv1, tv2): - # type: (TypeVar, TypeVar) -> None - self.tv1 = tv1 - self.tv2 = tv2 - - def _args(self): - # type: () -> Tuple[Any,...] - """ See TypeConstraint._args() """ - return (self.tv1, self.tv2) - - def is_trivial(self): - # type: () -> bool - """ See TypeConstraint.is_trivial() """ - # Trivially true - if (self.tv1 == self.tv2): - return True - - ts1 = self.tv1.get_typeset() - ts2 = self.tv2.get_typeset() - - def set_wider_or_equal(s1, s2): - # type: (Set[int], Set[int]) -> bool - return len(s1) > 0 and len(s2) > 0 and min(s1) >= max(s2) - - # Trivially True - if set_wider_or_equal(ts1.ints, ts2.ints) and\ - set_wider_or_equal(ts1.floats, ts2.floats) and\ - set_wider_or_equal(ts1.bools, ts2.bools): - return True - - def set_narrower(s1, s2): - # type: (Set[int], Set[int]) -> bool - return len(s1) > 0 and len(s2) > 0 and min(s1) < max(s2) - - # Trivially False - if set_narrower(ts1.ints, ts2.ints) and\ - set_narrower(ts1.floats, ts2.floats) and\ - set_narrower(ts1.bools, ts2.bools): - return True - - # Trivially False - if len(ts1.lanes.intersection(ts2.lanes)) == 0: - return True - - return self.is_concrete() - - def eval(self): - # type: () -> bool - """ See TypeConstraint.eval() """ - assert self.is_concrete() - typ1 = self.tv1.singleton_type() - typ2 = self.tv2.singleton_type() - - return typ1.wider_or_equal(typ2) - - -class SameWidth(TypeConstraint): - """ - Constraint specifying that two types have the same width. E.g. i32x2 has - the same width as i64x1, i16x4, f32x2, f64, b1x64 etc. - """ - def __init__(self, tv1, tv2): - # type: (TypeVar, TypeVar) -> None - self.tv1 = tv1 - self.tv2 = tv2 - - def _args(self): - # type: () -> Tuple[Any,...] - """ See TypeConstraint._args() """ - return (self.tv1, self.tv2) - - def is_trivial(self): - # type: () -> bool - """ See TypeConstraint.is_trivial() """ - # Trivially true - if (self.tv1 == self.tv2): - return True - - ts1 = self.tv1.get_typeset() - ts2 = self.tv2.get_typeset() - - # Trivially False - if len(ts1.widths().intersection(ts2.widths())) == 0: - return True - - return self.is_concrete() - - def eval(self): - # type: () -> bool - """ See TypeConstraint.eval() """ - assert self.is_concrete() - typ1 = self.tv1.singleton_type() - typ2 = self.tv2.singleton_type() - - return (typ1.width() == typ2.width()) - - -class TypeEnv(object): - """ - Class encapsulating the necessary book keeping for type inference. - :attribute type_map: dict holding the equivalence relations between tvs - :attribute constraints: a list of accumulated constraints - tuples - (tv1, tv2)) where tv1 and tv2 are equal - :attribute ranks: dictionary recording the (optional) ranks for tvs. - 'rank' is a partial ordering on TVs based on their - origin. 
See comments in rank() and register(). - :attribute vars: a set containing all known Vars - :attribute idx: counter used to get fresh ids - """ - - RANK_SINGLETON = 5 - RANK_INPUT = 4 - RANK_INTERMEDIATE = 3 - RANK_OUTPUT = 2 - RANK_TEMP = 1 - RANK_INTERNAL = 0 - - def __init__(self, arg=None): - # type: (Optional[Tuple[TypeMap, List[TypeConstraint]]]) -> None - self.ranks = {} # type: Dict[TypeVar, int] - self.vars = set() # type: Set[Var] - - if arg is None: - self.type_map = {} # type: TypeMap - self.constraints = [] # type: List[TypeConstraint] - else: - self.type_map, self.constraints = arg - - self.idx = 0 - - def __getitem__(self, arg): - # type: (Union[TypeVar, Var]) -> TypeVar - """ - Look up the canonical representative for a Var/TypeVar. - """ - if (isinstance(arg, Var)): - assert arg in self.vars - tv = arg.get_typevar() - else: - assert (isinstance(arg, TypeVar)) - tv = arg - - while tv in self.type_map: - tv = self.type_map[tv] - - if tv.is_derived: - tv = TypeVar.derived(self[tv.base], tv.derived_func) - return tv - - def equivalent(self, tv1, tv2): - # type: (TypeVar, TypeVar) -> None - """ - Record that the free tv1 is part of the same equivalence class as - tv2. The canonical representative of the merged class is tv2's - canonical representative. - """ - assert not tv1.is_derived - assert self[tv1] == tv1 - - # Make sure we don't create cycles - if tv2.is_derived: - assert self[tv2.base] != tv1 - - self.type_map[tv1] = tv2 - - def add_constraint(self, constr): - # type: (TypeConstraint) -> None - """ - Add a new constraint - """ - if (constr in self.constraints): - return - - # InTypeset constraints can be expressed by constraining the typeset of - # a variable. No need to add them to self.constraints - if (isinstance(constr, InTypeset)): - self[constr.tv].constrain_types_by_ts(constr.ts) - return - - self.constraints.append(constr) - - def get_uid(self): - # type: () -> str - r = str(self.idx) - self.idx += 1 - return r - - def __repr__(self): - # type: () -> str - return self.dot() - - def rank(self, tv): - # type: (TypeVar) -> int - """ - Get the rank of tv in the partial order. TVs directly associated with a - Var get their rank from the Var (see register()). Internally generated - non-derived TVs implicitly get the lowest rank (0). Derived variables - get their rank from their free typevar. Singletons have the highest - rank. TVs associated with vars in a source pattern have a higher rank - than TVs associated with temporary vars. - """ - default_rank = TypeEnv.RANK_INTERNAL if tv.singleton_type() is None \ - else TypeEnv.RANK_SINGLETON - - if tv.is_derived: - tv = tv.free_typevar() - - return self.ranks.get(tv, default_rank) - - def register(self, v): - # type: (Var) -> None - """ - Register a new Var v. This computes a rank for the associated TypeVar - for v, which is used to impose a partial order on type variables. - """ - self.vars.add(v) - - if v.is_input(): - r = TypeEnv.RANK_INPUT - elif v.is_intermediate(): - r = TypeEnv.RANK_INTERMEDIATE - elif v.is_output(): - r = TypeEnv.RANK_OUTPUT - else: - assert(v.is_temp()) - r = TypeEnv.RANK_TEMP - - self.ranks[v.get_typevar()] = r - - def free_typevars(self): - # type: () -> List[TypeVar] - """ - Get the free typevars in the current type env.
- """ - tvs = set([self[tv].free_typevar() for tv in self.type_map.keys()]) - tvs = tvs.union(set([self[v].free_typevar() for v in self.vars])) - # Filter out None here due to singleton type vars - return sorted(filter(lambda x: x is not None, tvs), - key=lambda x: x.name) - - def normalize(self): - # type: () -> None - """ - Normalize by: - - collapsing any roots that don't correspond to a concrete TV AND - have a single TV derived from them or equivalent to them - - E.g. if we have a root of the tree that looks like: - - typeof_a typeof_b - \\ / - typeof_x - | - half_width(1) - | - 1 - - we want to collapse the linear path between 1 and typeof_x. The - resulting graph is: - - typeof_a typeof_b - \\ / - typeof_x - """ - source_tvs = set([v.get_typevar() for v in self.vars]) - children = {} # type: Dict[TypeVar, Set[TypeVar]] - for v in self.type_map.values(): - if not v.is_derived: - continue - - t = v.free_typevar() - s = children.get(t, set()) - s.add(v) - children[t] = s - - for (a, b) in self.type_map.items(): - s = children.get(b, set()) - s.add(a) - children[b] = s - - for r in self.free_typevars(): - while (r not in source_tvs and r in children and - len(children[r]) == 1): - child = list(children[r])[0] - if child in self.type_map: - assert self.type_map[child] == r - del self.type_map[child] - - r = child - - def extract(self): - # type: () -> TypeEnv - """ - Extract a clean type environment from self, that only mentions - TVs associated with real variables - """ - vars_tvs = set([v.get_typevar() for v in self.vars]) - new_type_map = {tv: self[tv] for tv in vars_tvs if tv != self[tv]} - - new_constraints = [] # type: List[TypeConstraint] - for constr in self.constraints: - constr = constr.translate(self) - - if constr.is_trivial() or constr in new_constraints: - continue - - # Sanity: translated constraints should refer to only real vars - for arg in constr._args(): - if (not isinstance(arg, TypeVar)): - continue - - arg_free_tv = arg.free_typevar() - assert arg_free_tv is None or arg_free_tv in vars_tvs - - new_constraints.append(constr) - - # Sanity: translated typemap should refer to only real vars - for (k, v) in new_type_map.items(): - assert k in vars_tvs - assert v.free_typevar() is None or v.free_typevar() in vars_tvs - - t = TypeEnv() - t.type_map = new_type_map - t.constraints = new_constraints - # ranks and vars contain only TVs associated with real vars - t.ranks = copy(self.ranks) - t.vars = copy(self.vars) - return t - - def concrete_typings(self): - # type: () -> Iterable[VarTyping] - """ - Return an iterable over all possible concrete typings permitted by this - TypeEnv. - """ - free_tvs = self.free_typevars() - free_tv_iters = [tv.get_typeset().concrete_types() for tv in free_tvs] - for concrete_types in product(*free_tv_iters): - # Build type substitutions for all free vars - m = {tv: TypeVar.singleton(typ) - for (tv, typ) in zip(free_tvs, concrete_types)} - - concrete_var_map = {v: subst(self[v.get_typevar()], m) - for v in self.vars} - - # Check if constraints are satisfied for this typing - failed = None - for constr in self.constraints: - concrete_constr = constr.translate(m) - if not concrete_constr.eval(): - failed = concrete_constr - break - - if (failed is not None): - continue - - yield concrete_var_map - - def permits(self, concrete_typing): - # type: (VarTyping) -> bool - """ - Return true iff this TypeEnv permits the (possibly partial) concrete - variable type mapping concrete_typing. 
- """ - # Each variable has a concrete type, that is a subset of its inferred - # typeset. - for (v, typ) in concrete_typing.items(): - assert typ.singleton_type() is not None - if not typ.get_typeset().issubset(self[v].get_typeset()): - return False - - m = {self[v]: typ for (v, typ) in concrete_typing.items()} - - # Constraints involving vars in concrete_typing are satisfied - for constr in self.constraints: - try: - # If the constraint includes only vars in concrete_typing, we - # can translate it using m. Otherwise we encounter a KeyError - # and ignore it - constr = constr.translate(m) - if not constr.eval(): - return False - except KeyError: - pass - - return True - - def dot(self): - # type: () -> str - """ - Return a representation of self as a graph in dot format. - Nodes correspond to TypeVariables. - Dotted edges correspond to equivalences between TVS - Solid edges correspond to derivation relations between TVs. - Dashed edges correspond to equivalence constraints. - """ - def label(s): - # type: (TypeVar) -> str - return "\"" + str(s) + "\"" - - # Add all registered TVs (as some of them may be singleton nodes not - # appearing in the graph - nodes = set() # type: Set[TypeVar] - edges = set() # type: Set[Tuple[TypeVar, TypeVar, str, str, Optional[str]]] # noqa - - def add_nodes(*args): - # type: (*TypeVar) -> None - for tv in args: - nodes.add(tv) - while (tv.is_derived): - nodes.add(tv.base) - edges.add((tv, tv.base, "solid", "forward", - tv.derived_func)) - tv = tv.base - - for v in self.vars: - add_nodes(v.get_typevar()) - - for (tv1, tv2) in self.type_map.items(): - # Add all intermediate TVs appearing in edges - add_nodes(tv1, tv2) - edges.add((tv1, tv2, "dotted", "forward", None)) - - for constr in self.constraints: - if isinstance(constr, TypesEqual): - add_nodes(constr.tv1, constr.tv2) - edges.add((constr.tv1, constr.tv2, "dashed", "none", "equal")) - elif isinstance(constr, WiderOrEq): - add_nodes(constr.tv1, constr.tv2) - edges.add((constr.tv1, constr.tv2, "dashed", "forward", ">=")) - elif isinstance(constr, SameWidth): - add_nodes(constr.tv1, constr.tv2) - edges.add((constr.tv1, constr.tv2, "dashed", "none", - "same_width")) - else: - assert False, "Can't display constraint {}".format(constr) - - root_nodes = set([x for x in nodes - if x not in self.type_map and not x.is_derived]) - - r = "digraph {\n" - for n in nodes: - r += label(n) - if n in root_nodes: - r += "[xlabel=\"{}\"]".format(self[n].get_typeset()) - r += ";\n" - - for (n1, n2, style, direction, elabel) in edges: - e = label(n1) + "->" + label(n2) - e += "[style={},dir={}".format(style, direction) - - if elabel is not None: - e += ",label=\"{}\"".format(elabel) - e += "];\n" - - r += e - r += "}" - - return r - - -if TYPE_CHECKING: - TypingError = str - TypingOrError = Union[TypeEnv, TypingError] - - -def get_error(typing_or_err): - # type: (TypingOrError) -> Optional[TypingError] - """ - Helper function to appease mypy when checking the result of typing. - """ - if isinstance(typing_or_err, str): - if (TYPE_CHECKING): - return cast(TypingError, typing_or_err) - else: - return typing_or_err - else: - return None - - -def get_type_env(typing_or_err): - # type: (TypingOrError) -> TypeEnv - """ - Helper function to appease mypy when checking the result of typing. 
- """ - assert isinstance(typing_or_err, TypeEnv), \ - "Unexpected error: {}".format(typing_or_err) - - if (TYPE_CHECKING): - return cast(TypeEnv, typing_or_err) - else: - return typing_or_err - - -def subst(tv, tv_map): - # type: (TypeVar, TypeMap) -> TypeVar - """ - Perform substition on the input tv using the TypeMap tv_map. - """ - if tv in tv_map: - return tv_map[tv] - - if tv.is_derived: - return TypeVar.derived(subst(tv.base, tv_map), tv.derived_func) - - return tv - - -def normalize_tv(tv): - # type: (TypeVar) -> TypeVar - """ - Normalize a (potentially derived) TV using the following rules: - - vector and width derived functions commute - {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> - {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) - - - half/double pairs collapse - {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base - {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base - """ - vector_derives = [TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR] - width_derives = [TypeVar.HALFWIDTH, TypeVar.DOUBLEWIDTH] - - if not tv.is_derived: - return tv - - df = tv.derived_func - - if (tv.base.is_derived): - base_df = tv.base.derived_func - - # Reordering: {HALFWIDTH, DOUBLEWIDTH} commute with {HALFVECTOR, - # DOUBLEVECTOR}. Arbitrarily pick WIDTH < VECTOR - if df in vector_derives and base_df in width_derives: - return normalize_tv( - TypeVar.derived( - TypeVar.derived(tv.base.base, df), base_df)) - - # Cancelling: HALFWIDTH, DOUBLEWIDTH and HALFVECTOR, DOUBLEVECTOR - # cancel each other. Note: This doesn't hide any over/underflows, - # since we 1) assert the safety of each TV in the chain upon its - # creation, and 2) the base typeset is only allowed to shrink. - - if (df, base_df) in \ - [(TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR), - (TypeVar.DOUBLEVECTOR, TypeVar.HALFVECTOR), - (TypeVar.HALFWIDTH, TypeVar.DOUBLEWIDTH), - (TypeVar.DOUBLEWIDTH, TypeVar.HALFWIDTH)]: - return normalize_tv(tv.base.base) - - return TypeVar.derived(normalize_tv(tv.base), df) - - -def constrain_fixpoint(tv1, tv2): - # type: (TypeVar, TypeVar) -> None - """ - Given typevars tv1 and tv2 (which could be derived from one another) - constrain their typesets to be the same. When one is derived from the - other, repeat the constrain process until fixpoint. - """ - # Constrain tv2's typeset as long as tv1's typeset is changing. - while True: - old_tv1_ts = tv1.get_typeset().copy() - tv2.constrain_types(tv1) - if tv1.get_typeset() == old_tv1_ts: - break - - old_tv2_ts = tv2.get_typeset().copy() - tv1.constrain_types(tv2) - assert old_tv2_ts == tv2.get_typeset() - - -def unify(tv1, tv2, typ): - # type: (TypeVar, TypeVar, TypeEnv) -> TypingOrError - """ - Unify tv1 and tv2 in the current type environment typ, and return an - updated type environment or error. 
- """ - tv1 = normalize_tv(typ[tv1]) - tv2 = normalize_tv(typ[tv2]) - - # Already unified - if tv1 == tv2: - return typ - - if typ.rank(tv2) < typ.rank(tv1): - return unify(tv2, tv1, typ) - - constrain_fixpoint(tv1, tv2) - - if (tv1.get_typeset().size() == 0 or tv2.get_typeset().size() == 0): - return "Error: empty type created when unifying {} and {}"\ - .format(tv1, tv2) - - # Free -> Derived(Free) - if not tv1.is_derived: - typ.equivalent(tv1, tv2) - return typ - - if (tv1.is_derived and TypeVar.is_bijection(tv1.derived_func)): - inv_f = TypeVar.inverse_func(tv1.derived_func) - return unify(tv1.base, normalize_tv(TypeVar.derived(tv2, inv_f)), typ) - - typ.add_constraint(TypesEqual(tv1, tv2)) - return typ - - -def move_first(l, i): - # type: (List[T], int) -> List[T] - return [l[i]] + l[:i] + l[i+1:] - - -def ti_def(definition, typ): - # type: (Def, TypeEnv) -> TypingOrError - """ - Perform type inference on one Def in the current type environment typ and - return an updated type environment or error. - - At a high level this works by creating fresh copies of each formal type var - in the Def's instruction's signature, and unifying the formal tv with the - corresponding actual tv. - """ - expr = definition.expr - inst = expr.inst - - # Create a dict m mapping each free typevar in the signature of definition - # to a fresh copy of itself. - free_formal_tvs = inst.all_typevars() - m = {tv: tv.get_fresh_copy(str(typ.get_uid())) for tv in free_formal_tvs} - - # Update m with any explicitly bound type vars - for (idx, bound_typ) in enumerate(expr.typevars): - m[free_formal_tvs[idx]] = TypeVar.singleton(bound_typ) - - # Get fresh copies for each typevar in the signature (both free and - # derived) - fresh_formal_tvs = \ - [subst(inst.outs[i].typevar, m) for i in inst.value_results] +\ - [subst(inst.ins[i].typevar, m) for i in inst.value_opnums] - - # Get the list of actual Vars - actual_vars = [] # type: List[Expr] - actual_vars += [definition.defs[i] for i in inst.value_results] - actual_vars += [expr.args[i] for i in inst.value_opnums] - - # Get the list of the actual TypeVars - actual_tvs = [] - for v in actual_vars: - assert(isinstance(v, Var)) - # Register with TypeEnv that this typevar corresponds ot variable v, - # and thus has a given rank - typ.register(v) - actual_tvs.append(v.get_typevar()) - - # Make sure we unify the control typevar first. - if inst.is_polymorphic: - idx = fresh_formal_tvs.index(m[inst.ctrl_typevar]) - fresh_formal_tvs = move_first(fresh_formal_tvs, idx) - actual_tvs = move_first(actual_tvs, idx) - - # Unify each actual typevar with the corresponding fresh formal tv - for (actual_tv, formal_tv) in zip(actual_tvs, fresh_formal_tvs): - typ_or_err = unify(actual_tv, formal_tv, typ) - err = get_error(typ_or_err) - if (err): - return "fail ti on {} <: {}: ".format(actual_tv, formal_tv) + err - - typ = get_type_env(typ_or_err) - - # Add any instruction specific constraints - for constr in inst.constraints: - typ.add_constraint(constr.translate(m)) - - return typ - - -def ti_rtl(rtl, typ): - # type: (Rtl, TypeEnv) -> TypingOrError - """ - Perform type inference on an Rtl in a starting type env typ. Return an - updated type environment or error. 
- """ - for (i, d) in enumerate(rtl.rtl): - assert (isinstance(d, Def)) - typ_or_err = ti_def(d, typ) - err = get_error(typ_or_err) # type: Optional[TypingError] - if (err): - return "On line {}: ".format(i) + err - - typ = get_type_env(typ_or_err) - - return typ - - -def ti_xform(xform, typ): - # type: (XForm, TypeEnv) -> TypingOrError - """ - Perform type inference on an Rtl in a starting type env typ. Return an - updated type environment or error. - """ - typ_or_err = ti_rtl(xform.src, typ) - err = get_error(typ_or_err) # type: Optional[TypingError] - if (err): - return "In src pattern: " + err - - typ = get_type_env(typ_or_err) - - typ_or_err = ti_rtl(xform.dst, typ) - err = get_error(typ_or_err) - if (err): - return "In dst pattern: " + err - - typ = get_type_env(typ_or_err) - - return get_type_env(typ_or_err) diff --git a/cranelift-codegen/meta-python/cdsl/types.py b/cranelift-codegen/meta-python/cdsl/types.py deleted file mode 100644 index 26777152d..000000000 --- a/cranelift-codegen/meta-python/cdsl/types.py +++ /dev/null @@ -1,348 +0,0 @@ -"""Cranelift ValueType hierarchy""" -from __future__ import absolute_import -import math - -try: - from typing import Dict, List, cast, TYPE_CHECKING # noqa -except ImportError: - TYPE_CHECKING = False - pass - - -# Numbering scheme for value types: -# -# 0: Void -# 0x01-0x6f: Special types -# 0x70-0x7f: Lane types -# 0x80-0xff: Vector types -# -# Vector types are encoded with the lane type in the low 4 bits and log2(lanes) -# in the high 4 bits, giving a range of 2-256 lanes. -LANE_BASE = 0x70 - - -# ValueType instances (i8, i32, ...) are provided in the `base.types` module. -class ValueType(object): - """ - A concrete SSA value type. - - All SSA values have a type that is described by an instance of `ValueType` - or one of its subclasses. - """ - - # Map name -> ValueType. - _registry = dict() # type: Dict[str, ValueType] - - # List of all the lane types. - all_lane_types = list() # type: List[LaneType] - - # List of all the special types (neither lanes nor vectors). - all_special_types = list() # type: List[SpecialType] - - def __init__(self, name, membytes, doc): - # type: (str, int, str) -> None - self.name = name - self.number = None # type: int - self.membytes = membytes - self.__doc__ = doc - assert name not in ValueType._registry - ValueType._registry[name] = self - - def __str__(self): - # type: () -> str - return self.name - - def rust_name(self): - # type: () -> str - return 'ir::types::' + self.name.upper() - - @staticmethod - def by_name(name): - # type: (str) -> ValueType - if name in ValueType._registry: - return ValueType._registry[name] - else: - raise AttributeError("No type named '{}'".format(name)) - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - assert False, "Abstract" - - def lane_count(self): - # type: () -> int - """Return the number of lanes.""" - assert False, "Abstract" - - def width(self): - # type: () -> int - """Return the total number of bits of an instance of this type.""" - return self.lane_count() * self.lane_bits() - - def wider_or_equal(self, other): - # type: (ValueType) -> bool - """ - Return true iff: - 1. self and other have equal number of lanes - 2. each lane in self has at least as many bits as a lane in other - """ - return (self.lane_count() == other.lane_count() and - self.lane_bits() >= other.lane_bits()) - - -class LaneType(ValueType): - """ - A concrete scalar type that can appear as a vector lane too. 
- - Also tracks a unique set of :py:class:`VectorType` instances with this type - as the lane type. - """ - - def __init__(self, name, membytes, doc): - # type: (str, int, str) -> None - super(LaneType, self).__init__(name, membytes, doc) - self._vectors = dict() # type: Dict[int, VectorType] - # Assign numbers starting from LANE_BASE. - n = len(ValueType.all_lane_types) - ValueType.all_lane_types.append(self) - assert n < 16, 'Too many lane types' - self.number = LANE_BASE + n - - def __repr__(self): - # type: () -> str - return 'LaneType({})'.format(self.name) - - def by(self, lanes): - # type: (int) -> VectorType - """ - Get a vector type with this type as the lane type. - - For example, ``i32.by(4)`` returns the :obj:`i32x4` type. - """ - if lanes in self._vectors: - return self._vectors[lanes] - else: - v = VectorType(self, lanes) - self._vectors[lanes] = v - return v - - def lane_count(self): - # type: () -> int - """Return the number of lanes.""" - return 1 - - -class VectorType(ValueType): - """ - A concrete SIMD vector type. - - A vector type has a lane type which is an instance of :class:`LaneType`, - and a positive number of lanes. - """ - - def __init__(self, base, lanes): - # type: (LaneType, int) -> None - super(VectorType, self).__init__( - name='{}x{}'.format(base.name, lanes), - membytes=lanes*base.membytes, - doc=""" - A SIMD vector with {} lanes containing a `{}` each. - """.format(lanes, base.name)) - assert lanes <= 256, "Too many lanes" - self.base = base - self.lanes = lanes - self.number = 16*int(math.log(lanes, 2)) + base.number - - def __repr__(self): - # type: () -> str - return ('VectorType(base={}, lanes={})' - .format(self.base.name, self.lanes)) - - def lane_count(self): - # type: () -> int - """Return the number of lanes.""" - return self.lanes - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - return self.base.lane_bits() - - -class SpecialType(ValueType): - """ - A concrete scalar type that is neither a vector nor a lane type. - - Special types cannot be used to form vectors. - """ - - def __init__(self, name, membytes, doc): - # type: (str, int, str) -> None - super(SpecialType, self).__init__(name, membytes, doc) - # Assign numbers starting from 1. 
(0 is INVALID) - ValueType.all_special_types.append(self) - self.number = len(ValueType.all_special_types) - assert self.number < LANE_BASE, 'Too many special types' - - def __repr__(self): - # type: () -> str - return 'SpecialType({})'.format(self.name) - - def lane_count(self): - # type: () -> int - """Return the number of lanes.""" - return 1 - - -class IntType(LaneType): - """A concrete scalar integer type.""" - - def __init__(self, bits): - # type: (int) -> None - assert bits > 0, 'IntType must have positive number of bits' - warning = "" - if bits < 32: - warning += "\nWARNING: " - warning += "arithmetic on {}bit integers is incomplete".format( - bits) - super(IntType, self).__init__( - name='i{:d}'.format(bits), - membytes=bits // 8, - doc="An integer type with {} bits.{}".format(bits, warning)) - self.bits = bits - - def __repr__(self): - # type: () -> str - return 'IntType(bits={})'.format(self.bits) - - @staticmethod - def with_bits(bits): - # type: (int) -> IntType - typ = ValueType.by_name('i{:d}'.format(bits)) - if TYPE_CHECKING: - return cast(IntType, typ) - else: - return typ - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - return self.bits - - -class FloatType(LaneType): - """A concrete scalar floating point type.""" - - def __init__(self, bits, doc): - # type: (int, str) -> None - assert bits > 0, 'FloatType must have positive number of bits' - super(FloatType, self).__init__( - name='f{:d}'.format(bits), - membytes=bits // 8, - doc=doc) - self.bits = bits - - def __repr__(self): - # type: () -> str - return 'FloatType(bits={})'.format(self.bits) - - @staticmethod - def with_bits(bits): - # type: (int) -> FloatType - typ = ValueType.by_name('f{:d}'.format(bits)) - if TYPE_CHECKING: - return cast(FloatType, typ) - else: - return typ - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - return self.bits - - -class BoolType(LaneType): - """A concrete scalar boolean type.""" - - def __init__(self, bits): - # type: (int) -> None - assert bits > 0, 'BoolType must have positive number of bits' - super(BoolType, self).__init__( - name='b{:d}'.format(bits), - membytes=bits // 8, - doc="A boolean type with {} bits.".format(bits)) - self.bits = bits - - def __repr__(self): - # type: () -> str - return 'BoolType(bits={})'.format(self.bits) - - @staticmethod - def with_bits(bits): - # type: (int) -> BoolType - typ = ValueType.by_name('b{:d}'.format(bits)) - if TYPE_CHECKING: - return cast(BoolType, typ) - else: - return typ - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - return self.bits - - -class FlagsType(SpecialType): - """ - A type representing CPU flags. - - Flags can't be stored in memory. - """ - - def __init__(self, name, doc): - # type: (str, str) -> None - super(FlagsType, self).__init__(name, 0, doc) - - def __repr__(self): - # type: () -> str - return 'FlagsType({})'.format(self.name) - - -class BVType(ValueType): - """A flat bitvector type. 
Used for semantics description only.""" - - def __init__(self, bits): - # type: (int) -> None - assert bits > 0, 'Must have positive number of bits' - super(BVType, self).__init__( - name='bv{:d}'.format(bits), - membytes=bits // 8, - doc="A bitvector type with {} bits.".format(bits)) - self.bits = bits - - def __repr__(self): - # type: () -> str - return 'BVType(bits={})'.format(self.bits) - - @staticmethod - def with_bits(bits): - # type: (int) -> BVType - name = 'bv{:d}'.format(bits) - if name not in ValueType._registry: - return BVType(bits) - - typ = ValueType.by_name(name) - if TYPE_CHECKING: - return cast(BVType, typ) - else: - return typ - - def lane_bits(self): - # type: () -> int - """Return the number of bits in a lane.""" - return self.bits - - def lane_count(self): - # type: () -> int - """Return the number of lanes. For BVTypes this is always 1.""" - return 1 diff --git a/cranelift-codegen/meta-python/cdsl/typevar.py b/cranelift-codegen/meta-python/cdsl/typevar.py deleted file mode 100644 index 9d2dace04..000000000 --- a/cranelift-codegen/meta-python/cdsl/typevar.py +++ /dev/null @@ -1,906 +0,0 @@ -""" -Type variables for Parametric polymorphism. - -Cranelift instructions and instruction transformations can be specified to be -polymorphic by using type variables. -""" -from __future__ import absolute_import -import math -from . import types, is_power_of_two -from copy import copy - -try: - from typing import Tuple, Union, Iterable, Any, Set, TYPE_CHECKING # noqa - if TYPE_CHECKING: - from srcgen import Formatter # noqa - Interval = Tuple[int, int] - # An Interval where `True` means 'everything' - BoolInterval = Union[bool, Interval] - # Set of special types: None, False, True, or iterable. - SpecialSpec = Union[bool, Iterable[types.SpecialType]] -except ImportError: - pass - -MAX_LANES = 256 -MAX_BITS = 64 -MAX_BITVEC = MAX_BITS * MAX_LANES - - -def int_log2(x): - # type: (int) -> int - return int(math.log(x, 2)) - - -def intersect(a, b): - # type: (Interval, Interval) -> Interval - """ - Given two `(min, max)` inclusive intervals, compute their intersection. - - Use `(None, None)` to represent the empty interval on input and output. - """ - if a[0] is None or b[0] is None: - return (None, None) - lo = max(a[0], b[0]) - assert lo is not None - hi = min(a[1], b[1]) - assert hi is not None - if lo <= hi: - return (lo, hi) - else: - return (None, None) - - -def is_empty(intv): - # type: (Interval) -> bool - return intv is None or intv is False or intv == (None, None) - - -def encode_bitset(vals, size): - # type: (Iterable[int], int) -> int - """ - Encode a set of values (each in the range [0, size)) as a bitset of width - size. - """ - res = 0 - assert is_power_of_two(size) and size <= 64 - for v in vals: - assert 0 <= v and v < size - res |= 1 << v - return res - - -def pp_set(s): - # type: (Iterable[Any]) -> str - """ - Return a consistent string representation of a set (ordering is fixed) - """ - return '{' + ', '.join([repr(x) for x in sorted(s)]) + '}' - - -def decode_interval(intv, full_range, default=None): - # type: (BoolInterval, Interval, int) -> Interval - """ - Decode an interval specification which can take the following values: - - True - Use the `full_range`.
- `False` or `None` - An empty interval - (lo, hi) - An explicit interval - """ - if isinstance(intv, tuple): - # mypy bug here: 'builtins.None' object is not iterable - lo, hi = intv - assert is_power_of_two(lo) - assert is_power_of_two(hi) - assert lo <= hi - assert lo >= full_range[0] - assert hi <= full_range[1] - return intv - - if intv: - return full_range - else: - return (default, default) - - -def interval_to_set(intv): - # type: (Interval) -> Set - if is_empty(intv): - return set() - - (lo, hi) = intv - assert is_power_of_two(lo) - assert is_power_of_two(hi) - assert lo <= hi - return set([2**i for i in range(int_log2(lo), int_log2(hi)+1)]) - - -def legal_bool(bits): - # type: (int) -> bool - """ - True iff bits is a legal bit width for a bool type. - bits == 1 || bits \\in { 8, 16, .. MAX_BITS } - """ - return bits == 1 or \ - (bits >= 8 and bits <= MAX_BITS and is_power_of_two(bits)) - - -class TypeSet(object): - """ - A set of types. - - We don't allow arbitrary subsets of types, but use a parametrized approach - instead. - - Objects of this class can be used as dictionary keys. - - Parametrized type sets are specified in terms of ranges: - - - The permitted range of vector lanes, where 1 indicates a scalar type. - - The permitted range of integer types. - - The permitted range of floating point types, and - - The permitted range of boolean types. - - The ranges are inclusive from smallest bit-width to largest bit-width. - - A typeset representing scalar integer types `i8` through `i32`: - - >>> TypeSet(ints=(8, 32)) - TypeSet(lanes={1}, ints={8, 16, 32}) - - Passing `True` instead of a range selects all available scalar types: - - >>> TypeSet(ints=True) - TypeSet(lanes={1}, ints={8, 16, 32, 64}) - >>> TypeSet(floats=True) - TypeSet(lanes={1}, floats={32, 64}) - >>> TypeSet(bools=True) - TypeSet(lanes={1}, bools={1, 8, 16, 32, 64}) - - Similarly, passing `True` for the lanes selects all possible scalar and - vector types: - - >>> TypeSet(lanes=True, ints=True) - TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256}, ints={8, 16, 32, 64}) - - Finally, a type set can contain special types (derived from `SpecialType`) - which can't appear as lane types. - - :param lanes: `(min, max)` inclusive range of permitted vector lane counts. - :param ints: `(min, max)` inclusive range of permitted scalar integer - widths. - :param floats: `(min, max)` inclusive range of permitted scalar floating - point widths. - :param bools: `(min, max)` inclusive range of permitted scalar boolean - widths. - :param bitvecs : `(min, max)` inclusive range of permitted bitvector - widths. - :param specials: Sequence of special types to appear in the set. - """ - - def __init__( - self, - lanes=None, # type: BoolInterval - ints=None, # type: BoolInterval - floats=None, # type: BoolInterval - bools=None, # type: BoolInterval - bitvecs=None, # type: BoolInterval - specials=None # type: SpecialSpec - ): - # type: (...) -> None - self.lanes = interval_to_set(decode_interval(lanes, (1, MAX_LANES), 1)) - self.ints = interval_to_set(decode_interval(ints, (8, MAX_BITS))) - self.floats = interval_to_set(decode_interval(floats, (32, 64))) - self.bools = interval_to_set(decode_interval(bools, (1, MAX_BITS))) - self.bools = set(filter(legal_bool, self.bools)) - self.bitvecs = interval_to_set(decode_interval(bitvecs, - (1, MAX_BITVEC))) - # Allow specials=None, specials=True, specials=(...) 
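 # (Editor's note, summarizing the branch that follows: None and False
 # yield an empty set, True pulls in every registered SpecialType, and
 # an explicit iterable is used as given.)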
- self.specials = set() # type: Set[types.SpecialType] - if isinstance(specials, bool): - if specials: - self.specials = set(types.ValueType.all_special_types) - elif specials: - self.specials = set(specials) - - def copy(self): - # type: (TypeSet) -> TypeSet - """ - Return a copy of our self. - """ - n = TypeSet() - n.lanes = copy(self.lanes) - n.ints = copy(self.ints) - n.floats = copy(self.floats) - n.bools = copy(self.bools) - n.bitvecs = copy(self.bitvecs) - n.specials = copy(self.specials) - return n - - def typeset_key(self): - # type: () -> Tuple[Tuple, Tuple, Tuple, Tuple, Tuple, Tuple] - """Key tuple used for hashing and equality.""" - return (tuple(sorted(list(self.lanes))), - tuple(sorted(list(self.ints))), - tuple(sorted(list(self.floats))), - tuple(sorted(list(self.bools))), - tuple(sorted(list(self.bitvecs))), - tuple(sorted(s.name for s in self.specials))) - - def __hash__(self): - # type: () -> int - h = hash(self.typeset_key()) - assert h == getattr(self, 'prev_hash', h), "TypeSet changed" - self.prev_hash = h - return h - - def __eq__(self, other): - # type: (object) -> bool - if isinstance(other, TypeSet): - return self.typeset_key() == other.typeset_key() - else: - return False - - def __ne__(self, other): - # type: (object) -> bool - return not self.__eq__(other) - - def __repr__(self): - # type: () -> str - s = 'TypeSet(lanes={}'.format(pp_set(self.lanes)) - if len(self.ints) > 0: - s += ', ints={}'.format(pp_set(self.ints)) - if len(self.floats) > 0: - s += ', floats={}'.format(pp_set(self.floats)) - if len(self.bools) > 0: - s += ', bools={}'.format(pp_set(self.bools)) - if len(self.bitvecs) > 0: - s += ', bitvecs={}'.format(pp_set(self.bitvecs)) - if len(self.specials) > 0: - s += ', specials=[{}]'.format(pp_set(self.specials)) - return s + ')' - - def emit_fields(self, fmt): - # type: (Formatter) -> None - """Emit field initializers for this typeset.""" - assert len(self.bitvecs) == 0, "Bitvector types are not emitable." - fmt.comment(repr(self)) - - fields = (('lanes', 16), - ('ints', 8), - ('floats', 8), - ('bools', 8)) - - for (field, bits) in fields: - vals = [int_log2(x) for x in getattr(self, field)] - fmt.line('{}: BitSet::<u{}>({}),' - .format(field, bits, encode_bitset(vals, bits))) - - def __iand__(self, other): - # type: (TypeSet) -> TypeSet - """ - Intersect self with other type set.
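 (Editor's note: the intersection below is component-wise; lanes, ints, floats, bools, bitvecs, and specials are each intersected independently, as the doctests that follow illustrate.)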
- - >>> a = TypeSet(lanes=True, ints=(16, 32)) - >>> a - TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256}, ints={16, 32}) - >>> b = TypeSet(lanes=(4, 16), ints=True) - >>> a &= b - >>> a - TypeSet(lanes={4, 8, 16}, ints={16, 32}) - - >>> a = TypeSet(lanes=True, bools=(1, 8)) - >>> b = TypeSet(lanes=True, bools=(16, 32)) - >>> a &= b - >>> a - TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256}) - """ - self.lanes.intersection_update(other.lanes) - self.ints.intersection_update(other.ints) - self.floats.intersection_update(other.floats) - self.bools.intersection_update(other.bools) - self.bitvecs.intersection_update(other.bitvecs) - self.specials.intersection_update(other.specials) - - return self - - def issubset(self, other): - # type: (TypeSet) -> bool - """ - Return true iff self is a subset of other - """ - return self.lanes.issubset(other.lanes) and \ - self.ints.issubset(other.ints) and \ - self.floats.issubset(other.floats) and \ - self.bools.issubset(other.bools) and \ - self.bitvecs.issubset(other.bitvecs) and \ - self.specials.issubset(other.specials) - - def lane_of(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across lane_of - """ - new = self.copy() - new.lanes = set([1]) - new.bitvecs = set() - return new - - def as_bool(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across as_bool - """ - new = self.copy() - new.ints = set() - new.floats = set() - new.bitvecs = set() - - if len(self.lanes.difference(set([1]))) > 0: - new.bools = self.ints.union(self.floats).union(self.bools) - - if 1 in self.lanes: - new.bools.add(1) - return new - - def half_width(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across halfwidth - """ - new = self.copy() - new.ints = set([x//2 for x in self.ints if x > 8]) - new.floats = set([x//2 for x in self.floats if x > 32]) - new.bools = set([x//2 for x in self.bools if x > 8]) - new.bitvecs = set([x//2 for x in self.bitvecs if x > 1]) - new.specials = set() - - return new - - def double_width(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across doublewidth - """ - new = self.copy() - new.ints = set([x*2 for x in self.ints if x < MAX_BITS]) - new.floats = set([x*2 for x in self.floats if x < MAX_BITS]) - new.bools = set(filter(legal_bool, - set([x*2 for x in self.bools if x < MAX_BITS]))) - new.bitvecs = set([x*2 for x in self.bitvecs if x < MAX_BITVEC]) - new.specials = set() - - return new - - def half_vector(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across halfvector - """ - new = self.copy() - new.bitvecs = set() - new.lanes = set([x//2 for x in self.lanes if x > 1]) - new.specials = set() - - return new - - def double_vector(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across doublevector - """ - new = self.copy() - new.bitvecs = set() - new.lanes = set([x*2 for x in self.lanes if x < MAX_LANES]) - new.specials = set() - - return new - - def to_bitvec(self): - # type: () -> TypeSet - """ - Return a TypeSet describing the image of self across to_bitvec - """ - assert len(self.bitvecs) == 0 - all_scalars = self.ints.union(self.floats.union(self.bools)) - - new = self.copy() - new.lanes = set([1]) - new.ints = set() - new.bools = set() - new.floats = set() - new.bitvecs = set([lane_w * nlanes for lane_w in all_scalars - for nlanes in self.lanes]) - new.specials = set() - - return new - - def image(self, func): - # type: 
(str) -> TypeSet - """ - Return the image of self across the derived function func - """ - if (func == TypeVar.LANEOF): - return self.lane_of() - elif (func == TypeVar.ASBOOL): - return self.as_bool() - elif (func == TypeVar.HALFWIDTH): - return self.half_width() - elif (func == TypeVar.DOUBLEWIDTH): - return self.double_width() - elif (func == TypeVar.HALFVECTOR): - return self.half_vector() - elif (func == TypeVar.DOUBLEVECTOR): - return self.double_vector() - elif (func == TypeVar.TOBITVEC): - return self.to_bitvec() - else: - assert False, "Unknown derived function: " + func - - def preimage(self, func): - # type: (str) -> TypeSet - """ - Return the inverse image of self across the derived function func - """ - # The inverse of the empty set is always empty - if (self.size() == 0): - return self - - if (func == TypeVar.LANEOF): - new = self.copy() - new.bitvecs = set() - new.lanes = set([2**i for i in range(0, int_log2(MAX_LANES)+1)]) - return new - elif (func == TypeVar.ASBOOL): - new = self.copy() - new.bitvecs = set() - - if 1 not in self.bools: - new.ints = self.bools.difference(set([1])) - new.floats = self.bools.intersection(set([32, 64])) - # If b1 is not in our typeset, than lanes=1 cannot be in the - # pre-image, as as_bool() of scalars is always b1. - new.lanes = self.lanes.difference(set([1])) - else: - new.ints = set([2**x for x in range(3, 7)]) - new.floats = set([32, 64]) - - return new - elif (func == TypeVar.HALFWIDTH): - return self.double_width() - elif (func == TypeVar.DOUBLEWIDTH): - return self.half_width() - elif (func == TypeVar.HALFVECTOR): - return self.double_vector() - elif (func == TypeVar.DOUBLEVECTOR): - return self.half_vector() - elif (func == TypeVar.TOBITVEC): - new = TypeSet() - - # Start with all possible lanes/ints/floats/bools - lanes = interval_to_set(decode_interval(True, (1, MAX_LANES), 1)) - ints = interval_to_set(decode_interval(True, (8, MAX_BITS))) - floats = interval_to_set(decode_interval(True, (32, 64))) - bools = interval_to_set(decode_interval(True, (1, MAX_BITS))) - - # See which combinations have a size that appears in self.bitvecs - has_t = set() # type: Set[Tuple[str, int, int]] - for l in lanes: - for i in ints: - if i * l in self.bitvecs: - has_t.add(('i', i, l)) - for i in bools: - if i * l in self.bitvecs: - has_t.add(('b', i, l)) - for i in floats: - if i * l in self.bitvecs: - has_t.add(('f', i, l)) - - for (t, width, lane) in has_t: - new.lanes.add(lane) - if (t == 'i'): - new.ints.add(width) - elif (t == 'b'): - new.bools.add(width) - else: - assert t == 'f' - new.floats.add(width) - - return new - else: - assert False, "Unknown derived function: " + func - - def size(self): - # type: () -> int - """ - Return the number of concrete types represented by this typeset - """ - return (len(self.lanes) * (len(self.ints) + len(self.floats) + - len(self.bools) + len(self.bitvecs)) + - len(self.specials)) - - def concrete_types(self): - # type: () -> Iterable[types.ValueType] - def by(scalar, lanes): - # type: (types.LaneType, int) -> types.ValueType - if (lanes == 1): - return scalar - else: - return scalar.by(lanes) - - for nlanes in self.lanes: - for bits in self.ints: - yield by(types.IntType.with_bits(bits), nlanes) - for bits in self.floats: - yield by(types.FloatType.with_bits(bits), nlanes) - for bits in self.bools: - yield by(types.BoolType.with_bits(bits), nlanes) - for bits in self.bitvecs: - assert nlanes == 1 - yield types.BVType.with_bits(bits) - - for spec in self.specials: - yield spec - - def get_singleton(self): - # 
type: () -> types.ValueType - """ - Return the singleton type represented by self. Can only call on - typesets containing 1 type. - """ - types = list(self.concrete_types()) - assert len(types) == 1 - return types[0] - - def widths(self): - # type: () -> Set[int] - """ Return a set of the widths of all possible types in self""" - scalar_w = self.ints.union(self.floats.union(self.bools)) - scalar_w = scalar_w.union(self.bitvecs) - return set(w * l for l in self.lanes for w in scalar_w) - - -class TypeVar(object): - """ - Type variables can be used in place of concrete types when defining - instructions. This makes the instructions *polymorphic*. - - A type variable is restricted to vary over a subset of the value types. - This subset is specified by a set of flags that control the permitted base - types and whether the type variable can assume scalar or vector types, or - both. - - :param name: Short name of type variable used in instruction descriptions. - :param doc: Documentation string. - :param ints: Allow all integer base types, or `(min, max)` bit-range. - :param floats: Allow all floating point base types, or `(min, max)` - bit-range. - :param bools: Allow all boolean base types, or `(min, max)` bit-range. - :param scalars: Allow type variable to assume scalar types. - :param simd: Allow type variable to assume vector types, or `(min, max)` - lane count range. - :param bitvecs: Allow all BitVec base types, or `(min, max)` bit-range. - """ - - def __init__( - self, - name, # type: str - doc, # type: str - ints=False, # type: BoolInterval - floats=False, # type: BoolInterval - bools=False, # type: BoolInterval - scalars=True, # type: bool - simd=False, # type: BoolInterval - bitvecs=False, # type: BoolInterval - base=None, # type: TypeVar - derived_func=None, # type: str - specials=None # type: SpecialSpec - ): - # type: (...) 
-> None - self.name = name - self.__doc__ = doc - self.is_derived = isinstance(base, TypeVar) - if base: - assert self.is_derived - assert derived_func - self.base = base - self.derived_func = derived_func - self.name = '{}({})'.format(derived_func, base.name) - else: - min_lanes = 1 if scalars else 2 - lanes = decode_interval(simd, (min_lanes, MAX_LANES), 1) - self.type_set = TypeSet( - lanes=lanes, - ints=ints, - floats=floats, - bools=bools, - bitvecs=bitvecs, - specials=specials) - - @staticmethod - def singleton(typ): - # type: (types.ValueType) -> TypeVar - """Create a type variable that can only assume a single type.""" - scalar = None # type: types.ValueType - if isinstance(typ, types.VectorType): - scalar = typ.base - lanes = (typ.lanes, typ.lanes) - elif isinstance(typ, types.LaneType): - scalar = typ - lanes = (1, 1) - elif isinstance(typ, types.SpecialType): - return TypeVar(typ.name, typ.__doc__, specials=[typ]) - else: - assert isinstance(typ, types.BVType) - scalar = typ - lanes = (1, 1) - - ints = None - floats = None - bools = None - bitvecs = None - - if isinstance(scalar, types.IntType): - ints = (scalar.bits, scalar.bits) - elif isinstance(scalar, types.FloatType): - floats = (scalar.bits, scalar.bits) - elif isinstance(scalar, types.BoolType): - bools = (scalar.bits, scalar.bits) - elif isinstance(scalar, types.BVType): - bitvecs = (scalar.bits, scalar.bits) - - tv = TypeVar( - typ.name, typ.__doc__, - ints=ints, floats=floats, bools=bools, - bitvecs=bitvecs, simd=lanes) - return tv - - def __str__(self): - # type: () -> str - return "`{}`".format(self.name) - - def __repr__(self): - # type: () -> str - if self.is_derived: - return ( - 'TypeVar({}, base={}, derived_func={})' - .format(self.name, self.base, self.derived_func)) - else: - return ( - 'TypeVar({}, {})' - .format(self.name, self.type_set)) - - def __hash__(self): - # type: () -> int - if (not self.is_derived): - return object.__hash__(self) - - return hash((self.derived_func, self.base)) - - def __eq__(self, other): - # type: (object) -> bool - if not isinstance(other, TypeVar): - return False - if self.is_derived and other.is_derived: - return ( - self.derived_func == other.derived_func and - self.base == other.base) - else: - return self is other - - def __ne__(self, other): - # type: (object) -> bool - return not self.__eq__(other) - - # Supported functions for derived type variables. - # The names here must match the method names on `ir::types::Type`. - # The camel_case of the names must match `enum OperandConstraint` in - # `instructions.rs`. - LANEOF = 'lane_of' - ASBOOL = 'as_bool' - HALFWIDTH = 'half_width' - DOUBLEWIDTH = 'double_width' - HALFVECTOR = 'half_vector' - DOUBLEVECTOR = 'double_vector' - TOBITVEC = 'to_bitvec' - - @staticmethod - def is_bijection(func): - # type: (str) -> bool - return func in [ - TypeVar.HALFWIDTH, - TypeVar.DOUBLEWIDTH, - TypeVar.HALFVECTOR, - TypeVar.DOUBLEVECTOR] - - @staticmethod - def inverse_func(func): - # type: (str) -> str - return { - TypeVar.HALFWIDTH: TypeVar.DOUBLEWIDTH, - TypeVar.DOUBLEWIDTH: TypeVar.HALFWIDTH, - TypeVar.HALFVECTOR: TypeVar.DOUBLEVECTOR, - TypeVar.DOUBLEVECTOR: TypeVar.HALFVECTOR - }[func] - - @staticmethod - def derived(base, derived_func): - # type: (TypeVar, str) -> TypeVar - """Create a type variable that is a function of another.""" - - # Safety checks to avoid over/underflows. 
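 # (Editor's note: these checks mirror the image functions above; for
 # example, half_width() keeps only ints wider than 8 bits and floats
 # wider than 32 bits, so deriving HALFWIDTH from a set already at
 # minimum width would silently yield an empty typeset.)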
- ts = base.get_typeset() - - assert len(ts.specials) == 0, "Can't derive from special types" - - if derived_func == TypeVar.HALFWIDTH: - if len(ts.ints) > 0: - assert min(ts.ints) > 8, "Can't halve all integer types" - if len(ts.floats) > 0: - assert min(ts.floats) > 32, "Can't halve all float types" - if len(ts.bools) > 0: - assert min(ts.bools) > 8, "Can't halve all boolean types" - elif derived_func == TypeVar.DOUBLEWIDTH: - if len(ts.ints) > 0: - assert max(ts.ints) < MAX_BITS,\ - "Can't double all integer types." - if len(ts.floats) > 0: - assert max(ts.floats) < MAX_BITS,\ - "Can't double all float types." - if len(ts.bools) > 0: - assert max(ts.bools) < MAX_BITS, "Can't double all bool types." - elif derived_func == TypeVar.HALFVECTOR: - assert min(ts.lanes) > 1, "Can't halve a scalar type" - elif derived_func == TypeVar.DOUBLEVECTOR: - assert max(ts.lanes) < MAX_LANES, "Can't double 256 lanes." - - return TypeVar(None, None, base=base, derived_func=derived_func) - - @staticmethod - def from_typeset(ts): - # type: (TypeSet) -> TypeVar - """ Create a type variable from a type set.""" - tv = TypeVar(None, None) - tv.type_set = ts - return tv - - def lane_of(self): - # type: () -> TypeVar - """ - Return a derived type variable that is the scalar lane type of this - type variable. - - When this type variable assumes a scalar type, the derived type will be - the same scalar type. - """ - return TypeVar.derived(self, self.LANEOF) - - def as_bool(self): - # type: () -> TypeVar - """ - Return a derived type variable that has the same vector geometry as - this type variable, but with boolean lanes. Scalar types map to `b1`. - """ - return TypeVar.derived(self, self.ASBOOL) - - def half_width(self): - # type: () -> TypeVar - """ - Return a derived type variable that has the same number of vector lanes - as this one, but the lanes are half the width. - """ - return TypeVar.derived(self, self.HALFWIDTH) - - def double_width(self): - # type: () -> TypeVar - """ - Return a derived type variable that has the same number of vector lanes - as this one, but the lanes are double the width. - """ - return TypeVar.derived(self, self.DOUBLEWIDTH) - - def half_vector(self): - # type: () -> TypeVar - """ - Return a derived type variable that has half the number of vector lanes - as this one, with the same lane type. - """ - return TypeVar.derived(self, self.HALFVECTOR) - - def double_vector(self): - # type: () -> TypeVar - """ - Return a derived type variable that has twice the number of vector - lanes as this one, with the same lane type. - """ - return TypeVar.derived(self, self.DOUBLEVECTOR) - - def to_bitvec(self): - # type: () -> TypeVar - """ - Return a derived type variable that represent a flat bitvector with - the same size as self - """ - return TypeVar.derived(self, self.TOBITVEC) - - def singleton_type(self): - # type: () -> types.ValueType - """ - If the associated typeset has a single type return it. Otherwise return - None - """ - ts = self.get_typeset() - if ts.size() != 1: - return None - - return ts.get_singleton() - - def free_typevar(self): - # type: () -> TypeVar - """ - Get the free type variable controlling this one. - """ - if self.is_derived: - return self.base.free_typevar() - elif self.singleton_type() is not None: - # A singleton type variable is not a proper free variable. - return None - else: - return self - - def rust_expr(self): - # type: () -> str - """ - Get a Rust expression that computes the type of this type variable. 
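 A minimal illustration (editor's addition, assuming a free, non-singleton type variable):

 >>> t = TypeVar('t', 'doc', ints=(16, 64))
 >>> t.half_width().rust_expr()
 't.half_width()'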
- """ - if self.is_derived: - return '{}.{}()'.format( - self.base.rust_expr(), self.derived_func) - elif self.singleton_type(): - return self.singleton_type().rust_name() - else: - return self.name - - def constrain_types_by_ts(self, ts): - # type: (TypeSet) -> None - """ - Constrain the range of types this variable can assume to a subset of - those in the typeset ts. - """ - if not self.is_derived: - self.type_set &= ts - else: - self.base.constrain_types_by_ts(ts.preimage(self.derived_func)) - - def constrain_types(self, other): - # type: (TypeVar) -> None - """ - Constrain the range of types this variable can assume to a subset of - those `other` can assume. - """ - if self is other: - return - - self.constrain_types_by_ts(other.get_typeset()) - - def get_typeset(self): - # type: () -> TypeSet - """ - Returns the typeset for this TV. If the TV is derived, computes it - recursively from the derived function and the base's typeset. - """ - if not self.is_derived: - return self.type_set - else: - return self.base.get_typeset().image(self.derived_func) - - def get_fresh_copy(self, name): - # type: (str) -> TypeVar - """ - Get a fresh copy of self. Can only be called on free typevars. - """ - assert not self.is_derived - tv = TypeVar.from_typeset(self.type_set.copy()) - tv.name = name - return tv diff --git a/cranelift-codegen/meta-python/cdsl/xform.py b/cranelift-codegen/meta-python/cdsl/xform.py deleted file mode 100644 index 27aa515f9..000000000 --- a/cranelift-codegen/meta-python/cdsl/xform.py +++ /dev/null @@ -1,423 +0,0 @@ -""" -Instruction transformations. -""" -from __future__ import absolute_import -from .ast import Def, Var, Apply -from .ti import ti_xform, TypeEnv, get_type_env, TypeConstraint -from collections import OrderedDict -from functools import reduce - -try: - from typing import Union, Iterator, Sequence, Iterable, List, Dict # noqa - from typing import Optional, Set # noqa - from .ast import Expr, VarAtomMap # noqa - from .isa import TargetISA # noqa - from .typevar import TypeVar # noqa - from .instructions import ConstrList, Instruction # noqa - DefApply = Union[Def, Apply] -except ImportError: - pass - - -def canonicalize_defapply(node): - # type: (DefApply) -> Def - """ - Canonicalize a `Def` or `Apply` node into a `Def`. - - An `Apply` becomes a `Def` with an empty list of defs. - """ - if isinstance(node, Apply): - return Def((), node) - else: - return node - - -class Rtl(object): - """ - Register Transfer Language list. - - An RTL object contains a list of register assignments in the form of `Def` - objects. - - An RTL list can represent both a source pattern to be matched, or a - destination pattern to be inserted. - """ - - def __init__(self, *args): - # type: (*DefApply) -> None - self.rtl = tuple(map(canonicalize_defapply, args)) - - def copy(self, m): - # type: (VarAtomMap) -> Rtl - """ - Return a copy of this rtl with all Vars substituted with copies or - according to m. Update m as necessary. - """ - return Rtl(*[d.copy(m) for d in self.rtl]) - - def vars(self): - # type: () -> Set[Var] - """Return the set of all Vars in self that correspond to SSA values""" - return reduce(lambda x, y: x.union(y), - [d.vars() for d in self.rtl], - set([])) - - def definitions(self): - # type: () -> Set[Var] - """ Return the set of all Vars defined in self""" - return reduce(lambda x, y: x.union(y), - [d.definitions() for d in self.rtl], - set([])) - - def free_vars(self): - # type: () -> Set[Var] - """Return the set of free Vars corresp. 
to SSA vals used in self""" - def flow_f(s, d): - # type: (Set[Var], Def) -> Set[Var] - """Compute the change in the set of free vars across a Def""" - s = s.difference(set(d.defs)) - uses = set(d.expr.args[i] for i in d.expr.inst.value_opnums) - for v in uses: - assert isinstance(v, Var) - s.add(v) - - return s - - return reduce(flow_f, reversed(self.rtl), set([])) - - def substitution(self, other, s): - # type: (Rtl, VarAtomMap) -> Optional[VarAtomMap] - """ - If the Rtl self agrees structurally with the Rtl other, return a - substitution to transform self to other. Two Rtls agree structurally if - they have the same sequence of Defs, that agree structurally. - """ - if len(self.rtl) != len(other.rtl): - return None - - for i in range(len(self.rtl)): - s = self.rtl[i].substitution(other.rtl[i], s) - - if s is None: - return None - - return s - - def is_concrete(self): - # type: (Rtl) -> bool - """Return True iff every Var in the self has a singleton type.""" - return all(v.get_typevar().singleton_type() is not None - for v in self.vars()) - - def cleanup_concrete_rtl(self): - # type: (Rtl) -> None - """ - Given that there is only 1 possible concrete typing T for self, assign - a singleton TV with type t=T[v] for each Var v \\in self. Its an error - to call this on an Rtl with more than 1 possible typing. This modifies - the Rtl in-place. - """ - from .ti import ti_rtl, TypeEnv - # 1) Infer the types of all vars in res - typenv = get_type_env(ti_rtl(self, TypeEnv())) - typenv.normalize() - typenv = typenv.extract() - - # 2) Make sure there is only one possible type assignment - typings = list(typenv.concrete_typings()) - assert len(typings) == 1 - typing = typings[0] - - # 3) Assign the only possible type to each variable. - for v in typenv.vars: - assert typing[v].singleton_type() is not None - v.set_typevar(typing[v]) - - def __str__(self): - # type: () -> str - return "\n".join(map(str, self.rtl)) - - -class XForm(object): - """ - An instruction transformation consists of a source and destination pattern. - - Patterns are expressed in *register transfer language* as tuples of - `ast.Def` or `ast.Expr` nodes. A pattern may optionally have a sequence of - TypeConstraints, that additionally limit the set of cases when it applies. - - A legalization pattern must have a source pattern containing only a single - instruction. - - >>> from base.instructions import iconst, iadd, iadd_imm - >>> a = Var('a') - >>> c = Var('c') - >>> v = Var('v') - >>> x = Var('x') - >>> XForm( - ... Rtl(c << iconst(v), - ... a << iadd(x, c)), - ... Rtl(a << iadd_imm(x, v))) - XForm(inputs=[Var(v), Var(x)], defs=[Var(c, src), Var(a, src, dst)], - c << iconst(v) - a << iadd(x, c) - => - a << iadd_imm(x, v) - ) - """ - - def __init__(self, src, dst, constraints=None): - # type: (Rtl, Rtl, Optional[ConstrList]) -> None - self.src = src - self.dst = dst - # Variables that are inputs to the source pattern. - self.inputs = list() # type: List[Var] - # Variables defined in either src or dst. - self.defs = list() # type: List[Var] - - # Rewrite variables in src and dst RTL lists to our own copies. - # Map name -> private Var. - symtab = dict() # type: Dict[str, Var] - self._rewrite_rtl(src, symtab, Var.SRCCTX) - num_src_inputs = len(self.inputs) - self._rewrite_rtl(dst, symtab, Var.DSTCTX) - # Needed for testing type inference on XForms - self.symtab = symtab - - # Check for inconsistently used inputs. 
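 # (Editor's note: a Var lands in self.inputs the first time it is
 # used; if a later Def in src or dst writes the same name, is_input()
 # presumably turns false, and we reject the pattern here rather than
 # guess which role was intended.)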
- for i in self.inputs: - if not i.is_input(): - raise AssertionError( - "'{}' used as both input and def".format(i)) - - # Check for spurious inputs in dst. - if len(self.inputs) > num_src_inputs: - raise AssertionError( - "extra inputs in dst RTL: {}".format( - self.inputs[num_src_inputs:])) - - # Perform type inference and cleanup - raw_ti = get_type_env(ti_xform(self, TypeEnv())) - raw_ti.normalize() - self.ti = raw_ti.extract() - - def interp_tv(tv): - # type: (TypeVar) -> TypeVar - """ Convert typevars according to symtab """ - if not tv.name.startswith("typeof_"): - return tv - return symtab[tv.name[len("typeof_"):]].get_typevar() - - self.constraints = [] # type: List[TypeConstraint] - if constraints is not None: - if isinstance(constraints, TypeConstraint): - constr_list = [constraints] # type: Sequence[TypeConstraint] - else: - constr_list = constraints - - for c in constr_list: - type_m = {tv: interp_tv(tv) for tv in c.tvs()} - inner_c = c.translate(type_m) - self.constraints.append(inner_c) - self.ti.add_constraint(inner_c) - - # Sanity: The set of inferred free typevars should be a subset of the - # TVs corresponding to Vars appearing in src - free_typevars = set(self.ti.free_typevars()) - src_vars = set(self.inputs).union( - [x for x in self.defs if not x.is_temp()]) - src_tvs = set([v.get_typevar() for v in src_vars]) - if (not free_typevars.issubset(src_tvs)): - raise AssertionError( - "Some free vars don't appear in src - {}" - .format(free_typevars.difference(src_tvs))) - - # Update the type vars for each Var to their inferred values - for v in self.inputs + self.defs: - v.set_typevar(self.ti[v.get_typevar()]) - - def __repr__(self): - # type: () -> str - s = "XForm(inputs={}, defs={},\n ".format(self.inputs, self.defs) - s += '\n '.join(str(n) for n in self.src.rtl) - s += '\n=>\n ' - s += '\n '.join(str(n) for n in self.dst.rtl) - s += '\n)' - return s - - def _rewrite_rtl(self, rtl, symtab, context): - # type: (Rtl, Dict[str, Var], int) -> None - for line in rtl.rtl: - if isinstance(line, Def): - line.defs = tuple( - self._rewrite_defs(line, symtab, context)) - expr = line.expr - else: - expr = line - self._rewrite_expr(expr, symtab, context) - - def _rewrite_expr(self, expr, symtab, context): - # type: (Apply, Dict[str, Var], int) -> None - """ - Find all uses of variables in `expr` and replace them with our own - local symbols. - """ - - # Accept a whole expression tree. - stack = [expr] - while len(stack) > 0: - expr = stack.pop() - expr.args = tuple( - self._rewrite_uses(expr, stack, symtab, context)) - - def _rewrite_defs(self, line, symtab, context): - # type: (Def, Dict[str, Var], int) -> Iterable[Var] - """ - Given a tuple of symbols defined in a Def, rewrite them to local - symbols. Yield the new locals. - """ - for sym in line.defs: - name = str(sym) - if name in symtab: - var = symtab[name] - if var.get_def(context): - raise AssertionError("'{}' multiply defined".format(name)) - else: - var = Var(name) - symtab[name] = var - self.defs.append(var) - var.set_def(context, line) - yield var - - def _rewrite_uses(self, expr, stack, symtab, context): - # type: (Apply, List[Apply], Dict[str, Var], int) -> Iterable[Expr] - """ - Given an `Apply` expr, rewrite all uses in its arguments to local - variables. Yield a sequence of new arguments. - - Append any `Apply` arguments to `stack`. - """ - for arg, operand in zip(expr.args, expr.inst.ins): - # Nested instructions are allowed. Visit recursively. 
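 # (Editor's sketch: in a pattern such as `a << iadd(x, iconst(v))`,
 # the inner `iconst(v)` is itself an Apply; it is pushed onto `stack`
 # here and its own arguments are rewritten by a later iteration of
 # the while-loop in _rewrite_expr.)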
- if isinstance(arg, Apply): - stack.append(arg) - yield arg - continue - if not isinstance(arg, Var): - assert not operand.is_value(), "Value arg must be `Var`" - yield arg - continue - # This is supposed to be a symbolic value reference. - name = str(arg) - if name in symtab: - var = symtab[name] - # The variable must be used consistently as a def or input. - if not var.is_input() and not var.get_def(context): - raise AssertionError( - "'{}' used as both input and def" - .format(name)) - else: - # First time use of variable. - var = Var(name) - symtab[name] = var - self.inputs.append(var) - yield var - - def verify_legalize(self): - # type: () -> None - """ - Verify that this is a valid legalization XForm. - - - The source pattern must describe a single instruction. - - All values defined in the output pattern must be defined in the - destination pattern. - """ - assert len(self.src.rtl) == 1, "Legalize needs single instruction." - for d in self.src.rtl[0].defs: - if not d.is_output(): - raise AssertionError( - '{} not defined in dest pattern'.format(d)) - - def apply(self, r, suffix=None): - # type: (Rtl, str) -> Rtl - """ - Given a concrete Rtl r s.t. r matches self.src, return the - corresponding concrete self.dst. If suffix is provided, any temporary - defs are renamed with '.suffix' appended to their old name. - """ - assert r.is_concrete() - s = self.src.substitution(r, {}) # type: VarAtomMap - assert s is not None - - if (suffix is not None): - for v in self.dst.vars(): - if v.is_temp(): - assert v not in s - s[v] = Var(v.name + '.' + suffix) - - dst = self.dst.copy(s) - dst.cleanup_concrete_rtl() - return dst - - -class XFormGroup(object): - """ - A group of related transformations. - - :param isa: A target ISA whose instructions are allowed. - :param chain: A next level group to try if this one doesn't match. - """ - - def __init__(self, name, doc, isa=None, chain=None): - # type: (str, str, TargetISA, XFormGroup) -> None - self.xforms = list() # type: List[XForm] - self.custom = OrderedDict() # type: OrderedDict[Instruction, str] - self.name = name - self.__doc__ = doc - self.isa = isa - self.chain = chain - - def __str__(self): - # type: () -> str - if self.isa: - return '{}.{}'.format(self.isa.name, self.name) - else: - return self.name - - def rust_name(self): - # type: () -> str - """ - Get the Rust name of this function implementing this transform. - """ - if self.isa: - # This is a function in the same module as the LEGALIZE_ACTION - # table referring to it. - return self.name - else: - return 'crate::legalizer::{}'.format(self.name) - - def legalize(self, src, dst): - # type: (Union[Def, Apply], Rtl) -> None - """ - Add a legalization pattern to this group. - - :param src: Single `Def` or `Apply` to be legalized. - :param dst: `Rtl` list of replacement instructions. - """ - xform = XForm(Rtl(src), dst) - xform.verify_legalize() - self.xforms.append(xform) - - def custom_legalize(self, inst, funcname): - # type: (Instruction, str) -> None - """ - Add a custom legalization action for `inst`. - - The `funcname` parameter is the fully qualified name of a Rust function - which takes the same arguments as the `isa::Legalize` actions. - - The custom function will be called to legalize `inst` and any return - value is ignored. 
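 A hypothetical usage sketch (editor's addition; the group name and Rust path are illustrative only, and `iconst` is assumed to be imported from base.instructions):

 >>> narrow = XFormGroup('narrow', 'Narrowing legalizations')
 >>> narrow.custom_legalize(iconst, 'crate::legalizer::expand_iconst')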
- """ - assert inst not in self.custom, "Duplicate custom_legalize" - self.custom[inst] = funcname diff --git a/cranelift-codegen/meta-python/check.sh b/cranelift-codegen/meta-python/check.sh deleted file mode 100755 index fec63798d..000000000 --- a/cranelift-codegen/meta-python/check.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -set -euo pipefail -topdir=$(dirname "$0") -cd "$topdir" - -function runif { - if type "$1" > /dev/null 2>&1; then - version=$("$1" --version 2>&1) - echo " === $1: $version ===" - "$@" - else - echo "$1 not found" - fi -} - -# Style linting. -runif flake8 . - -# Type checking. -# TODO: Re-enable mypy on Travis osx. Pip currently installs mypy into a -# directory which is not in the PATH. -if [ "${TRAVIS_OS_NAME:-other}" != "osx" ]; then - runif mypy --py2 build.py -fi - -# Python unit tests. -runif python2.7 -m unittest discover - -# Then run the unit tests again with Python 3. -# We get deprecation warnings about assertRaisesRegexp which was renamed in -# Python 3, but there doesn't seem to be an easy workaround. -runif python3 -Wignore:Deprecation -m unittest discover diff --git a/cranelift-codegen/meta-python/constant_hash.py b/cranelift-codegen/meta-python/constant_hash.py deleted file mode 100644 index e28293672..000000000 --- a/cranelift-codegen/meta-python/constant_hash.py +++ /dev/null @@ -1,63 +0,0 @@ -""" -Generate constant hash tables. - -The `constant_hash` module can generate constant pre-populated hash tables. We -don't attempt perfect hashing, but simply generate an open addressed -quadratically probed hash table. -""" -from __future__ import absolute_import -from cdsl import next_power_of_two - -try: - from typing import Any, List, Iterable, Callable # noqa -except ImportError: - pass - - -def simple_hash(s): - # type: (str) -> int - """ - Compute a primitive hash of a string. - - Example: - >>> "0x%x" % simple_hash("Hello") - '0x2fa70c01' - >>> "0x%x" % simple_hash("world") - '0x5b0c31d5' - """ - h = 5381 - for c in s: - h = ((h ^ ord(c)) + ((h >> 6) + (h << 26))) & 0xffffffff - return h - - -def compute_quadratic(items, hash_function): - # type: (Iterable[Any], Callable[[Any], int]) -> List[Any] - """ - Compute an open addressed, quadratically probed hash table containing - `items`. The returned table is a list containing the elements of the - iterable `items` and `None` in unused slots. - - :param items: Iterable set of items to place in hash table. - :param hash_function: Hash function which takes an item and returns a - number. - - Simple example (see hash values above, they collide on slot 1): - >>> compute_quadratic(['Hello', 'world'], simple_hash) - [None, 'Hello', 'world', None] - """ - - items = list(items) - # Table size must be a power of two. Aim for >20% unused slots. - size = next_power_of_two(int(1.20*len(items))) - table = [None] * size # type: List[Any] - - for i in items: - h = hash_function(i) % size - s = 0 - while table[h] is not None: - s += 1 - h = (h + s) % size - table[h] = i - - return table diff --git a/cranelift-codegen/meta-python/gen_binemit.py b/cranelift-codegen/meta-python/gen_binemit.py deleted file mode 100644 index c813d1297..000000000 --- a/cranelift-codegen/meta-python/gen_binemit.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Generate binary emission code for each ISA. 
-""" - -from __future__ import absolute_import -from cdsl.registers import RegClass, Stack -import srcgen - -try: - from typing import Sequence, List # noqa - from cdsl.isa import TargetISA, EncRecipe, OperandConstraint # noqa -except ImportError: - pass - - -def gen_recipe(recipe, fmt): - # type: (EncRecipe, srcgen.Formatter) -> None - """ - Generate code to handle a single recipe. - - - Unpack the instruction data, knowing the format. - - Determine register locations for operands with register constraints. - - Determine stack slot locations for operands with stack constraints. - - Call hand-written code for the actual emission. - """ - iform = recipe.format - nvops = iform.num_value_operands - want_args = any(isinstance(i, RegClass) or isinstance(i, Stack) - for i in recipe.ins) - assert not want_args or nvops > 0 or iform.has_value_list - want_outs = any(isinstance(o, RegClass) or isinstance(o, Stack) - for o in recipe.outs) - - # Regmove instructions get special treatment. - is_regmove = (recipe.format.name in ('RegMove', 'RegSpill', 'RegFill')) - - # First unpack the instruction. - with fmt.indented( - 'if let InstructionData::{} {{'.format(iform.name), - '}'): - fmt.line('opcode,') - for f in iform.imm_fields: - fmt.line('{},'.format(f.member)) - if want_args: - if iform.has_value_list or nvops > 1: - fmt.line('ref args,') - else: - fmt.line('arg,') - fmt.line('..') - fmt.outdented_line('} = func.dfg[inst] {') - - # Pass recipe arguments in this order: inputs, imm_fields, outputs. - args = '' - - # Normalize to an `args` array. - if want_args and not is_regmove: - if iform.has_value_list: - fmt.line('let args = args.as_slice(&func.dfg.value_lists);') - elif nvops == 1: - fmt.line('let args = [arg];') - args += unwrap_values(recipe.ins, 'in', 'args', fmt) - - for f in iform.imm_fields: - args += ', ' + f.member - - # Unwrap interesting output arguments. - if want_outs: - if len(recipe.outs) == 1: - fmt.line('let results = [func.dfg.first_result(inst)];') - else: - fmt.line('let results = func.dfg.inst_results(inst);') - args += unwrap_values(recipe.outs, 'out', 'results', fmt) - - # Special handling for regmove instructions. Update the register - # diversion tracker. - if recipe.format.name == 'RegMove': - fmt.line('divert.regmove(arg, src, dst);') - elif recipe.format.name == 'RegSpill': - fmt.line('divert.regspill(arg, src, dst);') - elif recipe.format.name == 'RegFill': - fmt.line('divert.regfill(arg, src, dst);') - - # Call hand-written code. If the recipe contains a code snippet, use - # that. Otherwise cal a recipe function in the target ISA's binemit - # module. - if recipe.emit is None: - fmt.format( - 'return recipe_{}(func, inst, sink, bits{});', - recipe.name.lower(), args) - else: - fmt.multi_line(recipe.emit) - fmt.line('return;') - - -def unwrap_values(args, prefix, values, fmt): - # type: (Sequence[OperandConstraint], str, str, srcgen.Formatter) -> str # noqa - """ - Emit code that unwraps values living in registers or stack slots. - - :param args: Input or output constraints. - :param prefix: Prefix to be used for the generated local variables. - :param values: Name of slice containing the values to be unwrapped. 
:returns: Comma separated list of the generated variables - """ - varlist = '' - for i, cst in enumerate(args): - if isinstance(cst, RegClass): - v = '{}_reg{}'.format(prefix, i) - varlist += ', ' + v - fmt.format( - 'let {} = divert.reg({}[{}], &func.locations);', - v, values, i) - elif isinstance(cst, Stack): - v = '{}_stk{}'.format(prefix, i) - varlist += ', ' + v - with fmt.indented( - 'let {} = StackRef::masked('.format(v), - ').unwrap();'): - fmt.format('divert.stack({}[{}], &func.locations),', values, i) - fmt.format('{},', cst.stack_base_mask()) - fmt.line('&func.stack_slots,') - return varlist - - -def gen_isa(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - """ - Generate binary emission code for `isa`. - """ - fmt.doc_comment( - ''' - Emit binary machine code for `inst` for the {} ISA. - '''.format(isa.name)) - if len(isa.all_recipes) == 0: - # No encoding recipes: Emit a stub. - with fmt.indented('pub fn emit_inst<CS: CodeSink + ?Sized>('): - fmt.line('func: &Function,') - fmt.line('inst: Inst,') - fmt.line('_divert: &mut RegDiversions,') - fmt.line('_sink: &mut CS,') - with fmt.indented(') {', '}'): - fmt.line('bad_encoding(func, inst)') - else: - fmt.line('#[allow(unused_variables, unreachable_code)]') - with fmt.indented('pub fn emit_inst<CS: CodeSink + ?Sized>('): - fmt.line('func: &Function,') - fmt.line('inst: Inst,') - fmt.line('divert: &mut RegDiversions,') - fmt.line('sink: &mut CS,') - with fmt.indented(') {', '}'): - fmt.line('let encoding = func.encodings[inst];') - fmt.line('let bits = encoding.bits();') - with fmt.indented('match func.encodings[inst].recipe() {', '}'): - for i, recipe in enumerate(isa.all_recipes): - fmt.comment('Recipe {}'.format(recipe.name)) - with fmt.indented('{} => {{'.format(i), '}'): - gen_recipe(recipe, fmt) - fmt.line('_ => {},') - # Allow for un-encoded ghost instructions. - # Verifier checks the details. - with fmt.indented('if encoding.is_legal() {', '}'): - fmt.line('bad_encoding(func, inst);') - - -def generate(isas, out_dir): - # type: (Sequence[TargetISA], str) -> None - for isa in isas: - fmt = srcgen.Formatter() - gen_isa(isa, fmt) - fmt.update_file('binemit-{}.rs'.format(isa.name), out_dir) diff --git a/cranelift-codegen/meta-python/gen_build_deps.py b/cranelift-codegen/meta-python/gen_build_deps.py deleted file mode 100644 index 637865a55..000000000 --- a/cranelift-codegen/meta-python/gen_build_deps.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Generate build dependencies for Cargo. - -The `build.py` script is invoked by cargo when building cranelift-codegen to -generate Rust code from the instruction descriptions. Cargo needs to know when -it is necessary to rerun the build script. - -If the build script outputs lines of the form: - - cargo:rerun-if-changed=/path/to/file - -cargo will rerun the build script when those files have changed since the last -build.
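 (Editor's note: `generate()` below simply walks the meta-python directory tree and prints one such line for every `*.py` file it finds.)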
-""" -from __future__ import absolute_import, print_function -import os -from os.path import dirname, abspath, join - -try: - from typing import Iterable # noqa -except ImportError: - pass - - -def generate(): - # type: () -> None - print("Dependencies from meta language directory:") - meta = dirname(abspath(__file__)) - for (dirpath, _, filenames) in os.walk(meta): - for f in filenames: - if f.endswith('.py'): - print("cargo:rerun-if-changed=" + join(dirpath, f)) diff --git a/cranelift-codegen/meta-python/gen_encoding.py b/cranelift-codegen/meta-python/gen_encoding.py deleted file mode 100644 index 5f70c97ec..000000000 --- a/cranelift-codegen/meta-python/gen_encoding.py +++ /dev/null @@ -1,902 +0,0 @@ -""" -Generate sources for instruction encoding. - -The tables and functions generated here support the `TargetISA::encode()` -function which determines if a given instruction is legal, and if so, it's -`Encoding` data which consists of a *recipe* and some *encoding* bits. - -The `encode` function doesn't actually generate the binary machine bits. Each -recipe has a corresponding hand-written function to do that after registers -are allocated. - -This is the information available to us: - -- The instruction to be encoded as an `InstructionData` reference. -- The controlling type variable. -- The data-flow graph giving us access to the types of all values involved. - This is needed for testing any secondary type variables. -- A `PredicateView` reference for the ISA-specific settings for evaluating ISA - predicates. -- The currently active CPU mode is determined by the ISA. - -## Level 1 table lookup - -The CPU mode provides the first table. The key is the instruction's controlling -type variable. If the instruction is not polymorphic, use `INVALID` for the -type variable. The table values are level 2 tables. - -## Level 2 table lookup - -The level 2 table is keyed by the instruction's opcode. The table values are -*encoding lists*. - -The two-level table lookup allows the level 2 tables to be much smaller with -good locality. Code in any given function usually only uses a few different -types, so many of the level 2 tables will be cold. - -## Encoding lists - -An encoding list is a non-empty sequence of list entries. Each entry has -one of these forms: - -1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. -2. Recipe + bits, final entry. Use this encoding if the recipe predicate is - satisfied. Otherwise, stop with the default legalization code. -3. Stop with legalization code. -4. Predicate + skip count. Test predicate and skip N entries if it is false. -4. Predicate + stop. Test predicate and stop with the default legalization code - if it is false. - -The instruction predicate is also used to distinguish between polymorphic -instructions with different types for secondary type variables. 
-""" -from __future__ import absolute_import -import srcgen -from constant_hash import compute_quadratic -from unique_table import UniqueSeqTable -from collections import OrderedDict, defaultdict -import math -from itertools import groupby -from cdsl.registers import RegClass, Register, Stack -from cdsl.predicates import FieldPredicate, TypePredicate -from cdsl.settings import SettingGroup -from cdsl.formats import instruction_context, InstructionFormat - -try: - from typing import Sequence, Set, Tuple, List, Dict, Iterable, DefaultDict, TYPE_CHECKING # noqa - if TYPE_CHECKING: - from cdsl.isa import TargetISA, OperandConstraint, Encoding, CPUMode, EncRecipe, RecipePred # noqa - from cdsl.predicates import PredNode, PredLeaf # noqa - from cdsl.types import ValueType # noqa - from cdsl.instructions import Instruction # noqa - from cdsl.xform import XFormGroup # noqa -except ImportError: - pass - - -def emit_instp(instp, fmt, has_func=False): - # type: (PredNode, srcgen.Formatter, bool) -> None - """ - Emit code for matching an instruction predicate against an - `InstructionData` reference called `inst`. - - The generated code is an `if let` pattern match that falls through if the - instruction has an unexpected format. This should lead to a panic. - """ - iform = instp.predicate_context() - - # Deal with pure type check predicates which apply to any instruction. - if iform == instruction_context: - fmt.line('let args = inst.arguments(&func.dfg.value_lists);') - fmt.line(instp.rust_predicate(0)) - return - - assert isinstance(iform, InstructionFormat) - - # Which fields do we need in the InstructionData pattern match? - has_type_check = False - # Collect the leaf predicates. - leafs = set() # type: Set[PredLeaf] - instp.predicate_leafs(leafs) - # All the leafs are FieldPredicate or TypePredicate instances. Here we just - # care about the field names. - fnames = set() # type: Set[str] - for p in leafs: - if isinstance(p, FieldPredicate): - fnames.add(p.field.rust_destructuring_name()) - else: - assert isinstance(p, TypePredicate) - has_type_check = True - fields = ', '.join(sorted(fnames)) - - with fmt.indented( - 'if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{' - .format(iform.name, fields), '}'): - if has_type_check: - # We could implement this if we need to. - assert has_func, "Recipe predicates can't check type variables." - fmt.line('let args = inst.arguments(&func.dfg.value_lists);') - elif has_func: - # Silence dead argument warning. - fmt.line('let _ = func;') - fmt.format('return {};', instp.rust_predicate(0)) - fmt.line('unreachable!();') - - -def emit_inst_predicates(instps, fmt): - # type: (OrderedDict[PredNode, int], srcgen.Formatter) -> None - """ - Emit private functions for matching instruction predicates as well as a - static `INST_PREDICATES` array indexed by predicate number. - """ - for instp, number in instps.items(): - name = 'inst_predicate_{}'.format(number) - with fmt.indented( - 'fn {}(func: &crate::ir::Function, ' - 'inst: &crate::ir::InstructionData)' - '-> bool {{'.format(name), '}'): - emit_instp(instp, fmt, has_func=True) - - # Generate the static table. 
- with fmt.indented( - 'pub static INST_PREDICATES: [InstPredicate; {}] = [' - .format(len(instps)), '];'): - for instp, number in instps.items(): - fmt.format('inst_predicate_{},', number) - - -def emit_recipe_predicates(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - """ - Emit private functions for checking recipe predicates as well as a static - `RECIPE_PREDICATES` array indexed by recipe number. - - A recipe predicate is a combination of an ISA predicate and an instruction - predicates. Many recipes have identical predicates. - """ - # Table for uniquing recipe predicates. Maps predicate to generated - # function name. - pname = dict() # type: Dict[RecipePred, str] - - # Generate unique recipe predicates. - for rcp in isa.all_recipes: - p = rcp.recipe_pred() - if p is None or p in pname: - continue - name = 'recipe_predicate_{}'.format(rcp.name.lower()) - pname[p] = name - isap, instp = p - - # Generate the predicate function. - with fmt.indented( - 'fn {}({}: crate::settings::PredicateView, ' - '{}: &ir::InstructionData) -> bool {{' - .format( - name, - 'isap' if isap else '_', - 'inst' if instp else '_'), '}'): - if isap: - n = isa.settings.predicate_number[isap] - with fmt.indented('if !isap.test({}) {{'.format(n), '}'): - fmt.line('return false;') - if instp: - emit_instp(instp, fmt) - else: - fmt.line('true') - - # Generate the static table. - with fmt.indented( - 'pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [' - .format(len(isa.all_recipes)), '];'): - for rcp in isa.all_recipes: - p = rcp.recipe_pred() - if p is None: - fmt.line('None,') - else: - fmt.format('Some({}),', pname[p]) - - -# The u16 values in an encoding list entry are interpreted as follows: -# -# NR = len(all_recipes) -# -# entry < 2*NR -# Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. -# If bit 0 is set, stop with the default legalization code. -# If bit 0 is clear, keep going down the list. -# entry < PRED_START -# Stop with legalization code `entry - 2*NR`. -# -# Remaining entries are interpreted as (skip, pred) pairs, where: -# -# skip = (entry - PRED_START) >> PRED_BITS -# pred = (entry - PRED_START) & PRED_MASK -# -# If the predicate is satisfied, keep going. Otherwise skip over the next -# `skip` entries. If skip == 0, stop with the default legalization code. -# -# The `pred` predicate number is interpreted as an instruction predicate if it -# is in range, otherwise an ISA predicate. - - -class Encoder: - """ - Encoder for the list format above. - - Two parameters are needed: - - :param NR: Number of recipes. - :param NI: Number of instruction predicates. - """ - - def __init__(self, isa): - # type: (TargetISA) -> None - self.isa = isa - self.NR = len(isa.all_recipes) - self.NI = len(isa.instp_number) - # u16 encoding list words. - self.words = list() # type: List[int] - # Documentation comments: Index into `words` + comment. - self.docs = list() # type: List[Tuple[int, str]] - - # Encoding lists are represented as u16 arrays. - CODE_BITS = 16 - - # Beginning of the predicate code words. - PRED_START = 0x1000 - - # Number of bits used to hold a predicate number (instruction + ISA - # predicates. - PRED_BITS = 12 - - # Mask for extracting the predicate number. 
- PRED_MASK = (1 << PRED_BITS) - 1 - - def max_skip(self): - # type: () -> int - """The maximum number of entries that a predicate can skip.""" - return (1 << (self.CODE_BITS - self.PRED_BITS)) - 1 - - def recipe(self, enc, final): - # type: (Encoding, bool) -> None - """Add a recipe+bits entry to the list.""" - offset = len(self.words) - code = 2 * enc.recipe.number - doc = '--> {}'.format(enc) - if final: - code += 1 - doc += ' and stop' - - assert(code < self.PRED_START) - self.words.extend((code, enc.encbits)) - self.docs.append((offset, doc)) - - def _pred(self, pred, skip, n): - # type: (PredNode, int, int) -> None - """Add a predicate entry.""" - assert n <= self.PRED_MASK - code = n | (skip << self.PRED_BITS) - code += self.PRED_START - assert code < (1 << self.CODE_BITS) - - if skip == 0: - doc = 'stop' - else: - doc = 'skip ' + str(skip) - doc = '{} unless {}'.format(doc, pred) - - self.docs.append((len(self.words), doc)) - self.words.append(code) - - def instp(self, pred, skip): - # type: (PredNode, int) -> None - """Add an instruction predicate entry.""" - number = self.isa.instp_number[pred] - self._pred(pred, skip, number) - - def isap(self, pred, skip): - # type: (PredNode, int) -> None - """Add an ISA predicate entry.""" - n = self.isa.settings.predicate_number[pred] - # ISA predicates follow the instruction predicates. - self._pred(pred, skip, self.NI + n) - - -class EncNode(object): - """ - An abstract node in the encoder tree for an instruction. - - This tree is used to simplify the predicates guarding recipe+bits entries. - """ - - def size(self): - # type: () -> int - """Get the number of list entries needed to encode this tree.""" - raise NotImplementedError('EncNode.size() is abstract') - - def encode(self, encoder, final): - # type: (Encoder, bool) -> None - """Encode this tree.""" - raise NotImplementedError('EncNode.encode() is abstract') - - def optimize(self): - # type: () -> EncNode - """Transform this encoder tree into something simpler.""" - return self - - def predicate(self): - # type: () -> PredNode - """Get the predicate guarding this tree, or `None` for always""" - return None - - -class EncPred(EncNode): - """ - An encoder tree node which asserts a predicate on its child nodes. - - A `None` predicate is always satisfied. - """ - - def __init__(self, pred, children): - # type: (PredNode, List[EncNode]) -> None - self.pred = pred - self.children = children - - def size(self): - # type: () -> int - s = 1 if self.pred else 0 - s += sum(c.size() for c in self.children) - return s - - def encode(self, encoder, final): - # type: (Encoder, bool) -> None - if self.pred: - skip = 0 if final else self.size() - 1 - ctx = self.pred.predicate_context() - if isinstance(ctx, SettingGroup): - encoder.isap(self.pred, skip) - else: - encoder.instp(self.pred, skip) - - final_idx = len(self.children) - 1 if final else -1 - for idx, node in enumerate(self.children): - node.encode(encoder, idx == final_idx) - - def predicate(self): - # type: () -> PredNode - return self.pred - - def optimize(self): - # type: () -> EncNode - """ - Optimize a predicate node in the tree by combining child nodes that - have identical predicates. - """ - cnodes = list() # type: List[EncNode] - for pred, niter in groupby( - map(lambda c: c.optimize(), self.children), - key=lambda c: c.predicate()): - nodes = list(niter) - if pred is None or len(nodes) <= 1: - cnodes.extend(nodes) - continue - - # We have multiple children with identical predicates. - # Group them all into `n0`. 
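 # (Editor's note: merging children that share a predicate means the
 # predicate, and its skip entry in the encoded list, is emitted once
 # for the whole group instead of once per encoding.)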
- n0 = nodes[0] - assert isinstance(n0, EncPred) - for n in nodes[1:]: - assert isinstance(n, EncPred) - n0.children.extend(n.children) - - cnodes.append(n0) - - # Finally strip a redundant grouping node. - if self.pred is None and len(cnodes) == 1: - return cnodes[0] - else: - self.children = cnodes - return self - - -class EncLeaf(EncNode): - """ - A leaf in the encoder tree. - - This represents a single `Encoding`, without its predicates (they are - represented in the tree by parent nodes). - """ - - def __init__(self, encoding): - # type: (Encoding) -> None - self.encoding = encoding - - def size(self): - # type: () -> int - # recipe + bits. - return 2 - - def encode(self, encoder, final): - # type: (Encoder, bool) -> None - encoder.recipe(self.encoding, final) - - -class EncList(object): - """ - List of instructions for encoding a given type + opcode pair. - - An encoding list contains a sequence of predicates and encoding recipes, - all encoded as u16 values. - - :param inst: The instruction opcode being encoded. - :param ty: Value of the controlling type variable, or `None`. - """ - - def __init__(self, inst, ty): - # type: (Instruction, ValueType) -> None - self.inst = inst - self.ty = ty - # List of applicable Encoding instances. - # These will have different predicates. - self.encodings = [] # type: List[Encoding] - - def name(self): - # type: () -> str - name = self.inst.name - if self.ty: - name = '{}.{}'.format(name, self.ty.name) - if self.encodings: - name += ' ({})'.format(self.encodings[0].cpumode) - return name - - def encoder_tree(self): - # type: () -> EncNode - """ - Generate an optimized encoder tree for this list. The tree represents - all of the encodings with parent nodes for the predicates that need - checking. - """ - forest = list() # type: List[EncNode] - for enc in self.encodings: - n = EncLeaf(enc) # type: EncNode - if enc.instp: - n = EncPred(enc.instp, [n]) - if enc.isap: - n = EncPred(enc.isap, [n]) - forest.append(n) - - return EncPred(None, forest).optimize() - - def encode(self, seq_table, doc_table, isa): - # type: (UniqueSeqTable, DefaultDict[int, List[str]], TargetISA) -> None # noqa - """ - Encode this list as a sequence of u16 numbers. - - Adds the sequence to `seq_table` and records the returned offset as - `self.offset`. - - Adds comment lines to `doc_table` keyed by seq_table offsets. - """ - # Use an encoder object to hold the parameters. - encoder = Encoder(isa) - tree = self.encoder_tree() - tree.encode(encoder, True) - - self.offset = seq_table.add(encoder.words) - - # Add doc comments. - doc_table[self.offset].append( - '{:06x}: {}'.format(self.offset, self.name())) - for pos, doc in encoder.docs: - doc_table[self.offset + pos].append(doc) - doc_table[self.offset + len(encoder.words)].insert( - 0, 'end of: {}'.format(self.name())) - - -class Level2Table(object): - """ - Level 2 table mapping instruction opcodes to `EncList` objects. - - A level 2 table can be completely empty if it only holds a custom - legalization action for `ty`. - - :param ty: Controlling type variable of all entries, or `None`. - :param legalize: Default legalize action for `ty`.
- """ - - def __init__(self, ty, legalize): - # type: (ValueType, XFormGroup) -> None - self.ty = ty - self.legalize = legalize - # Maps inst -> EncList - self.lists = OrderedDict() # type: OrderedDict[Instruction, EncList] - - def __getitem__(self, inst): - # type: (Instruction) -> EncList - ls = self.lists.get(inst) - if not ls: - ls = EncList(inst, self.ty) - self.lists[inst] = ls - return ls - - def is_empty(self): - # type: () -> bool - """ - Check if this level 2 table is completely empty. - - This can happen if the associated type simply has an overridden - legalize action. - """ - return len(self.lists) == 0 - - def enclists(self): - # type: () -> Iterable[EncList] - return iter(self.lists.values()) - - def layout_hashtable(self, level2_hashtables, level2_doc): - # type: (List[EncList], DefaultDict[int, List[str]]) -> None - """ - Compute the hash table mapping opcode -> enclist. - - Append the hash table to `level2_hashtables` and record the offset. - """ - def hash_func(enclist): - # type: (EncList) -> int - return enclist.inst.number - hash_table = compute_quadratic(self.lists.values(), hash_func) - - self.hash_table_offset = len(level2_hashtables) - self.hash_table_len = len(hash_table) - - level2_doc[self.hash_table_offset].append( - '{:06x}: {}, {} entries'.format( - self.hash_table_offset, - self.ty, - self.hash_table_len)) - level2_hashtables.extend(hash_table) - - -class Level1Table(object): - """ - Level 1 table mapping types to `Level2` objects. - """ - - def __init__(self, cpumode): - # type: (CPUMode) -> None - self.cpumode = cpumode - self.tables = OrderedDict() # type: OrderedDict[ValueType, Level2Table] # noqa - - if cpumode.default_legalize is None: - raise AssertionError( - 'CPU mode {}.{} needs a default legalize action' - .format(cpumode.isa, cpumode)) - self.legalize_code = cpumode.isa.legalize_code( - cpumode.default_legalize) - - def __getitem__(self, ty): - # type: (ValueType) -> Level2Table - tbl = self.tables.get(ty) - if not tbl: - legalize = self.cpumode.get_legalize_action(ty) - # Allocate a legalization code in a predictable order. - self.cpumode.isa.legalize_code(legalize) - tbl = Level2Table(ty, legalize) - self.tables[ty] = tbl - return tbl - - def l2tables(self): - # type: () -> Iterable[Level2Table] - return (l2 for l2 in self.tables.values() if not l2.is_empty()) - - -def make_tables(cpumode): - # type: (CPUMode) -> Level1Table - """ - Generate tables for `cpumode` as described above. - """ - table = Level1Table(cpumode) - for enc in cpumode.encodings: - ty = enc.ctrl_typevar() - inst = enc.inst - table[ty][inst].encodings.append(enc) - - # Ensure there are level 1 table entries for all types with a custom - # legalize action. - for ty in cpumode.type_legalize.keys(): - table[ty] - - return table - - -def encode_enclists(level1, seq_table, doc_table, isa): - # type: (Level1Table, UniqueSeqTable, DefaultDict[int, List[str]], TargetISA) -> None # noqa - """ - Compute encodings and doc comments for encoding lists in `level1`. 
- """ - for level2 in level1.l2tables(): - for enclist in level2.enclists(): - enclist.encode(seq_table, doc_table, isa) - - -def emit_enclists(seq_table, doc_table, fmt): - # type: (UniqueSeqTable, DefaultDict[int, List[str]], srcgen.Formatter) -> None # noqa - with fmt.indented( - 'pub static ENCLISTS: [u16; {}] = ['.format(len(seq_table.table)), - '];'): - line = '' - for idx, entry in enumerate(seq_table.table): - if idx in doc_table: - if line: - fmt.line(line) - line = '' - for doc in doc_table[idx]: - fmt.comment(doc) - line += '{:#06x}, '.format(entry) - if line: - fmt.line(line) - - -def encode_level2_hashtables(level1, level2_hashtables, level2_doc): - # type: (Level1Table, List[EncList], DefaultDict[int, List[str]]) -> None - for level2 in level1.l2tables(): - level2.layout_hashtable(level2_hashtables, level2_doc) - - -def emit_level2_hashtables(level2_hashtables, offt, level2_doc, fmt): - # type: (List[EncList], str, DefaultDict[int, List[str]], srcgen.Formatter) -> None # noqa - """ - Emit the big concatenation of level 2 hash tables. - """ - with fmt.indented( - 'pub static LEVEL2: [Level2Entry<{}>; {}] = [' - .format(offt, len(level2_hashtables)), - '];'): - for offset, entry in enumerate(level2_hashtables): - if offset in level2_doc: - for doc in level2_doc[offset]: - fmt.comment(doc) - if entry: - fmt.line( - 'Level2Entry ' + - '{{ opcode: Some(crate::ir::Opcode::{}), ' - 'offset: {:#08x} }},' - .format(entry.inst.camel_name, entry.offset)) - else: - fmt.line( - 'Level2Entry ' + - '{ opcode: None, offset: 0 },') - - -def emit_level1_hashtable(cpumode, level1, offt, fmt): - # type: (CPUMode, Level1Table, str, srcgen.Formatter) -> None # noqa - """ - Emit a level 1 hash table for `cpumode`. - """ - def hash_func(level2): - # type: (Level2Table) -> int - return level2.ty.number if level2.ty is not None else 0 - hash_table = compute_quadratic(level1.tables.values(), hash_func) - - with fmt.indented( - 'pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [' - .format(cpumode.name.upper(), offt, len(hash_table)), '];'): - for level2 in hash_table: - # Empty hash table entry. Include the default legalization action. - if not level2: - fmt.format( - 'Level1Entry {{ ty: crate::ir::types::INVALID, ' - 'log2len: !0, offset: 0, legalize: {} }},', - level1.legalize_code) - continue - - if level2.ty is not None: - tyname = level2.ty.rust_name() - else: - tyname = 'crate::ir::types::INVALID' - - lcode = cpumode.isa.legalize_code(level2.legalize) - - # Empty level 2 table: Only a specialized legalization action, no - # actual table. - # Set an offset that is out of bounds, but make sure it doesn't - # overflow its type when adding `1< 0, "Level2 hash table too small" - fmt.format( - 'Level1Entry {{ ' - 'ty: {}, log2len: {}, offset: {:#08x}, ' - 'legalize: {} }}, // {}', - tyname, l2l, level2.hash_table_offset, - lcode, level2.legalize) - - -def offset_type(length): - # type: (int) -> str - """ - Compute an appropriate Rust integer type to use for offsets into a table of - the given length. - """ - if length <= 0x10000: - return 'u16' - else: - assert length <= 0x100000000, "Table too big" - return 'u32' - - -def emit_recipe_names(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - """ - Emit a table of encoding recipe names keyed by recipe number. - - This is used for pretty-printing encodings. 
- """ - with fmt.indented( - 'static RECIPE_NAMES: [&str; {}] = [' - .format(len(isa.all_recipes)), '];'): - for r in isa.all_recipes: - fmt.line('"{}",'.format(r.name)) - - -def emit_recipe_constraints(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - """ - Emit a table of encoding recipe operand constraints keyed by recipe number. - - These are used by the register allocator to pick registers that can be - properly encoded. - """ - with fmt.indented( - 'static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [' - .format(len(isa.all_recipes)), '];'): - for r in isa.all_recipes: - fmt.comment('Constraints for recipe {}:'.format(r.name)) - tied_i2o, tied_o2i = r.ties() - fixed_ins, fixed_outs = r.fixed_ops() - with fmt.indented('RecipeConstraints {', '},'): - emit_operand_constraints( - r, r.ins, 'ins', tied_i2o, fixed_outs, fmt) - emit_operand_constraints( - r, r.outs, 'outs', tied_o2i, fixed_ins, fmt) - fmt.format('fixed_ins: {},', str(bool(fixed_ins)).lower()) - fmt.format('fixed_outs: {},', str(bool(fixed_outs)).lower()) - fmt.format('tied_ops: {},', str(bool(tied_i2o)).lower()) - fmt.format( - 'clobbers_flags: {},', - str(bool(r.clobbers_flags)).lower()) - - -def emit_operand_constraints( - recipe, # type: EncRecipe - seq, # type: Sequence[OperandConstraint] - field, # type: str - tied, # type: Dict[int, int] - fixops, # type: Set[Register] - fmt # type: srcgen.Formatter - ): - # type: (...) -> None - """ - Emit a struct field initializer for an array of operand constraints. - - :param field: The name of the struct field to emit. - :param tied: Map of tied opnums to counterparts. - :param fix_ops: Set of fixed operands on the other side of the inst. - """ - if len(seq) == 0: - fmt.line('{}: &[],'.format(field)) - return - with fmt.indented('{}: &['.format(field), '],'): - for n, cons in enumerate(seq): - with fmt.indented('OperandConstraint {', '},'): - if isinstance(cons, RegClass): - if n in tied: - fmt.format('kind: ConstraintKind::Tied({}),', tied[n]) - else: - fmt.line('kind: ConstraintKind::Reg,') - fmt.format('regclass: &{}_DATA,', cons) - elif isinstance(cons, Register): - assert n not in tied, "Can't tie fixed register operand" - # See if this fixed register is also on the other side. - t = 'FixedTied' if cons in fixops else 'FixedReg' - fmt.format('kind: ConstraintKind::{}({}),', t, cons.unit) - fmt.format('regclass: &{}_DATA,', cons.regclass) - elif isinstance(cons, int): - # This is a tied output constraint. It should never happen - # for input constraints. - assert cons == tied[n], "Invalid tied constraint" - fmt.format('kind: ConstraintKind::Tied({}),', cons) - fmt.format('regclass: &{}_DATA,', recipe.ins[cons]) - elif isinstance(cons, Stack): - assert n not in tied, "Can't tie stack operand" - fmt.line('kind: ConstraintKind::Stack,') - fmt.format('regclass: &{}_DATA,', cons.regclass) - else: - raise AssertionError( - 'Unsupported constraint {}'.format(cons)) - - -def emit_recipe_sizing(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - """ - Emit a table of encoding recipe code size information. 
- """ - with fmt.indented( - 'static RECIPE_SIZING: [RecipeSizing; {}] = [' - .format(len(isa.all_recipes)), '];'): - for r in isa.all_recipes: - fmt.comment('Code size information for recipe {}:'.format(r.name)) - with fmt.indented('RecipeSizing {', '},'): - fmt.format('base_size: {},', r.base_size) - fmt.format('compute_size: {},', r.compute_size) - if r.branch_range: - fmt.format( - 'branch_range: ' - 'Some(BranchRange {{ origin: {}, bits: {} }}),', - *r.branch_range) - else: - fmt.line('branch_range: None,') - - -def gen_isa(isa, fmt): - # type: (TargetISA, srcgen.Formatter) -> None - - # Make the `RECIPE_PREDICATES` table. - emit_recipe_predicates(isa, fmt) - - # Make the `INST_PREDICATES` table. - emit_inst_predicates(isa.instp_number, fmt) - - # Level1 tables, one per CPU mode - level1_tables = dict() - - # Tables for enclists with comments. - seq_table = UniqueSeqTable() - doc_table = defaultdict(list) # type: DefaultDict[int, List[str]] - - # Single table containing all the level2 hash tables. - level2_hashtables = list() # type: List[EncList] - level2_doc = defaultdict(list) # type: DefaultDict[int, List[str]] - - for cpumode in isa.cpumodes: - level2_doc[len(level2_hashtables)].append(cpumode.name) - level1 = make_tables(cpumode) - level1_tables[cpumode] = level1 - encode_enclists(level1, seq_table, doc_table, isa) - encode_level2_hashtables(level1, level2_hashtables, level2_doc) - - # Level 1 table encodes offsets into the level 2 table. - level1_offt = offset_type(len(level2_hashtables)) - # Level 2 tables encodes offsets into seq_table. - level2_offt = offset_type(len(seq_table.table)) - - emit_enclists(seq_table, doc_table, fmt) - emit_level2_hashtables(level2_hashtables, level2_offt, level2_doc, fmt) - for cpumode in isa.cpumodes: - emit_level1_hashtable( - cpumode, level1_tables[cpumode], level1_offt, fmt) - - emit_recipe_names(isa, fmt) - emit_recipe_constraints(isa, fmt) - emit_recipe_sizing(isa, fmt) - - # Finally, tie it all together in an `EncInfo`. - with fmt.indented('pub static INFO: isa::EncInfo = isa::EncInfo {', '};'): - fmt.line('constraints: &RECIPE_CONSTRAINTS,') - fmt.line('sizing: &RECIPE_SIZING,') - fmt.line('names: &RECIPE_NAMES,') - - -def generate(isas, out_dir): - # type: (Sequence[TargetISA], str) -> None - for isa in isas: - fmt = srcgen.Formatter() - gen_isa(isa, fmt) - fmt.update_file('encoding-{}.rs'.format(isa.name), out_dir) diff --git a/cranelift-codegen/meta-python/isa/__init__.py b/cranelift-codegen/meta-python/isa/__init__.py deleted file mode 100644 index d3cff62c9..000000000 --- a/cranelift-codegen/meta-python/isa/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -""" -Cranelift target ISA definitions --------------------------------- - -The :py:mod:`isa` package contains sub-packages for each target instruction set -architecture supported by Cranelift. -""" -from __future__ import absolute_import -from cdsl.isa import TargetISA # noqa -from . import riscv, x86, arm32, arm64 - -try: - from typing import List # noqa -except ImportError: - pass - - -def all_isas(): - # type: () -> List[TargetISA] - """ - Get a list of all the supported target ISAs. Each target ISA is represented - as a :py:class:`cranelift.TargetISA` instance. 
- """ - return [riscv.ISA, x86.ISA, arm32.ISA, arm64.ISA] diff --git a/cranelift-codegen/meta-python/isa/arm32/__init__.py b/cranelift-codegen/meta-python/isa/arm32/__init__.py deleted file mode 100644 index 06773f875..000000000 --- a/cranelift-codegen/meta-python/isa/arm32/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -ARM 32-bit Architecture ------------------------ - -This target ISA generates code for ARMv7 and ARMv8 CPUs in 32-bit mode -(AArch32). We support both ARM and Thumb2 instruction encodings. -""" - -from __future__ import absolute_import -from . import defs -from . import settings, registers # noqa -from cdsl.isa import TargetISA # noqa - -# Re-export the primary target ISA definition. -ISA = defs.ISA.finish() # type: TargetISA diff --git a/cranelift-codegen/meta-python/isa/arm32/defs.py b/cranelift-codegen/meta-python/isa/arm32/defs.py deleted file mode 100644 index 88b8c53db..000000000 --- a/cranelift-codegen/meta-python/isa/arm32/defs.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -ARM 32-bit definitions. - -Commonly used definitions. -""" -from __future__ import absolute_import -from cdsl.isa import TargetISA, CPUMode -import base.instructions -from base.legalize import narrow - -ISA = TargetISA('arm32', [base.instructions.GROUP]) # type: TargetISA - -# CPU modes for 32-bit ARM and Thumb2. -A32 = CPUMode('A32', ISA) -T32 = CPUMode('T32', ISA) - -# TODO: Refine these. -A32.legalize_type(narrow) -T32.legalize_type(narrow) diff --git a/cranelift-codegen/meta-python/isa/arm32/registers.py b/cranelift-codegen/meta-python/isa/arm32/registers.py deleted file mode 100644 index 054e95fa0..000000000 --- a/cranelift-codegen/meta-python/isa/arm32/registers.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -ARM32 register banks. -""" -from __future__ import absolute_import -from cdsl.registers import RegBank, RegClass -from .defs import ISA - - -# Define the larger float bank first to avoid the alignment gap. -FloatRegs = RegBank( - 'FloatRegs', ISA, r""" - Floating point registers. - - The floating point register units correspond to the S-registers, but - extended as if there were 64 registers. - - - S registers are one unit each. - - D registers are two units each, even D16 and above. - - Q registers are 4 units each. - """, - units=64, prefix='s') - -# Special register units: -# - r15 is the program counter. -# - r14 is the link register. -# - r13 is usually the stack pointer. -IntRegs = RegBank( - 'IntRegs', ISA, - 'General purpose registers', - units=16, prefix='r') - -FlagRegs = RegBank( - 'FlagRegs', ISA, - 'Flag registers', - units=1, - pressure_tracking=False, - names=['nzcv']) - -GPR = RegClass(IntRegs) -S = RegClass(FloatRegs, count=32) -D = RegClass(FloatRegs, width=2) -Q = RegClass(FloatRegs, width=4) -FLAG = RegClass(FlagRegs) - -RegClass.extract_names(globals()) diff --git a/cranelift-codegen/meta-python/isa/arm32/settings.py b/cranelift-codegen/meta-python/isa/arm32/settings.py deleted file mode 100644 index 5cc948cf2..000000000 --- a/cranelift-codegen/meta-python/isa/arm32/settings.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -ARM32 settings. 
-""" -from __future__ import absolute_import -from cdsl.settings import SettingGroup -import base.settings as shared -from .defs import ISA - -ISA.settings = SettingGroup('arm32', parent=shared.group) - -ISA.settings.close(globals()) diff --git a/cranelift-codegen/meta-python/isa/arm64/__init__.py b/cranelift-codegen/meta-python/isa/arm64/__init__.py deleted file mode 100644 index fb9005c03..000000000 --- a/cranelift-codegen/meta-python/isa/arm64/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -ARM 64-bit Architecture ------------------------ - -ARMv8 CPUs running the Aarch64 architecture. -""" - -from __future__ import absolute_import -from . import defs -from . import settings, registers # noqa -from cdsl.isa import TargetISA # noqa - -# Re-export the primary target ISA definition. -ISA = defs.ISA.finish() # type: TargetISA diff --git a/cranelift-codegen/meta-python/isa/arm64/defs.py b/cranelift-codegen/meta-python/isa/arm64/defs.py deleted file mode 100644 index 0350908f9..000000000 --- a/cranelift-codegen/meta-python/isa/arm64/defs.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -ARM64 definitions. - -Commonly used definitions. -""" -from __future__ import absolute_import -from cdsl.isa import TargetISA, CPUMode -import base.instructions -from base.legalize import narrow - -ISA = TargetISA('arm64', [base.instructions.GROUP]) # type: TargetISA -A64 = CPUMode('A64', ISA) - -# TODO: Refine these -A64.legalize_type(narrow) diff --git a/cranelift-codegen/meta-python/isa/arm64/registers.py b/cranelift-codegen/meta-python/isa/arm64/registers.py deleted file mode 100644 index df680b1a1..000000000 --- a/cranelift-codegen/meta-python/isa/arm64/registers.py +++ /dev/null @@ -1,32 +0,0 @@ -""" -Aarch64 register banks. -""" -from __future__ import absolute_import -from cdsl.registers import RegBank, RegClass -from .defs import ISA - - -# The `x31` regunit serves as the stack pointer / zero register depending on -# context. We reserve it and don't model the difference. -IntRegs = RegBank( - 'IntRegs', ISA, - 'General purpose registers', - units=32, prefix='x') - -FloatRegs = RegBank( - 'FloatRegs', ISA, - 'Floating point registers', - units=32, prefix='v') - -FlagRegs = RegBank( - 'FlagRegs', ISA, - 'Flag registers', - units=1, - pressure_tracking=False, - names=['nzcv']) - -GPR = RegClass(IntRegs) -FPR = RegClass(FloatRegs) -FLAG = RegClass(FlagRegs) - -RegClass.extract_names(globals()) diff --git a/cranelift-codegen/meta-python/isa/arm64/settings.py b/cranelift-codegen/meta-python/isa/arm64/settings.py deleted file mode 100644 index 9a2fc13dc..000000000 --- a/cranelift-codegen/meta-python/isa/arm64/settings.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -ARM64 settings. -""" -from __future__ import absolute_import -from cdsl.settings import SettingGroup -import base.settings as shared -from .defs import ISA - -ISA.settings = SettingGroup('arm64', parent=shared.group) - -ISA.settings.close(globals()) diff --git a/cranelift-codegen/meta-python/isa/riscv/__init__.py b/cranelift-codegen/meta-python/isa/riscv/__init__.py deleted file mode 100644 index b58dd68ae..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -""" -RISC-V Target -------------- - -`RISC-V `_ is an open instruction set architecture -originally developed at UC Berkeley. It is a RISC-style ISA with either a -32-bit (RV32I) or 64-bit (RV32I) base instruction set and a number of optional -extensions: - -RV32M / RV64M - Integer multiplication and division. - -RV32A / RV64A - Atomics. 
- -RV32F / RV64F - Single-precision IEEE floating point. - -RV32D / RV64D - Double-precision IEEE floating point. - -RV32G / RV64G - General purpose instruction sets. This represents the union of the I, M, A, - F, and D instruction sets listed above. - -""" -from __future__ import absolute_import -from . import defs -from . import encodings, settings, registers # noqa -from cdsl.isa import TargetISA # noqa - -# Re-export the primary target ISA definition. -ISA = defs.ISA.finish() # type: TargetISA diff --git a/cranelift-codegen/meta-python/isa/riscv/defs.py b/cranelift-codegen/meta-python/isa/riscv/defs.py deleted file mode 100644 index 404895c50..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/defs.py +++ /dev/null @@ -1,14 +0,0 @@ -""" -RISC-V definitions. - -Commonly used definitions. -""" -from __future__ import absolute_import -from cdsl.isa import TargetISA, CPUMode -import base.instructions - -ISA = TargetISA('riscv', [base.instructions.GROUP]) # type: TargetISA - -# CPU modes for 32-bit and 64-bit operation. -RV32 = CPUMode('RV32', ISA) -RV64 = CPUMode('RV64', ISA) diff --git a/cranelift-codegen/meta-python/isa/riscv/encodings.py b/cranelift-codegen/meta-python/isa/riscv/encodings.py deleted file mode 100644 index 980a88ba0..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/encodings.py +++ /dev/null @@ -1,169 +0,0 @@ -""" -RISC-V Encodings. -""" -from __future__ import absolute_import -from base import instructions as base -from base import types -from base.immediates import intcc -from .defs import RV32, RV64 -from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL -from .recipes import LOAD, STORE -from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy -from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov, stacknull -from .settings import use_m -from cdsl.ast import Var -from base.legalize import narrow, expand - -RV32.legalize_monomorphic(expand) -RV32.legalize_type( - default=narrow, - i32=expand, - f32=expand, - f64=expand) - -RV64.legalize_monomorphic(expand) -RV64.legalize_type( - default=narrow, - i32=expand, - i64=expand, - f32=expand, - f64=expand) - -# Dummies for instruction predicates. -x = Var('x') -y = Var('y') -dest = Var('dest') -args = Var('args') - -# Basic arithmetic binary instructions are encoded in an R-type instruction. -for inst, inst_imm, f3, f7 in [ - (base.iadd, base.iadd_imm, 0b000, 0b0000000), - (base.isub, None, 0b000, 0b0100000), - (base.bxor, base.bxor_imm, 0b100, 0b0000000), - (base.bor, base.bor_imm, 0b110, 0b0000000), - (base.band, base.band_imm, 0b111, 0b0000000) - ]: - RV32.enc(inst.i32, R, OP(f3, f7)) - RV64.enc(inst.i64, R, OP(f3, f7)) - - # Immediate versions for add/xor/or/and. - if inst_imm: - RV32.enc(inst_imm.i32, Ii, OPIMM(f3)) - RV64.enc(inst_imm.i64, Ii, OPIMM(f3)) - -# 32-bit ops in RV64. -RV64.enc(base.iadd.i32, R, OP32(0b000, 0b0000000)) -RV64.enc(base.isub.i32, R, OP32(0b000, 0b0100000)) -# There are no andiw/oriw/xoriw variations. -RV64.enc(base.iadd_imm.i32, Ii, OPIMM32(0b000)) - -# Use iadd_imm with %x0 to materialize constants. -RV32.enc(base.iconst.i32, Iz, OPIMM(0b000)) -RV64.enc(base.iconst.i32, Iz, OPIMM(0b000)) -RV64.enc(base.iconst.i64, Iz, OPIMM(0b000)) - -# Dynamic shifts have the same masking semantics as the clif base instructions. 
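# In the bindings below, each shift instruction is bound to two types: the
# controlling type of the shifted value, then the type of the shift amount.
# For example, `inst.i64.i32` is a 64-bit shift whose amount is an `i32`.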
-for inst, inst_imm, f3, f7 in [ - (base.ishl, base.ishl_imm, 0b001, 0b0000000), - (base.ushr, base.ushr_imm, 0b101, 0b0000000), - (base.sshr, base.sshr_imm, 0b101, 0b0100000), - ]: - RV32.enc(inst.i32.i32, R, OP(f3, f7)) - RV64.enc(inst.i64.i64, R, OP(f3, f7)) - RV64.enc(inst.i32.i32, R, OP32(f3, f7)) - # Allow i32 shift amounts in 64-bit shifts. - RV64.enc(inst.i64.i32, R, OP(f3, f7)) - RV64.enc(inst.i32.i64, R, OP32(f3, f7)) - - # Immediate shifts. - RV32.enc(inst_imm.i32, Rshamt, OPIMM(f3, f7)) - RV64.enc(inst_imm.i64, Rshamt, OPIMM(f3, f7)) - RV64.enc(inst_imm.i32, Rshamt, OPIMM32(f3, f7)) - -# Signed and unsigned integer 'less than'. There are no 'w' variants for -# comparing 32-bit numbers in RV64. -RV32.enc(base.icmp.i32(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000)) -RV64.enc(base.icmp.i64(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000)) -RV32.enc(base.icmp.i32(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000)) -RV64.enc(base.icmp.i64(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000)) - -RV32.enc(base.icmp_imm.i32(intcc.slt, x, y), Iicmp, OPIMM(0b010)) -RV64.enc(base.icmp_imm.i64(intcc.slt, x, y), Iicmp, OPIMM(0b010)) -RV32.enc(base.icmp_imm.i32(intcc.ult, x, y), Iicmp, OPIMM(0b011)) -RV64.enc(base.icmp_imm.i64(intcc.ult, x, y), Iicmp, OPIMM(0b011)) - -# Integer constants with the low 12 bits clear are materialized by lui. -RV32.enc(base.iconst.i32, U, LUI()) -RV64.enc(base.iconst.i32, U, LUI()) -RV64.enc(base.iconst.i64, U, LUI()) - -# "M" Standard Extension for Integer Multiplication and Division. -# Gated by the `use_m` flag. -RV32.enc(base.imul.i32, R, OP(0b000, 0b0000001), isap=use_m) -RV64.enc(base.imul.i64, R, OP(0b000, 0b0000001), isap=use_m) -RV64.enc(base.imul.i32, R, OP32(0b000, 0b0000001), isap=use_m) - -# Control flow. - -# Unconditional branches. -RV32.enc(base.jump, UJ, JAL()) -RV64.enc(base.jump, UJ, JAL()) -RV32.enc(base.call, UJcall, JAL()) -RV64.enc(base.call, UJcall, JAL()) - -# Conditional branches. -for cond, f3 in [ - (intcc.eq, 0b000), - (intcc.ne, 0b001), - (intcc.slt, 0b100), - (intcc.sge, 0b101), - (intcc.ult, 0b110), - (intcc.uge, 0b111) - ]: - RV32.enc(base.br_icmp.i32(cond, x, y, dest, args), SB, BRANCH(f3)) - RV64.enc(base.br_icmp.i64(cond, x, y, dest, args), SB, BRANCH(f3)) - -for inst, f3 in [ - (base.brz, 0b000), - (base.brnz, 0b001) - ]: - RV32.enc(inst.i32, SBzero, BRANCH(f3)) - RV64.enc(inst.i64, SBzero, BRANCH(f3)) - RV32.enc(inst.b1, SBzero, BRANCH(f3)) - RV64.enc(inst.b1, SBzero, BRANCH(f3)) - -# Returns are a special case of JALR using %x1 to hold the return address. -# The return address is provided by a special-purpose `link` return value that -# is added by legalize_signature(). -RV32.enc(base.x_return, Iret, JALR()) -RV64.enc(base.x_return, Iret, JALR()) -RV32.enc(base.call_indirect.i32, Icall, JALR()) -RV64.enc(base.call_indirect.i64, Icall, JALR()) - -# Spill and fill. -RV32.enc(base.spill.i32, GPsp, STORE(0b010)) -RV64.enc(base.spill.i32, GPsp, STORE(0b010)) -RV64.enc(base.spill.i64, GPsp, STORE(0b011)) -RV32.enc(base.fill.i32, GPfi, LOAD(0b010)) -RV64.enc(base.fill.i32, GPfi, LOAD(0b010)) -RV64.enc(base.fill.i64, GPfi, LOAD(0b011)) - -# Register copies. 
-RV32.enc(base.copy.i32, Icopy, OPIMM(0b000)) -RV64.enc(base.copy.i64, Icopy, OPIMM(0b000)) -RV64.enc(base.copy.i32, Icopy, OPIMM32(0b000)) - -RV32.enc(base.regmove.i32, Irmov, OPIMM(0b000)) -RV64.enc(base.regmove.i64, Irmov, OPIMM(0b000)) -RV64.enc(base.regmove.i32, Irmov, OPIMM32(0b000)) - -RV32.enc(base.copy.b1, Icopy, OPIMM(0b000)) -RV64.enc(base.copy.b1, Icopy, OPIMM(0b000)) -RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000)) -RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000)) - -# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn -# into a no-op. -for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]: - RV64.enc(base.copy_nop.bind(ty), stacknull, 0) - RV32.enc(base.copy_nop.bind(ty), stacknull, 0) diff --git a/cranelift-codegen/meta-python/isa/riscv/recipes.py b/cranelift-codegen/meta-python/isa/riscv/recipes.py deleted file mode 100644 index ff27c31f8..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/recipes.py +++ /dev/null @@ -1,230 +0,0 @@ -""" -RISC-V Encoding recipes. - -The encoding recipes defined here more or less correspond to the RISC-V native -instruction formats described in the reference: - - The RISC-V Instruction Set Manual - Volume I: User-Level ISA - Version 2.1 -""" -from __future__ import absolute_import -from cdsl.isa import EncRecipe -from cdsl.predicates import IsSignedInt -from cdsl.registers import Stack -from base.formats import Binary, BinaryImm, MultiAry, IntCompare, IntCompareImm -from base.formats import Unary, UnaryImm, BranchIcmp, Branch, Jump -from base.formats import Call, CallIndirect, RegMove -from .registers import GPR - -# The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit -# instructions have 11 as the two low bits, with bits 6:2 determining the base -# opcode. -# -# Encbits for the 32-bit recipes are `opcode[6:2] | (funct3 << 5) | ...`. -# The functions below encode the encbits. - - -def LOAD(funct3): - # type: (int) -> int - assert funct3 <= 0b111 - return 0b00000 | (funct3 << 5) - - -def STORE(funct3): - # type: (int) -> int - assert funct3 <= 0b111 - return 0b01000 | (funct3 << 5) - - -def BRANCH(funct3): - # type: (int) -> int - assert funct3 <= 0b111 - return 0b11000 | (funct3 << 5) - - -def JALR(funct3=0): - # type: (int) -> int - assert funct3 <= 0b111 - return 0b11001 | (funct3 << 5) - - -def JAL(): - # type: () -> int - return 0b11011 - - -def OPIMM(funct3, funct7=0): - # type: (int, int) -> int - assert funct3 <= 0b111 - return 0b00100 | (funct3 << 5) | (funct7 << 8) - - -def OPIMM32(funct3, funct7=0): - # type: (int, int) -> int - assert funct3 <= 0b111 - return 0b00110 | (funct3 << 5) | (funct7 << 8) - - -def OP(funct3, funct7): - # type: (int, int) -> int - assert funct3 <= 0b111 - assert funct7 <= 0b1111111 - return 0b01100 | (funct3 << 5) | (funct7 << 8) - - -def OP32(funct3, funct7): - # type: (int, int) -> int - assert funct3 <= 0b111 - assert funct7 <= 0b1111111 - return 0b01110 | (funct3 << 5) | (funct7 << 8) - - -def AIUPC(): - # type: () -> int - return 0b00101 - - -def LUI(): - # type: () -> int - return 0b01101 - - -# R-type 32-bit instructions: These are mostly binary arithmetic instructions. -# The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`. -R = EncRecipe( - 'R', Binary, base_size=4, ins=(GPR, GPR), outs=GPR, - emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);') - -# R-type with an immediate shift amount instead of rs2.
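To make the encbits packing concrete, here is a minimal sketch (assumed field layout per the RISC-V spec; `put_r_sketch` is an illustrative stand-in, not the emitter's actual `put_r`) of how an R-type recipe's encbits plus register numbers form a full 32-bit instruction word:

    def put_r_sketch(encbits, rs1, rs2, rd):
        # encbits = opcode[6:2] | (funct3 << 5) | (funct7 << 8)
        opcode = ((encbits & 0x1f) << 2) | 0b11  # 32-bit insns end in 0b11
        funct3 = (encbits >> 5) & 0x7
        funct7 = (encbits >> 8) & 0x7f
        # R-type: funct7 | rs2 | rs1 | funct3 | rd | opcode
        return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | \
            (funct3 << 12) | (rd << 7) | opcode

    # OP(0b000, 0b0000000) is `add`; its full 7-bit opcode is 0x33.
    assert put_r_sketch(0b01100, rs1=2, rs2=3, rd=1) & 0x7f == 0x33

The `Rshamt` recipe just below reuses this layout, with an immediate shift amount in the rs2 position.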
-Rshamt = EncRecipe( - 'Rshamt', BinaryImm, base_size=4, ins=GPR, outs=GPR, - emit='put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);') - -# R-type encoding of an integer comparison. -Ricmp = EncRecipe( - 'Ricmp', IntCompare, base_size=4, ins=(GPR, GPR), outs=GPR, - emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);') - -Ii = EncRecipe( - 'Ii', BinaryImm, base_size=4, ins=GPR, outs=GPR, - instp=IsSignedInt(BinaryImm.imm, 12), - emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);') - -# I-type instruction with a hardcoded %x0 rs1. -Iz = EncRecipe( - 'Iz', UnaryImm, base_size=4, ins=(), outs=GPR, - instp=IsSignedInt(UnaryImm.imm, 12), - emit='put_i(bits, 0, imm.into(), out_reg0, sink);') - -# I-type encoding of an integer comparison. -Iicmp = EncRecipe( - 'Iicmp', IntCompareImm, base_size=4, ins=GPR, outs=GPR, - instp=IsSignedInt(IntCompareImm.imm, 12), - emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);') - -# I-type encoding for `jalr` as a return instruction. We won't use the -# immediate offset. -# The variable return values are not encoded. -Iret = EncRecipe( - 'Iret', MultiAry, base_size=4, ins=(), outs=(), - emit=''' - // Return instructions are always a jalr to %x1. - // The return address is provided as a special-purpose link argument. - put_i( - bits, - 1, // rs1 = %x1 - 0, // no offset. - 0, // rd = %x0: no address written. - sink, - ); - ''') - -# I-type encoding for `jalr` as a call_indirect. -Icall = EncRecipe( - 'Icall', CallIndirect, base_size=4, ins=GPR, outs=(), - emit=''' - // call_indirect instructions are jalr with rd=%x1. - put_i( - bits, - in_reg0, - 0, // no offset. - 1, // rd = %x1: link register. - sink, - ); - ''') - - -# Copy of a GPR is implemented as addi x, 0. -Icopy = EncRecipe( - 'Icopy', Unary, base_size=4, ins=GPR, outs=GPR, - emit='put_i(bits, in_reg0, 0, out_reg0, sink);') - -# Same for a GPR regmove. -Irmov = EncRecipe( - 'Irmov', RegMove, base_size=4, ins=GPR, outs=(), - emit='put_i(bits, src, 0, dst, sink);') - -# U-type instructions have a 20-bit immediate that targets bits 12-31. -U = EncRecipe( - 'U', UnaryImm, base_size=4, ins=(), outs=GPR, - instp=IsSignedInt(UnaryImm.imm, 32, 12), - emit='put_u(bits, imm.into(), out_reg0, sink);') - -# UJ-type unconditional branch instructions. -UJ = EncRecipe( - 'UJ', Jump, base_size=4, ins=(), outs=(), branch_range=(0, 21), - emit=''' - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_uj(bits, disp, 0, sink); - ''') - -UJcall = EncRecipe( - 'UJcall', Call, base_size=4, ins=(), outs=(), - emit=''' - sink.reloc_external(Reloc::RiscvCall, - &func.dfg.ext_funcs[func_ref].name, - 0); - // rd=%x1 is the standard link register. - put_uj(bits, 0, 1, sink); - ''') - -# SB-type branch instructions. -SB = EncRecipe( - 'SB', BranchIcmp, base_size=4, - ins=(GPR, GPR), outs=(), - branch_range=(0, 13), - emit=''' - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, in_reg1, sink); - ''') - -# SB-type branch instruction with rs2 fixed to zero. -SBzero = EncRecipe( - 'SBzero', Branch, base_size=4, - ins=(GPR), outs=(), - branch_range=(0, 13), - emit=''' - let dest = i64::from(func.offsets[destination]); - let disp = dest - i64::from(sink.offset()); - put_sb(bits, disp, in_reg0, 0, sink); - ''') - -# Spill of a GPR. -GPsp = EncRecipe( - 'GPsp', Unary, base_size=4, - ins=GPR, outs=Stack(GPR), - emit='unimplemented!();') - -# Fill of a GPR. 
-GPfi = EncRecipe( - 'GPfi', Unary, base_size=4, - ins=Stack(GPR), outs=GPR, - emit='unimplemented!();') - -# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn -# into a no-op. -stacknull = EncRecipe('stacknull', Unary, base_size=0, - ins=Stack(GPR), outs=Stack(GPR), emit='') diff --git a/cranelift-codegen/meta-python/isa/riscv/registers.py b/cranelift-codegen/meta-python/isa/riscv/registers.py deleted file mode 100644 index d9d43f043..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/registers.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -RISC-V register banks. -""" -from __future__ import absolute_import -from cdsl.registers import RegBank, RegClass -from .defs import ISA - - -# We include `x0`, a.k.a. `zero`, in the register bank. It will be reserved. -IntRegs = RegBank( - 'IntRegs', ISA, - 'General purpose registers', - units=32, prefix='x') - -FloatRegs = RegBank( - 'FloatRegs', ISA, - 'Floating point registers', - units=32, prefix='f') - -GPR = RegClass(IntRegs) -FPR = RegClass(FloatRegs) - -RegClass.extract_names(globals()) diff --git a/cranelift-codegen/meta-python/isa/riscv/settings.py b/cranelift-codegen/meta-python/isa/riscv/settings.py deleted file mode 100644 index c8b88db55..000000000 --- a/cranelift-codegen/meta-python/isa/riscv/settings.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -RISC-V settings. -""" -from __future__ import absolute_import -from cdsl.settings import SettingGroup, BoolSetting -from cdsl.predicates import And -import base.settings as shared -from .defs import ISA - -ISA.settings = SettingGroup('riscv', parent=shared.group) - -supports_m = BoolSetting("CPU supports the 'M' extension (mul/div)") -supports_a = BoolSetting("CPU supports the 'A' extension (atomics)") -supports_f = BoolSetting("CPU supports the 'F' extension (float)") -supports_d = BoolSetting("CPU supports the 'D' extension (double)") - -enable_m = BoolSetting( - "Enable the use of 'M' instructions if available", - default=True) - -enable_e = BoolSetting( - "Enable the 'RV32E' instruction set with only 16 registers") - -use_m = And(supports_m, enable_m) -use_a = And(supports_a, shared.enable_atomics) -use_f = And(supports_f, shared.enable_float) -use_d = And(supports_d, shared.enable_float) - -full_float = And(shared.enable_simd, supports_f, supports_d) - -ISA.settings.close(globals()) diff --git a/cranelift-codegen/meta-python/isa/x86/__init__.py b/cranelift-codegen/meta-python/isa/x86/__init__.py deleted file mode 100644 index 9691ce647..000000000 --- a/cranelift-codegen/meta-python/isa/x86/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -x86 Target Architecture ------------------------ - -This target ISA generates code for x86 CPUs with two separate CPU modes: - -`I32` - 32-bit x86 architecture, also known as 'IA-32' and sometimes referred - to as 'i386'; note, however, that Cranelift depends on instructions not - in the original `i386`, such as SSE2, CMOVcc, and UD2. - -`I64` - x86-64 architecture, also known as 'AMD64', 'Intel 64', and 'x64'. -""" - -from __future__ import absolute_import -from . import defs -from . import encodings, settings, registers # noqa -from cdsl.isa import TargetISA # noqa - -# Re-export the primary target ISA definition. -ISA = defs.ISA.finish() # type: TargetISA diff --git a/cranelift-codegen/meta-python/isa/x86/defs.py b/cranelift-codegen/meta-python/isa/x86/defs.py deleted file mode 100644 index 00ac2bbbf..000000000 --- a/cranelift-codegen/meta-python/isa/x86/defs.py +++ /dev/null @@ -1,28 +0,0 @@ -""" -x86 definitions. - -Commonly used definitions.
-""" -from __future__ import absolute_import -from cdsl.isa import TargetISA, CPUMode -import base.instructions -from . import instructions as x86 -from base.immediates import floatcc - -ISA = TargetISA('x86', [base.instructions.GROUP, x86.GROUP]) # type: TargetISA - -# CPU modes for 32-bit and 64-bit operation. -X86_64 = CPUMode('I64', ISA) -X86_32 = CPUMode('I32', ISA) - -# The set of floating point condition codes that are directly supported. -# Other condition codes need to be reversed or expressed as two tests. -supported_floatccs = [ - floatcc.ord, - floatcc.uno, - floatcc.one, - floatcc.ueq, - floatcc.gt, - floatcc.ge, - floatcc.ult, - floatcc.ule] diff --git a/cranelift-codegen/meta-python/isa/x86/encodings.py b/cranelift-codegen/meta-python/isa/x86/encodings.py deleted file mode 100644 index 8da4912e7..000000000 --- a/cranelift-codegen/meta-python/isa/x86/encodings.py +++ /dev/null @@ -1,771 +0,0 @@ -""" -x86 Encodings. -""" -from __future__ import absolute_import -from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat -from cdsl.predicates import IsUnsignedInt -from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals -from base import instructions as base -from base import types -from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm -from base.formats import FuncAddr, Call, LoadComplex, StoreComplex -from .defs import X86_64, X86_32 -from . import recipes as r -from . import settings as cfg -from . import instructions as x86 -from .legalize import x86_expand -from base.legalize import narrow, widen, expand_flags -from .settings import use_sse41, not_all_ones_funcaddrs_and_not_is_pic, \ - all_ones_funcaddrs_and_not_is_pic, is_pic, not_is_pic - -try: - from typing import TYPE_CHECKING, Any # noqa - if TYPE_CHECKING: - from cdsl.instructions import MaybeBoundInst # noqa - from cdsl.predicates import FieldPredicate # noqa -except ImportError: - pass - - -X86_32.legalize_monomorphic(expand_flags) -X86_32.legalize_type( - default=narrow, - b1=expand_flags, - i8=widen, - i16=widen, - i32=x86_expand, - f32=x86_expand, - f64=x86_expand) - -X86_64.legalize_monomorphic(expand_flags) -X86_64.legalize_type( - default=narrow, - b1=expand_flags, - i8=widen, - i16=widen, - i32=x86_expand, - i64=x86_expand, - f32=x86_expand, - f64=x86_expand) - - -# -# Helper functions for generating encodings. -# - -def enc_x86_64(inst, recipe, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None - """ - Add encodings for `inst` to X86_64 with and without a REX prefix. - """ - X86_64.enc(inst, *recipe.rex(*args, **kwargs)) - X86_64.enc(inst, *recipe(*args, **kwargs)) - - -def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None - """ - Add encodings for `inst` to X86_64 with and without a REX prefix. - """ - X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp) - X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp) - - -def enc_both(inst, recipe, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None - """ - Add encodings for `inst` to both X86_32 and X86_64. - """ - X86_32.enc(inst, *recipe(*args, **kwargs)) - enc_x86_64(inst, recipe, *args, **kwargs) - - -def enc_both_instp(inst, recipe, instp, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None - """ - Add encodings for `inst` to both X86_32 and X86_64. 
- """ - X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp) - enc_x86_64_instp(inst, recipe, instp, *args, **kwargs) - - -def enc_i32_i64(inst, recipe, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None - """ - Add encodings for `inst.i32` to X86_32. - Add encodings for `inst.i32` to X86_64 with and without REX. - Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - """ - X86_32.enc(inst.i32, *recipe(*args, **kwargs)) - - # REX-less encoding must come after REX encoding so we don't use it by - # default. Otherwise reg-alloc would never use r8 and up. - X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs)) - X86_64.enc(inst.i32, *recipe(*args, **kwargs)) - - X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs)) - - -def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs): - # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None - """ - Add encodings for `inst.i32` to X86_32. - Add encodings for `inst.i32` to X86_64 with and without REX. - Add encodings for `inst.i64` to X86_64 with a REX.W prefix. - - Similar to `enc_i32_i64` but applies `instp` to each encoding. - """ - X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp) - - # REX-less encoding must come after REX encoding so we don't use it by - # default. Otherwise reg-alloc would never use r8 and up. - X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp) - X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp) - - X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp) - - -def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs): - # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None - """ - Add encodings for `inst.i32` to X86_32. - Add encodings for `inst.i32` to X86_64 with and without REX. - Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` - argument to determine whether or not to set the REX.W bit. - """ - X86_32.enc(inst.i32.any, *recipe(*args, **kwargs)) - - # REX-less encoding must come after REX encoding so we don't use it by - # default. Otherwise reg-alloc would never use r8 and up. - X86_64.enc(inst.i32.any, *recipe.rex(*args, **kwargs)) - X86_64.enc(inst.i32.any, *recipe(*args, **kwargs)) - - if w_bit: - X86_64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs)) - else: - X86_64.enc(inst.i64.any, *recipe.rex(*args, **kwargs)) - X86_64.enc(inst.i64.any, *recipe(*args, **kwargs)) - - -for inst, opc in [ - (base.iadd, 0x01), - (base.isub, 0x29), - (base.band, 0x21), - (base.bor, 0x09), - (base.bxor, 0x31)]: - enc_i32_i64(inst, r.rr, opc) - -# x86 has a bitwise not instruction NOT. -enc_i32_i64(base.bnot, r.ur, 0xf7, rrr=2) - -# Also add a `b1` encodings for the logic instructions. -# TODO: Should this be done with 8-bit instructions? It would improve -# partial register dependencies. -enc_both(base.band.b1, r.rr, 0x21) -enc_both(base.bor.b1, r.rr, 0x09) -enc_both(base.bxor.b1, r.rr, 0x31) - -enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf) -enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7) -enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6) - -enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5) -enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4) - -enc_i32_i64(base.copy, r.umr, 0x89) -for ty in [types.b1, types.i8, types.i16]: - enc_both(base.copy.bind(ty), r.umr, 0x89) - -# For x86-64, only define REX forms for now, since we can't describe the -# special regunit immediate operands with the current constraint language. 
-for ty in [types.i8, types.i16, types.i32]: - X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89)) - X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89)) -X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1)) - -enc_both(base.regmove.b1, r.rmov, 0x89) -enc_both(base.regmove.i8, r.rmov, 0x89) - -# Immediate instructions with sign-extended 8-bit and 32-bit immediate. -for inst, rrr in [ - (base.iadd_imm, 0), - (base.band_imm, 4), - (base.bor_imm, 1), - (base.bxor_imm, 6)]: - enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr) - enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr) - -# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as -# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks. - -# Immediate constants. -X86_32.enc(base.iconst.i32, *r.pu_id(0xb8)) - -X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8)) -X86_64.enc(base.iconst.i32, *r.pu_id(0xb8)) -# The 32-bit immediate movl also zero-extends to 64 bits. -X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8), - instp=IsUnsignedInt(UnaryImm.imm, 32)) -X86_64.enc(base.iconst.i64, *r.pu_id(0xb8), - instp=IsUnsignedInt(UnaryImm.imm, 32)) -# Sign-extended 32-bit immediate. -X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1)) -# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. -X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1)) - -# bool constants. -enc_both(base.bconst.b1, r.pu_id_bool, 0xb8) - -# Shifts and rotates. -# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit -# and 16-bit shifts would need explicit masking. -for inst, rrr in [ - (base.rotl, 0), - (base.rotr, 1), - (base.ishl, 4), - (base.ushr, 5), - (base.sshr, 7)]: - # Cannot use enc_i32_i64 for this pattern because instructions require - # .any suffix. - X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr)) - X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1)) - X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr)) - X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr)) - -for inst, rrr in [ - (base.rotl_imm, 0), - (base.rotr_imm, 1), - (base.ishl_imm, 4), - (base.ushr_imm, 5), - (base.sshr_imm, 7)]: - enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr) - -# Population count. -X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) -X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1), - isap=cfg.use_popcnt) -X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) -X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt) - -# Count leading zero bits. -X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) -X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1), - isap=cfg.use_lzcnt) -X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) -X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt) - -# Count trailing zero bits. -X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) -X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1), - isap=cfg.use_bmi1) -X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) -X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) - -# -# Loads and stores. 
-# - -ldcomplexp = LengthEquals(LoadComplex, 2) -for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]: - enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b) - enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b) - X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1), - instp=ldcomplexp) - enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7) - enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf) - enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6) - enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe) - -stcomplexp = LengthEquals(StoreComplex, 3) -for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]: - enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89) - enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89) - enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89) - enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89) - -for recipe in [r.stWithIndex_abcd, - r.stWithIndexDisp8_abcd, - r.stWithIndexDisp32_abcd]: - enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88) - enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88) - -for recipe in [r.st, r.stDisp8, r.stDisp32]: - enc_i32_i64_ld_st(base.store, True, recipe, 0x89) - enc_x86_64(base.istore32.i64.any, recipe, 0x89) - enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89) - -# Byte stores are more complicated because the registers they can address -# depend on the presence of a REX prefix. The st*_abcd recipes fall back to -# the corresponding st* recipes when a REX prefix is applied. -for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]: - enc_both(base.istore8.i32.any, recipe, 0x88) - enc_x86_64(base.istore8.i64.any, recipe, 0x88) - -enc_i32_i64(base.spill, r.spillSib32, 0x89) -enc_i32_i64(base.regspill, r.regspill32, 0x89) - -# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid -# constraining the permitted registers. -# See MIN_SPILL_SLOT_SIZE which makes this safe. -for ty in [types.b1, types.i8, types.i16]: - enc_both(base.spill.bind(ty), r.spillSib32, 0x89) - enc_both(base.regspill.bind(ty), r.regspill32, 0x89) - -for recipe in [r.ld, r.ldDisp8, r.ldDisp32]: - enc_i32_i64_ld_st(base.load, True, recipe, 0x8b) - enc_x86_64(base.uload32.i64, recipe, 0x8b) - X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1)) - enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7) - enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf) - enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6) - enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe) - -enc_i32_i64(base.fill, r.fillSib32, 0x8b) -enc_i32_i64(base.regfill, r.regfill32, 0x8b) - -# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. -for ty in [types.b1, types.i8, types.i16]: - enc_both(base.fill.bind(ty), r.fillSib32, 0x8b) - enc_both(base.regfill.bind(ty), r.regfill32, 0x8b) - -# Push and Pop -X86_32.enc(x86.push.i32, *r.pushq(0x50)) -enc_x86_64(x86.push.i64, r.pushq, 0x50) - -X86_32.enc(x86.pop.i32, *r.popq(0x58)) -enc_x86_64(x86.pop.i64, r.popq, 0x58) - -# Copy Special -# For x86-64, only define REX forms for now, since we can't describe the -# special regunit immediate operands with the current constraint language.
-X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1)) -X86_32.enc(base.copy_special, *r.copysp(0x89)) - -# Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn -# into a no-op. Ideally we could make this encoding available for -# all types, and write `base.copy_nop.any`, but it appears that the -# controlling type variable must not be polymorphic. So we make do with -# the following limited set, and guard the generating transformation in -# regalloc/reload.rs accordingly. -# -# The same encoding is generated for both the 64- and 32-bit architectures. -# Note that we can't use `enc_both` here, because that attempts to create a -# variant with a REX prefix in the 64-bit-architecture case. But since -# there's no actual instruction for the REX prefix to modify the meaning of, -# it will modify the meaning of whatever instruction happens to follow this -# one, which is obviously wrong. Note also that we can and indeed *must* -# claim that there's a 64-bit encoding for the 32-bit arch case, even though -# no such single instruction actually exists for the 32-bit arch case. -for ty in [types.i64, types.i32, types.i16, types.i8, types.f64, types.f32]: - X86_64.enc(base.copy_nop.bind(ty), r.stacknull, 0) - X86_32.enc(base.copy_nop.bind(ty), r.stacknull, 0) - -# Adjust SP down by a dynamic value (or up, with a negative operand). -X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29)) -X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1)) - -# Adjust SP up by an immediate (or down, with a negative immediate) -X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83)) -X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81)) -X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1)) -X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1)) - -# Adjust SP down by an immediate (or up, with a negative immediate) -X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5)) -X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5)) -X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1)) -X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1)) - -# -# Float loads and stores.
-# - -enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10) -enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10) -enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10) - -enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10) -enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10) -enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10) - -enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10) -enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10) -enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10) - -enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10) -enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10) -enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10) - -enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11) -enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11) -enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11) - -enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11) -enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11) -enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11) - -enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11) -enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11) -enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11) - -enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11) -enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11) -enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11) - -enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10) -enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10) -enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10) -enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10) - -enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11) -enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11) -enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11) -enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11) - -# -# Function addresses. -# - -# Non-PIC, non-all-ones funcaddresses. -X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8), - isap=not_all_ones_funcaddrs_and_not_is_pic) -X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1), - isap=not_all_ones_funcaddrs_and_not_is_pic) - -# Non-PIC, all-ones funcaddresses. -X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8), - isap=all_ones_funcaddrs_and_not_is_pic) -X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1), - isap=all_ones_funcaddrs_and_not_is_pic) - -# 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's -# pc-relative field. -X86_64.enc(base.func_addr.i64, *r.pcrel_fnaddr8.rex(0x8d, w=1), - instp=IsColocatedFunc(FuncAddr.func_ref)) - -# 64-bit, non-colocated, PIC. -X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1), - isap=is_pic) - -# -# Global addresses. -# - -# Non-PIC -X86_32.enc(base.symbol_value.i32, *r.gvaddr4(0xb8), - isap=not_is_pic) -X86_64.enc(base.symbol_value.i64, *r.gvaddr8.rex(0xb8, w=1), - isap=not_is_pic) - -# PIC, colocated -X86_64.enc(base.symbol_value.i64, *r.pcrel_gvaddr8.rex(0x8d, w=1), - isap=is_pic, - instp=IsColocatedData()) - -# PIC, non-colocated -X86_64.enc(base.symbol_value.i64, *r.got_gvaddr8.rex(0x8b, w=1), - isap=is_pic) - -# -# Stack addresses. -# -# TODO: Add encoding rules for stack_load and stack_store, so that they -# don't get legalized to stack_addr + load/store.
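# Note: opcode 0x8d in the `spaddr*_id` recipes below is x86 `lea`, so a
# `stack_addr` materializes the slot address in one instruction. That is also
# why the TODO above matters: without dedicated rules, a stack_load currently
# legalizes to this `lea` plus a separate `mov`, instead of a single
# SP-relative `mov`.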
-# -X86_32.enc(base.stack_addr.i32, *r.spaddr4_id(0x8d)) -X86_64.enc(base.stack_addr.i64, *r.spaddr8_id.rex(0x8d, w=1)) - -# -# Call/return -# - -# 32-bit, both PIC and non-PIC. -X86_32.enc(base.call, *r.call_id(0xe8)) - -# 64-bit, colocated, both PIC and non-PIC. Use the call instruction's -# pc-relative field. -X86_64.enc(base.call, *r.call_id(0xe8), - instp=IsColocatedFunc(Call.func_ref)) - -# 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, -# since non-PIC is currently using the large model, which requires calls be -# lowered to func_addr+call_indirect. -X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic) - -X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2)) -X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2)) -X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2)) - -X86_32.enc(base.x_return, *r.ret(0xc3)) -X86_64.enc(base.x_return, *r.ret(0xc3)) - -# -# Branches -# -X86_32.enc(base.jump, *r.jmpb(0xeb)) -X86_64.enc(base.jump, *r.jmpb(0xeb)) -X86_32.enc(base.jump, *r.jmpd(0xe9)) -X86_64.enc(base.jump, *r.jmpd(0xe9)) - -enc_both(base.brif, r.brib, 0x70) -enc_both(base.brif, r.brid, 0x0f, 0x80) - -# Not all float condition codes are legal, see `supported_floatccs`. -enc_both(base.brff, r.brfb, 0x70) -enc_both(base.brff, r.brfd, 0x0f, 0x80) - -# Note that the tjccd opcode will be prefixed with 0x0f. -enc_i32_i64(base.brz, r.tjccb, 0x74) -enc_i32_i64(base.brz, r.tjccd, 0x84) -enc_i32_i64(base.brnz, r.tjccb, 0x75) -enc_i32_i64(base.brnz, r.tjccd, 0x85) - -# Branch on a b1 value in a register only looks at the low 8 bits. See also -# bint encodings below. -# -# Start with the worst-case encoding for X86_32 only. The register allocator -# can't handle a branch with an ABCD-constrained operand. -X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84)) -X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85)) - -enc_both(base.brz.b1, r.t8jccb_abcd, 0x74) -enc_both(base.brz.b1, r.t8jccd_abcd, 0x84) -enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75) -enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85) - -# -# Jump tables -# -X86_64.enc(base.jump_table_entry.i64.any.any, *r.jt_entry.rex(0x63, w=1)) -X86_32.enc(base.jump_table_entry.i32.any.any, *r.jt_entry(0x8b)) - -X86_64.enc(base.jump_table_base.i64, *r.jt_base.rex(0x8d, w=1)) -X86_32.enc(base.jump_table_base.i32, *r.jt_base(0x8d)) - -enc_x86_64(base.indirect_jump_table_br.i64, r.indirect_jmp, 0xff, rrr=4) -X86_32.enc(base.indirect_jump_table_br.i32, *r.indirect_jmp(0xff, rrr=4)) - -# -# Trap as ud2 -# -X86_32.enc(base.trap, *r.trap(0x0f, 0x0b)) -X86_64.enc(base.trap, *r.trap(0x0f, 0x0b)) - -# Debug trap as int3 -X86_32.enc(base.debugtrap, r.debugtrap, 0) -X86_64.enc(base.debugtrap, r.debugtrap, 0) - -# Using a standard EncRecipe, not the TailRecipe. -X86_32.enc(base.trapif, r.trapif, 0) -X86_64.enc(base.trapif, r.trapif, 0) -X86_32.enc(base.trapff, r.trapff, 0) -X86_64.enc(base.trapff, r.trapff, 0) - -# -# Comparisons -# -enc_i32_i64(base.icmp, r.icscc, 0x39) -enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7) -enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7) -enc_i32_i64(base.ifcmp, r.rcmp, 0x39) -enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7) -enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7) -# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). - -X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39)) -X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1)) - -# -# Convert flags to bool. -# -# This encodes `b1` as an 8-bit low register with the value 0 or 1. 
-enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
-enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
-
-#
-# Conditional move (a.k.a. integer select)
-#
-enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40)
-
-#
-# Bit scan forward and reverse
-#
-enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC)
-enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
-
-#
-# Convert bool to int.
-#
-# This assumes that b1 is represented as an 8-bit low register with the value 0
-# or 1.
-#
-# Encode movzbq as movzbl, because it's equivalent and shorter.
-X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
-X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
-X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
-X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
-X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
-
-# Numerical conversions.
-
-# Reducing an integer is a no-op.
-X86_32.enc(base.ireduce.i8.i16, r.null, 0)
-X86_32.enc(base.ireduce.i8.i32, r.null, 0)
-X86_32.enc(base.ireduce.i16.i32, r.null, 0)
-
-X86_64.enc(base.ireduce.i8.i16, r.null, 0)
-X86_64.enc(base.ireduce.i8.i32, r.null, 0)
-X86_64.enc(base.ireduce.i16.i32, r.null, 0)
-X86_64.enc(base.ireduce.i8.i64, r.null, 0)
-X86_64.enc(base.ireduce.i16.i64, r.null, 0)
-X86_64.enc(base.ireduce.i32.i64, r.null, 0)
-
-# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
-# instructions for %al/%ax/%eax to %ax/%eax/%rax.
-
-# movsbl
-X86_32.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
-X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
-X86_64.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
-
-# movswl
-X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
-X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
-X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
-
-# movsbq
-X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
-
-# movswq
-X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
-
-# movslq
-X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
-
-# movzbl
-X86_32.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
-X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
-X86_64.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
-
-# movzwl
-X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
-X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
-X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
-
-# movzbq, encoded as movzbl because it's equivalent and shorter
-X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
-X86_64.enc(base.uextend.i64.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
-
-# movzwq, encoded as movzwl because it's equivalent and shorter
-X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
-X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
-
-# A 32-bit register copy clears the high 32 bits.
-X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
-X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
-
-
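A hedged recap of the sign- and zero-extension section above as an illustrative table (not part of the meta DSL; keys give the source width in bits):

EXTEND_OPCODES = {
    ('uextend', 8): (0x0F, 0xB6),   # movzbl (movzbq encoded the same)
    ('uextend', 16): (0x0F, 0xB7),  # movzwl (movzwq encoded the same)
    ('sextend', 8): (0x0F, 0xBE),   # movsbl; movsbq adds REX.W
    ('sextend', 16): (0x0F, 0xBF),  # movswl; movswq adds REX.W
    ('sextend', 32): (0x63,),       # movslq, REX.W required
}

# uextend.i64.i32 needs no table entry: a plain 32-bit mov (0x89) already
# zeroes bits 63..32 of its destination, which the last two encodings use.
assert EXTEND_OPCODES[('sextend', 32)] == (0x63,)
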
-#
-# Floating point
-#
-
-# Floating-point constants equal to 0.0 can be encoded using either
-# `xorps` or `xorpd`, for 32-bit and 64-bit floats respectively.
-X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
-           instp=IsZero32BitFloat(UnaryIeee32.imm))
-X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
-           instp=IsZero64BitFloat(UnaryIeee64.imm))
-
-enc_x86_64_instp(base.f32const, r.f32imm_z,
-                 IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
-enc_x86_64_instp(base.f64const, r.f64imm_z,
-                 IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
-
-# movd
-enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
-enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
-
-# movq
-X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
-X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
-
-# movaps
-enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
-enc_both(base.copy.f64, r.furm, 0x0f, 0x28)
-
-# For x86-64, only define REX forms for now, since we can't describe the
-# special regunit immediate operands with the current constraint language.
-X86_32.enc(base.regmove.f32, *r.frmov(0x0f, 0x28))
-X86_64.enc(base.regmove.f32, *r.frmov.rex(0x0f, 0x28))
-
-# For x86-64, only define REX forms for now, since we can't describe the
-# special regunit immediate operands with the current constraint language.
-X86_32.enc(base.regmove.f64, *r.frmov(0x0f, 0x28))
-X86_64.enc(base.regmove.f64, *r.frmov.rex(0x0f, 0x28))
-
-# cvtsi2ss
-enc_i32_i64(base.fcvt_from_sint.f32, r.frurm, 0xf3, 0x0f, 0x2a)
-
-# cvtsi2sd
-enc_i32_i64(base.fcvt_from_sint.f64, r.frurm, 0xf2, 0x0f, 0x2a)
-
-# cvtss2sd
-enc_both(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
-
-# cvtsd2ss
-enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
-
-# cvttss2si
-enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
-X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
-
-# cvttsd2si
-enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
-X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
-
-# Exact square roots.
-enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
-enc_both(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
-
-# Rounding. The recipe looks at the opcode to pick an immediate.
-for inst in [
-        base.nearest,
-        base.floor,
-        base.ceil,
-        base.trunc]:
-    enc_both(inst.f32, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0a, isap=use_sse41)
-    enc_both(inst.f64, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0b, isap=use_sse41)
-
-
-# Binary arithmetic ops.
-for inst, opc in [
-        (base.fadd, 0x58),
-        (base.fsub, 0x5c),
-        (base.fmul, 0x59),
-        (base.fdiv, 0x5e),
-        (x86.fmin, 0x5d),
-        (x86.fmax, 0x5f)]:
-    enc_both(inst.f32, r.fa, 0xf3, 0x0f, opc)
-    enc_both(inst.f64, r.fa, 0xf2, 0x0f, opc)
-
-# Binary bitwise ops.
-for inst, opc in [
-        (base.band, 0x54),
-        (base.bor, 0x56),
-        (base.bxor, 0x57)]:
-    enc_both(inst.f32, r.fa, 0x0f, opc)
-    enc_both(inst.f64, r.fa, 0x0f, opc)
-
-# The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)`
-# is `x&~y`.
-enc_both(base.band_not.f32, r.fax, 0x0f, 0x55)
-enc_both(base.band_not.f64, r.fax, 0x0f, 0x55)
-
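The `fax` recipe exists because of the operand swap the comment above describes: `andnps` complements its destination operand, so `band_not(x, y)` has to be emitted as `andnps y, x`, leaving the result in the register that held `y`. A small, hedged sanity check in plain Python:

def andnps(dst, src):
    return ~dst & src    # what the hardware computes: ~first & second

def band_not(x, y):
    return andnps(y, x)  # swap the operands to get x & ~y

assert band_not(0b1100, 0b1010) == 0b0100
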
-# Comparisons.
-#
-# This only covers the condition codes in `supported_floatccs`, the rest are
-# handled by legalization patterns.
-enc_both(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
-enc_both(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
-
-enc_both(base.ffcmp.f32, r.fcmp, 0x0f, 0x2e)
-enc_both(base.ffcmp.f64, r.fcmp, 0x66, 0x0f, 0x2e)
diff --git a/cranelift-codegen/meta-python/isa/x86/instructions.py b/cranelift-codegen/meta-python/isa/x86/instructions.py
deleted file mode 100644
index 6adc2ad68..000000000
--- a/cranelift-codegen/meta-python/isa/x86/instructions.py
+++ /dev/null
@@ -1,173 +0,0 @@
-"""
-Supplementary instruction definitions for x86.
-
-This module defines additional instructions that are useful only to the x86
-target ISA.
-"""
-
-from base.types import iflags
-from cdsl.operands import Operand
-from cdsl.typevar import TypeVar
-from cdsl.instructions import Instruction, InstructionGroup
-
-
-GROUP = InstructionGroup("x86", "x86-specific instruction set")
-
-iWord = TypeVar('iWord', 'A scalar integer machine word', ints=(32, 64))
-
-nlo = Operand('nlo', iWord, doc='Low part of numerator')
-nhi = Operand('nhi', iWord, doc='High part of numerator')
-d = Operand('d', iWord, doc='Denominator')
-q = Operand('q', iWord, doc='Quotient')
-r = Operand('r', iWord, doc='Remainder')
-
-udivmodx = Instruction(
-    'x86_udivmodx', r"""
-    Extended unsigned division.
-
-    Concatenate the bits in `nhi` and `nlo` to form the numerator.
-    Interpret the bits as an unsigned number and divide by the unsigned
-    denominator `d`. Trap when `d` is zero or if the quotient is larger
-    than the range of the output.
-
-    Return both quotient and remainder.
-    """,
-    ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
-
-sdivmodx = Instruction(
-    'x86_sdivmodx', r"""
-    Extended signed division.
-
-    Concatenate the bits in `nhi` and `nlo` to form the numerator.
-    Interpret the bits as a signed number and divide by the signed
-    denominator `d`. Trap when `d` is zero or if the quotient is outside
-    the range of the output.
-
-    Return both quotient and remainder.
-    """,
-    ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
-
-argL = Operand('argL', iWord)
-argR = Operand('argR', iWord)
-resLo = Operand('resLo', iWord)
-resHi = Operand('resHi', iWord)
-
-umulx = Instruction(
-    'x86_umulx', r"""
-    Unsigned integer multiplication, producing a double-length result.
-
-    Polymorphic over all scalar integer types, but does not support vector
-    types.
-    """,
-    ins=(argL, argR), outs=(resLo, resHi))
-
-smulx = Instruction(
-    'x86_smulx', r"""
-    Signed integer multiplication, producing a double-length result.
-
-    Polymorphic over all scalar integer types, but does not support vector
-    types.
-    """,
-    ins=(argL, argR), outs=(resLo, resHi))
-
-Float = TypeVar(
-    'Float', 'A scalar or vector floating point number',
-    floats=True, simd=True)
-IntTo = TypeVar(
-    'IntTo', 'An integer type with the same number of lanes',
-    ints=(32, 64), simd=True)
-
-x = Operand('x', Float)
-a = Operand('a', IntTo)
-
-cvtt2si = Instruction(
-    'x86_cvtt2si', r"""
-    Convert a floating point number to a signed integer, with truncation.
-
-    The source floating point operand is converted to a signed integer by
-    rounding towards zero. If the result can't be represented in the output
-    type, returns the smallest signed value the output type can represent.
-
-    This instruction does not trap.
-    """,
-    ins=x, outs=a)
-
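A hedged Python model of the i32 case of `x86_cvtt2si` described above; CVTTSS2SI/CVTTSD2SI write the "integer indefinite" value, which is exactly the smallest representable signed integer, instead of trapping:

import math

def cvtt2si32(x):
    # Truncate toward zero; NaN and out-of-range inputs yield INT32_MIN.
    if math.isnan(x) or not (-2.0**31 - 1.0 < x < 2.0**31):
        return -2**31  # 0x80000000, the "integer indefinite" value
    return math.trunc(x)

assert cvtt2si32(3.9) == 3
assert cvtt2si32(-3.9) == -3
assert cvtt2si32(float('nan')) == -2**31
assert cvtt2si32(2.0**31) == -2**31
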
-x = Operand('x', Float)
-a = Operand('a', Float)
-y = Operand('y', Float)
-
-fmin = Instruction(
-    'x86_fmin', r"""
-    Floating point minimum with x86 semantics.
-
-    This is equivalent to the C ternary operator `x < y ? x : y` which
-    differs from :inst:`fmin` when either operand is NaN or when comparing
-    +0.0 to -0.0.
-
-    When the two operands don't compare as LT, `y` is returned unchanged,
-    even if it is a signalling NaN.
-    """,
-    ins=(x, y), outs=a)
-
-fmax = Instruction(
-    'x86_fmax', r"""
-    Floating point maximum with x86 semantics.
-
-    This is equivalent to the C ternary operator `x > y ? x : y` which
-    differs from :inst:`fmax` when either operand is NaN or when comparing
-    +0.0 to -0.0.
-
-    When the two operands don't compare as GT, `y` is returned unchanged,
-    even if it is a signalling NaN.
-    """,
-    ins=(x, y), outs=a)
-
-
-x = Operand('x', iWord)
-
-push = Instruction(
-    'x86_push', r"""
-    Pushes a value onto the stack.
-
-    Decrements the stack pointer and stores the specified value onto the top.
-
-    This is polymorphic in i32 and i64. However, it is only implemented for i64
-    in 64-bit mode, and only for i32 in 32-bit mode.
-    """,
-    ins=x, can_store=True, other_side_effects=True)
-
-pop = Instruction(
-    'x86_pop', r"""
-    Pops a value from the stack.
-
-    Loads a value from the top of the stack and then increments the stack
-    pointer.
-
-    This is polymorphic in i32 and i64. However, it is only implemented for i64
-    in 64-bit mode, and only for i32 in 32-bit mode.
-    """,
-    outs=x, can_load=True, other_side_effects=True)
-
-y = Operand('y', iWord)
-rflags = Operand('rflags', iflags)
-
-bsr = Instruction(
-    'x86_bsr', r"""
-    Bit Scan Reverse -- returns the bit-index of the most significant 1
-    in the word. Result is undefined if the argument is zero. However, it
-    sets the Z flag depending on the argument, so it is at least easy to
-    detect and handle that case.
-
-    This is polymorphic in i32 and i64. It is implemented for both i64 and
-    i32 in 64-bit mode, and only for i32 in 32-bit mode.
-    """,
-    ins=x, outs=(y, rflags))
-
-bsf = Instruction(
-    'x86_bsf', r"""
-    Bit Scan Forward -- returns the bit-index of the least significant 1
-    in the word. It is otherwise identical to 'bsr', just above.
-    """,
-    ins=x, outs=(y, rflags))
-
-GROUP.close()
diff --git a/cranelift-codegen/meta-python/isa/x86/legalize.py b/cranelift-codegen/meta-python/isa/x86/legalize.py
deleted file mode 100644
index 15f08a07b..000000000
--- a/cranelift-codegen/meta-python/isa/x86/legalize.py
+++ /dev/null
@@ -1,229 +0,0 @@
-"""
-Custom legalization patterns for x86.
-"""
-from __future__ import absolute_import
-from cdsl.ast import Var
-from cdsl.xform import Rtl, XFormGroup
-from base.immediates import imm64, intcc, floatcc
-from base import legalize as shared
-from base import instructions as insts
-from . import instructions as x86
-from .defs import ISA
-
-x86_expand = XFormGroup(
-    'x86_expand',
-    """
-    Legalize instructions by expansion.
-
-    Use x86-specific instructions if needed.
-    """,
-    isa=ISA, chain=shared.expand_flags)
-
-a = Var('a')
-dead = Var('dead')
-x = Var('x')
-xhi = Var('xhi')
-y = Var('y')
-a1 = Var('a1')
-a2 = Var('a2')
-
-#
-# Division and remainder.
-#
-# The srem expansion requires custom code because srem INT_MIN, -1 is not
-# allowed to trap. The other ops need to check avoid_div_traps.
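To make the corner case concrete (a hedged illustration, not part of the legalizer): x86's `idiv` faults on `INT_MIN / -1` because the quotient overflows, while Cranelift defines the remainder of that division as 0, so `expand_sdivrem` must branch around the hardware divide:

INT32_MIN = -2**31

assert INT32_MIN // -1 == 2**31   # quotient overflows i32, so idiv faults
assert INT32_MIN % -1 == 0        # the well-defined remainder srem must produce
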
-x86_expand.custom_legalize(insts.sdiv, 'expand_sdivrem') -x86_expand.custom_legalize(insts.srem, 'expand_sdivrem') -x86_expand.custom_legalize(insts.udiv, 'expand_udivrem') -x86_expand.custom_legalize(insts.urem, 'expand_udivrem') - -# -# Double length (widening) multiplication -# -resLo = Var('resLo') -resHi = Var('resHi') -x86_expand.legalize( - resHi << insts.umulhi(x, y), - Rtl( - (resLo, resHi) << x86.umulx(x, y) - )) - -x86_expand.legalize( - resHi << insts.smulhi(x, y), - Rtl( - (resLo, resHi) << x86.smulx(x, y) - )) - -# Floating point condition codes. -# -# The 8 condition codes in `supported_floatccs` are directly supported by a -# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization -# patterns. - -# Equality needs an explicit `ord` test which checks the parity bit. -x86_expand.legalize( - a << insts.fcmp(floatcc.eq, x, y), - Rtl( - a1 << insts.fcmp(floatcc.ord, x, y), - a2 << insts.fcmp(floatcc.ueq, x, y), - a << insts.band(a1, a2) - )) -x86_expand.legalize( - a << insts.fcmp(floatcc.ne, x, y), - Rtl( - a1 << insts.fcmp(floatcc.uno, x, y), - a2 << insts.fcmp(floatcc.one, x, y), - a << insts.bor(a1, a2) - )) - -# Inequalities that need to be reversed. -for cc, rev_cc in [ - (floatcc.lt, floatcc.gt), - (floatcc.le, floatcc.ge), - (floatcc.ugt, floatcc.ult), - (floatcc.uge, floatcc.ule)]: - x86_expand.legalize( - a << insts.fcmp(cc, x, y), - Rtl( - a << insts.fcmp(rev_cc, y, x) - )) - -# We need to modify the CFG for min/max legalization. -x86_expand.custom_legalize(insts.fmin, 'expand_minmax') -x86_expand.custom_legalize(insts.fmax, 'expand_minmax') - -# Conversions from unsigned need special handling. -x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint') -# Conversions from float to int can trap and modify the control flow graph. 
-x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint') -x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint') -x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat') -x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat') - -# Count leading and trailing zeroes, for baseline x86_64 -c_minus_one = Var('c_minus_one') -c_thirty_one = Var('c_thirty_one') -c_thirty_two = Var('c_thirty_two') -c_sixty_three = Var('c_sixty_three') -c_sixty_four = Var('c_sixty_four') -index1 = Var('index1') -r2flags = Var('r2flags') -index2 = Var('index2') - -x86_expand.legalize( - a << insts.clz.i64(x), - Rtl( - c_minus_one << insts.iconst(imm64(-1)), - c_sixty_three << insts.iconst(imm64(63)), - (index1, r2flags) << x86.bsr(x), - index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1), - a << insts.isub(c_sixty_three, index2), - )) - -x86_expand.legalize( - a << insts.clz.i32(x), - Rtl( - c_minus_one << insts.iconst(imm64(-1)), - c_thirty_one << insts.iconst(imm64(31)), - (index1, r2flags) << x86.bsr(x), - index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1), - a << insts.isub(c_thirty_one, index2), - )) - -x86_expand.legalize( - a << insts.ctz.i64(x), - Rtl( - c_sixty_four << insts.iconst(imm64(64)), - (index1, r2flags) << x86.bsf(x), - a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1), - )) - -x86_expand.legalize( - a << insts.ctz.i32(x), - Rtl( - c_thirty_two << insts.iconst(imm64(32)), - (index1, r2flags) << x86.bsf(x), - a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1), - )) - - -# Population count for baseline x86_64 -qv1 = Var('qv1') -qv3 = Var('qv3') -qv4 = Var('qv4') -qv5 = Var('qv5') -qv6 = Var('qv6') -qv7 = Var('qv7') -qv8 = Var('qv8') -qv9 = Var('qv9') -qv10 = Var('qv10') -qv11 = Var('qv11') -qv12 = Var('qv12') -qv13 = Var('qv13') -qv14 = Var('qv14') -qv15 = Var('qv15') -qv16 = Var('qv16') -qc77 = Var('qc77') -qc0F = Var('qc0F') -qc01 = Var('qc01') -x86_expand.legalize( - qv16 << insts.popcnt.i64(qv1), - Rtl( - qv3 << insts.ushr_imm(qv1, imm64(1)), - qc77 << insts.iconst(imm64(0x7777777777777777)), - qv4 << insts.band(qv3, qc77), - qv5 << insts.isub(qv1, qv4), - qv6 << insts.ushr_imm(qv4, imm64(1)), - qv7 << insts.band(qv6, qc77), - qv8 << insts.isub(qv5, qv7), - qv9 << insts.ushr_imm(qv7, imm64(1)), - qv10 << insts.band(qv9, qc77), - qv11 << insts.isub(qv8, qv10), - qv12 << insts.ushr_imm(qv11, imm64(4)), - qv13 << insts.iadd(qv11, qv12), - qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)), - qv14 << insts.band(qv13, qc0F), - qc01 << insts.iconst(imm64(0x0101010101010101)), - qv15 << insts.imul(qv14, qc01), - qv16 << insts.ushr_imm(qv15, imm64(56)) - )) - -lv1 = Var('lv1') -lv3 = Var('lv3') -lv4 = Var('lv4') -lv5 = Var('lv5') -lv6 = Var('lv6') -lv7 = Var('lv7') -lv8 = Var('lv8') -lv9 = Var('lv9') -lv10 = Var('lv10') -lv11 = Var('lv11') -lv12 = Var('lv12') -lv13 = Var('lv13') -lv14 = Var('lv14') -lv15 = Var('lv15') -lv16 = Var('lv16') -lc77 = Var('lc77') -lc0F = Var('lc0F') -lc01 = Var('lc01') -x86_expand.legalize( - lv16 << insts.popcnt.i32(lv1), - Rtl( - lv3 << insts.ushr_imm(lv1, imm64(1)), - lc77 << insts.iconst(imm64(0x77777777)), - lv4 << insts.band(lv3, lc77), - lv5 << insts.isub(lv1, lv4), - lv6 << insts.ushr_imm(lv4, imm64(1)), - lv7 << insts.band(lv6, lc77), - lv8 << insts.isub(lv5, lv7), - lv9 << insts.ushr_imm(lv7, imm64(1)), - lv10 << insts.band(lv9, lc77), - lv11 << insts.isub(lv8, lv10), - lv12 << insts.ushr_imm(lv11, imm64(4)), - lv13 << insts.iadd(lv11, lv12), - lc0F 
<< insts.iconst(imm64(0x0F0F0F0F)),
-        lv14 << insts.band(lv13, lc0F),
-        lc01 << insts.iconst(imm64(0x01010101)),
-        lv15 << insts.imul(lv14, lc01),
-        lv16 << insts.ushr_imm(lv15, imm64(24))
-    ))
diff --git a/cranelift-codegen/meta-python/isa/x86/recipes.py b/cranelift-codegen/meta-python/isa/x86/recipes.py
deleted file mode 100644
index c596fcd10..000000000
--- a/cranelift-codegen/meta-python/isa/x86/recipes.py
+++ /dev/null
@@ -1,2059 +0,0 @@
-"""
-x86 Encoding recipes.
-"""
-from __future__ import absolute_import
-from cdsl.isa import EncRecipe
-from cdsl.predicates import IsSignedInt, IsEqual, Or
-from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
-from cdsl.registers import RegClass
-from base.formats import Unary, UnaryIeee32, UnaryIeee64, UnaryImm, UnaryBool
-from base.formats import Binary, BinaryImm
-from base.formats import MultiAry, NullAry
-from base.formats import Trap, Call, CallIndirect, Store, Load
-from base.formats import IntCompare, IntCompareImm, FloatCompare
-from base.formats import IntCond, FloatCond
-from base.formats import IntSelect, IntCondTrap, FloatCondTrap
-from base.formats import Jump, Branch, BranchInt, BranchFloat
-from base.formats import BranchTableEntry, BranchTableBase, IndirectJump
-from base.formats import Ternary, FuncAddr, UnaryGlobalValue
-from base.formats import RegMove, RegSpill, RegFill, CopySpecial
-from base.formats import LoadComplex, StoreComplex
-from base.formats import StackLoad
-from .registers import GPR, ABCD, FPR
-from .registers import GPR8, FPR8, FLAG
-from .registers import StackGPR32, StackFPR32
-from .defs import supported_floatccs
-from .settings import use_sse41
-
-try:
-    from typing import Tuple, Dict, Sequence, Any  # noqa
-    from cdsl.instructions import InstructionFormat  # noqa
-    from cdsl.isa import ConstraintSeq, BranchRange, PredNode, OperandConstraint  # noqa
-except ImportError:
-    pass
-
-
-# Opcode representation.
-#
-# Cranelift requires each recipe to have a single encoding size in bytes, and
-# x86 opcodes are variable length, so we use separate recipes for different
-# styles of opcodes and prefixes. The opcode format is indicated by the recipe
-# name prefix:
-
-OPCODE_PREFIX = {
-        # Prefix bytes       Name     mmpp
-        ():                  ('Op1', 0b0000),
-        (0x66,):             ('Mp1', 0b0001),
-        (0xf3,):             ('Mp1', 0b0010),
-        (0xf2,):             ('Mp1', 0b0011),
-        (0x0f,):             ('Op2', 0b0100),
-        (0x66, 0x0f):        ('Mp2', 0b0101),
-        (0xf3, 0x0f):        ('Mp2', 0b0110),
-        (0xf2, 0x0f):        ('Mp2', 0b0111),
-        (0x0f, 0x38):        ('Op3', 0b1000),
-        (0x66, 0x0f, 0x38):  ('Mp3', 0b1001),
-        (0xf3, 0x0f, 0x38):  ('Mp3', 0b1010),
-        (0xf2, 0x0f, 0x38):  ('Mp3', 0b1011),
-        (0x0f, 0x3a):        ('Op3', 0b1100),
-        (0x66, 0x0f, 0x3a):  ('Mp3', 0b1101),
-        (0xf3, 0x0f, 0x3a):  ('Mp3', 0b1110),
-        (0xf2, 0x0f, 0x3a):  ('Mp3', 0b1111)
-        }
-
-# The table above does not include the REX prefix which goes after the
-# mandatory prefix. VEX/XOP and EVEX prefixes are not yet supported. Encodings
-# using any of these prefixes are represented by separate recipes.
-#
-# The encoding bits are:
-#
-# 0-7:   The opcode byte.
-# 8-9:   pp, mandatory prefix:
-#          00 none (Op*)
-#          01 66   (Mp*)
-#          10 F3   (Mp*)
-#          11 F2   (Mp*)
-# 10-11: mm, opcode map:
-#          00 (Op1/Mp1)
-#          01 0F (Op2/Mp2)
-#          10 0F 38 (Op3/Mp3)
-#          11 0F 3A (Op3/Mp3)
-# 12-14: rrr, opcode bits for the ModR/M byte for certain opcodes.
-# 15:    REX.W bit (or VEX.W/E)
-#
-# There is some redundancy between bits 8-11 and the recipe names, but we have
-# enough bits, and the pp+mm format is ready for supporting VEX prefixes.
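A hedged worked example of the packing just described, using ucomisd (66 0F 2E), which the fcmp encodings earlier rely on:

name, mmpp = OPCODE_PREFIX[(0x66, 0x0f)]  # the last byte, 0x2e, is the opcode
bits = 0x2e | (mmpp << 8) | (0 << 12) | (0 << 15)
assert (name, hex(bits)) == ('Mp2', '0x52e')

`decode_ops` below computes exactly this split; the `w=1` used by 64-bit encodings sets bit 15, and `rrr` fills bits 12-14.
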
-
-
-def decode_ops(ops, rrr=0, w=0):
-    # type: (Tuple[int, ...], int, int) -> Tuple[str, int]
-    """
-    Given a sequence of opcode bytes, compute the recipe name prefix and
-    encoding bits.
-    """
-    assert rrr <= 0b111
-    assert w <= 1
-    name, mmpp = OPCODE_PREFIX[ops[:-1]]
-    op = ops[-1]
-    assert op <= 255
-    return (name, op | (mmpp << 8) | (rrr << 12) | (w << 15))
-
-
-def replace_put_op(emit, prefix):
-    # type: (str, str) -> str
-    """
-    Given a snippet of Rust code (or None), replace the `PUT_OP` macro with
-    the corresponding `put_*` function from the `binemit.rs` module.
-    """
-    if emit is None:
-        return None
-    else:
-        return emit.replace('PUT_OP', 'put_' + prefix.lower())
-
-
-# Register class mapping for no-REX instructions.
-NOREX_MAP = {
-        GPR: GPR8,
-        FPR: FPR8
-        }
-
-
-def map_regs_norex(regs):
-    # type: (Sequence[OperandConstraint]) -> Sequence[OperandConstraint]
-    return tuple(NOREX_MAP.get(rc, rc) if isinstance(rc, RegClass) else rc
-                 for rc in regs)
-
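A hedged usage note: a recipe variant without a REX prefix can only name the low eight registers, so `map_regs_norex` narrows any `RegClass` constraint and passes everything else (such as tied-operand indices) through unchanged:

# GPR/FPR and their 8-register subsets come from .registers above;
# the 0 stands for an operand tied to input 0.
assert map_regs_norex((GPR, FPR, 0)) == (GPR8, FPR8, 0)
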
-
-
-class TailRecipe:
-    """
-    Generate encoding recipes on demand.
-
-    x86 encodings are somewhat orthogonal with the opcode representation on
-    one side and the ModR/M, SIB and immediate fields on the other side.
-
-    A `TailRecipe` represents the part of an encoding that follows the opcode.
-    It is used to generate full encoding recipes on demand when combined with
-    an opcode.
-
-    The arguments are the same as for an `EncRecipe`, except for `base_size`,
-    which does not include the size of the opcode.
-
-    The `when_prefixed` parameter specifies a recipe that should be substituted
-    for this one when a REX (or VEX) prefix is present. This is relevant for
-    recipes that can only access the ABCD registers without a REX prefix, but
-    are able to access all registers with a prefix.
-
-    The `requires_prefix` parameter indicates that the recipe can't be used
-    without a REX prefix.
-
-    The `emit` parameter contains Rust code to actually emit an encoding, as
-    `EncRecipe` does. Additionally, the text `PUT_OP` is substituted with
-    the proper `put_*` function from the `x86/binemit.rs` module.
-    """
-
-    def __init__(
-            self,
-            name,                   # type: str
-            format,                 # type: InstructionFormat
-            base_size,              # type: int
-            ins,                    # type: ConstraintSeq
-            outs,                   # type: ConstraintSeq
-            branch_range=None,      # type: int
-            clobbers_flags=True,    # type: bool
-            instp=None,             # type: PredNode
-            isap=None,              # type: PredNode
-            when_prefixed=None,     # type: TailRecipe
-            requires_prefix=False,  # type: bool
-            emit=None,              # type: str
-            compute_size=None       # type: str
-    ):
-        # type: (...) -> None
-        self.name = name
-        self.format = format
-        self.base_size = base_size
-        self.ins = ins
-        self.outs = outs
-        self.branch_range = branch_range
-        self.clobbers_flags = clobbers_flags
-        self.instp = instp
-        self.isap = isap
-        self.when_prefixed = when_prefixed
-        self.requires_prefix = requires_prefix
-        self.emit = emit
-        self.compute_size = compute_size
-
-        # Cached recipes, keyed by name prefix.
-        self.recipes = dict()  # type: Dict[str, EncRecipe]
-
-    def __call__(self, *ops, **kwargs):
-        # type: (*int, **int) -> Tuple[EncRecipe, int]
-        """
-        Create an encoding recipe and encoding bits for the opcode bytes in
-        `ops`.
-        """
-        assert not self.requires_prefix, "Tail recipe requires REX prefix."
-        rrr = kwargs.get('rrr', 0)
-        w = kwargs.get('w', 0)
-        name, bits = decode_ops(ops, rrr, w)
-        base_size = len(ops) + self.base_size
-
-        # All branch ranges are relative to the end of the instruction.
-        branch_range = None  # type: BranchRange
-        if self.branch_range is not None:
-            branch_range = (base_size, self.branch_range)
-
-        if name not in self.recipes:
-            recipe = EncRecipe(
-                name + self.name,
-                self.format,
-                base_size,
-                ins=self.ins,
-                outs=self.outs,
-                branch_range=branch_range,
-                clobbers_flags=self.clobbers_flags,
-                instp=self.instp,
-                isap=self.isap,
-                emit=replace_put_op(self.emit, name),
-                compute_size=self.compute_size)
-
-            recipe.ins = map_regs_norex(recipe.ins)
-            recipe.outs = map_regs_norex(recipe.outs)
-            self.recipes[name] = recipe
-        return (self.recipes[name], bits)
-
-    def rex(self, *ops, **kwargs):
-        # type: (*int, **int) -> Tuple[EncRecipe, int]
-        """
-        Create a REX encoding recipe and encoding bits for the opcode bytes in
-        `ops`.
-
-        The recipe will always generate a REX prefix, whether it is required or
-        not. For instructions that don't require a REX prefix, two encodings
-        should be added: One with REX and one without.
-        """
-        # Use the prefixed alternative recipe when applicable.
-        if self.when_prefixed:
-            return self.when_prefixed.rex(*ops, **kwargs)
-
-        rrr = kwargs.get('rrr', 0)
-        w = kwargs.get('w', 0)
-        name, bits = decode_ops(ops, rrr, w)
-        name = 'Rex' + name
-        base_size = 1 + len(ops) + self.base_size
-
-        # All branch ranges are relative to the end of the instruction.
-        branch_range = None  # type: BranchRange
-        if self.branch_range is not None:
-            branch_range = (base_size, self.branch_range)
-
-        if name not in self.recipes:
-            recipe = EncRecipe(
-                name + self.name,
-                self.format,
-                base_size,
-                ins=self.ins,
-                outs=self.outs,
-                branch_range=branch_range,
-                clobbers_flags=self.clobbers_flags,
-                instp=self.instp,
-                isap=self.isap,
-                emit=replace_put_op(self.emit, name),
-                compute_size=self.compute_size)
-            self.recipes[name] = recipe
-
-        return (self.recipes[name], bits)
-
-    @staticmethod
-    def check_names(globs):
-        # type: (Dict[str, Any]) -> None
-        for name, obj in globs.items():
-            if isinstance(obj, TailRecipe):
-                assert name == obj.name, "Mismatched TailRecipe name: " + name
-
-
-def floatccs(iform):
-    # type: (InstructionFormat) -> PredNode
-    """
-    Return an instruction predicate that checks if `iform.cond` is one of the
-    directly supported floating point condition codes.
-    """
-    return Or(*(IsEqual(iform.cond, cc) for cc in supported_floatccs))
-
-
-def valid_scale(iform):
-    # type: (InstructionFormat) -> PredNode
-    """
-    Return an instruction predicate that checks if `iform.imm` is a valid
-    `scale` for a SIB byte.
-    """
-    return Or(IsEqual(iform.imm, 1),
-              IsEqual(iform.imm, 2),
-              IsEqual(iform.imm, 4),
-              IsEqual(iform.imm, 8))
-
-
-# A null unary instruction that takes a GPR register. Can be used for identity
-# copies and no-op conversions.
-null = EncRecipe('null', Unary, base_size=0, ins=GPR, outs=0, emit='')
-
-stacknull = EncRecipe('stacknull', Unary, base_size=0, ins=StackGPR32,
-                      outs=StackGPR32, emit='')
-
-debugtrap = EncRecipe('debugtrap', NullAry, base_size=1, ins=(), outs=(),
-                      emit='''
-                      sink.put1(0xcc);
-                      ''')
-
-# XX opcode, no ModR/M.
-trap = TailRecipe(
-    'trap', Trap, base_size=0, ins=(), outs=(),
-    emit='''
-    sink.trap(code, func.srclocs[inst]);
-    PUT_OP(bits, BASE_REX, sink);
-    ''')
-
-# Macro: conditional jump over a ud2.
-trapif = EncRecipe(
-    'trapif', IntCondTrap, base_size=4, ins=FLAG.rflags, outs=(),
-    clobbers_flags=False,
-    emit='''
-    // Jump over a 2-byte ud2.
-    sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
-    sink.put1(2);
-    // ud2.
- sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - ''') - -trapff = EncRecipe( - 'trapff', FloatCondTrap, base_size=4, ins=FLAG.rflags, outs=(), - clobbers_flags=False, - instp=floatccs(FloatCondTrap), - emit=''' - // Jump over a 2-byte ud2. - sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); - sink.put1(2); - // ud2. - sink.trap(code, func.srclocs[inst]); - sink.put1(0x0f); - sink.put1(0x0b); - ''') - - -# XX /r -rr = TailRecipe( - 'rr', Binary, base_size=1, ins=(GPR, GPR), outs=0, - emit=''' - PUT_OP(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - ''') - -# XX /r with operands swapped. (RM form). -rrx = TailRecipe( - 'rrx', Binary, base_size=1, ins=(GPR, GPR), outs=0, - emit=''' - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - ''') - -# XX /r with FPR ins and outs. A form. -fa = TailRecipe( - 'fa', Binary, base_size=1, ins=(FPR, FPR), outs=0, - emit=''' - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_rr(in_reg1, in_reg0, sink); - ''') - -# XX /r with FPR ins and outs. A form with input operands swapped. -fax = TailRecipe( - 'fax', Binary, base_size=1, ins=(FPR, FPR), outs=1, - emit=''' - PUT_OP(bits, rex2(in_reg0, in_reg1), sink); - modrm_rr(in_reg0, in_reg1, sink); - ''') - -# XX /n for a unary operation with extension bits. -ur = TailRecipe( - 'ur', Unary, base_size=1, ins=GPR, outs=0, - emit=''' - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - ''') - -# XX /r, but for a unary operator with separate input/output register, like -# copies. MR form, preserving flags. -umr = TailRecipe( - 'umr', Unary, base_size=1, ins=GPR, outs=GPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - ''') - -# Same as umr, but with FPR -> GPR registers. -rfumr = TailRecipe( - 'rfumr', Unary, base_size=1, ins=FPR, outs=GPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(out_reg0, in_reg0), sink); - modrm_rr(out_reg0, in_reg0, sink); - ''') - -# XX /r, but for a unary operator with separate input/output register. -# RM form. Clobbers FLAGS. -urm = TailRecipe( - 'urm', Unary, base_size=1, ins=GPR, outs=GPR, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r. Same as urm, but doesn't clobber FLAGS. -urm_noflags = TailRecipe( - 'urm_noflags', Unary, base_size=1, ins=GPR, outs=GPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r. Same as urm_noflags, but input limited to ABCD. -urm_noflags_abcd = TailRecipe( - 'urm_noflags_abcd', Unary, base_size=1, ins=ABCD, outs=GPR, - when_prefixed=urm_noflags, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r, RM form, FPR -> FPR. -furm = TailRecipe( - 'furm', Unary, base_size=1, ins=FPR, outs=FPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r, RM form, GPR -> FPR. -frurm = TailRecipe( - 'frurm', Unary, base_size=1, ins=GPR, outs=FPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r, RM form, FPR -> GPR. 
-rfurm = TailRecipe( - 'rfurm', Unary, base_size=1, ins=FPR, outs=GPR, - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - ''') - -# XX /r, RMI form for one of the roundXX SSE 4.1 instructions. -furmi_rnd = TailRecipe( - 'furmi_rnd', Unary, base_size=2, ins=FPR, outs=FPR, - isap=use_sse41, - emit=''' - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rr(in_reg0, out_reg0, sink); - sink.put1(match opcode { - Opcode::Nearest => 0b00, - Opcode::Floor => 0b01, - Opcode::Ceil => 0b10, - Opcode::Trunc => 0b11, - x => panic!("{} unexpected for furmi_rnd", opcode), - }); - ''') - -# XX /r, for regmove instructions. -rmov = TailRecipe( - 'rmov', RegMove, base_size=1, ins=GPR, outs=(), - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - ''') - -# XX /r, for regmove instructions (FPR version, RM encoded). -frmov = TailRecipe( - 'frmov', RegMove, base_size=1, ins=FPR, outs=(), - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(src, dst), sink); - modrm_rr(src, dst, sink); - ''') - -# XX /n with one arg in %rcx, for shifts. -rc = TailRecipe( - 'rc', Binary, base_size=1, ins=(GPR, GPR.rcx), outs=0, - emit=''' - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - ''') - -# XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. -div = TailRecipe( - 'div', Ternary, base_size=1, - ins=(GPR.rax, GPR.rdx, GPR), outs=(GPR.rax, GPR.rdx), - emit=''' - sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); - PUT_OP(bits, rex1(in_reg2), sink); - modrm_r_bits(in_reg2, bits, sink); - ''') - -# XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) -mulx = TailRecipe( - 'mulx', Binary, base_size=1, - ins=(GPR.rax, GPR), outs=(GPR.rax, GPR.rdx), - emit=''' - PUT_OP(bits, rex1(in_reg1), sink); - modrm_r_bits(in_reg1, bits, sink); - ''') - -# XX /n ib with 8-bit immediate sign-extended. -r_ib = TailRecipe( - 'r_ib', BinaryImm, base_size=2, ins=GPR, outs=0, - instp=IsSignedInt(BinaryImm.imm, 8), - emit=''' - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - ''') - -# XX /n id with 32-bit immediate sign-extended. -r_id = TailRecipe( - 'r_id', BinaryImm, base_size=5, ins=GPR, outs=0, - instp=IsSignedInt(BinaryImm.imm, 32), - emit=''' - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - ''') - -# XX /n id with 32-bit immediate sign-extended. UnaryImm version. -u_id = TailRecipe( - 'u_id', UnaryImm, base_size=5, ins=(), outs=GPR, - instp=IsSignedInt(UnaryImm.imm, 32), - emit=''' - PUT_OP(bits, rex1(out_reg0), sink); - modrm_r_bits(out_reg0, bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - ''') - -# XX+rd id unary with 32-bit immediate. Note no recipe predicate. -pu_id = TailRecipe( - 'pu_id', UnaryImm, base_size=4, ins=(), outs=GPR, - emit=''' - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - ''') - -# XX+rd id unary with bool immediate. Note no recipe predicate. -pu_id_bool = TailRecipe( - 'pu_id_bool', UnaryBool, base_size=4, ins=(), outs=GPR, - emit=''' - // The destination register is encoded in the low bits of the opcode. - // No ModR/M. 
- PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: u32 = if imm { 1 } else { 0 }; - sink.put4(imm); - ''') - -# XX+rd iq unary with 64-bit immediate. -pu_iq = TailRecipe( - 'pu_iq', UnaryImm, base_size=8, ins=(), outs=GPR, - emit=''' - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - let imm: i64 = imm.into(); - sink.put8(imm as u64); - ''') - -# XX /n Unary with floating point 32-bit immediate equal to zero. -f32imm_z = TailRecipe( - 'f32imm_z', UnaryIeee32, base_size=1, ins=(), outs=FPR, - instp=IsZero32BitFloat(UnaryIeee32.imm), - emit=''' - PUT_OP(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - ''') - -# XX /n Unary with floating point 64-bit immediate equal to zero. -f64imm_z = TailRecipe( - 'f64imm_z', UnaryIeee64, base_size=1, ins=(), outs=FPR, - instp=IsZero64BitFloat(UnaryIeee64.imm), - emit=''' - PUT_OP(bits, rex2(out_reg0, out_reg0), sink); - modrm_rr(out_reg0, out_reg0, sink); - ''') - -pushq = TailRecipe( - 'pushq', Unary, base_size=0, ins=GPR, outs=(), - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - PUT_OP(bits | (in_reg0 & 7), rex1(in_reg0), sink); - ''') - -popq = TailRecipe( - 'popq', NullAry, base_size=0, ins=(), outs=GPR, - emit=''' - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - ''') - -# XX /r, for regmove instructions. -copysp = TailRecipe( - 'copysp', CopySpecial, base_size=1, ins=(), outs=(), - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(dst, src), sink); - modrm_rr(dst, src, sink); - ''') - -adjustsp = TailRecipe( - 'adjustsp', Unary, base_size=1, ins=(GPR), outs=(), - emit=''' - PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink); - modrm_rr(RU::rsp.into(), in_reg0, sink); - ''') - -adjustsp_ib = TailRecipe( - 'adjustsp_ib', UnaryImm, base_size=2, ins=(), outs=(), - instp=IsSignedInt(UnaryImm.imm, 8), - emit=''' - PUT_OP(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put1(imm as u8); - ''') - -adjustsp_id = TailRecipe( - 'adjustsp_id', UnaryImm, base_size=5, ins=(), outs=(), - instp=IsSignedInt(UnaryImm.imm, 32), - emit=''' - PUT_OP(bits, rex1(RU::rsp.into()), sink); - modrm_r_bits(RU::rsp.into(), bits, sink); - let imm: i64 = imm.into(); - sink.put4(imm as u32); - ''') - - -# XX+rd id with Abs4 function relocation. -fnaddr4 = TailRecipe( - 'fnaddr4', FuncAddr, base_size=4, ins=(), outs=GPR, - emit=''' - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put4(0); - ''') - -# XX+rd iq with Abs8 function relocation. -fnaddr8 = TailRecipe( - 'fnaddr8', FuncAddr, base_size=8, ins=(), outs=GPR, - emit=''' - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(Reloc::Abs8, - &func.dfg.ext_funcs[func_ref].name, - 0); - sink.put8(0); - ''') - -# Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). -allones_fnaddr4 = TailRecipe( - 'allones_fnaddr4', FuncAddr, base_size=4, ins=(), outs=GPR, - emit=''' - PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); - sink.reloc_external(Reloc::Abs4, - &func.dfg.ext_funcs[func_ref].name, - 0); - // Write the immediate as `!0` for the benefit of BaldrMonkey. - sink.put4(!0); - ''') - -# Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). 
-allones_fnaddr8 = TailRecipe(
-    'allones_fnaddr8', FuncAddr, base_size=8, ins=(), outs=GPR,
-    emit='''
-    PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-    sink.reloc_external(Reloc::Abs8,
-                        &func.dfg.ext_funcs[func_ref].name,
-                        0);
-    // Write the immediate as `!0` for the benefit of BaldrMonkey.
-    sink.put8(!0);
-    ''')
-
-pcrel_fnaddr8 = TailRecipe(
-    'pcrel_fnaddr8', FuncAddr, base_size=5, ins=(), outs=GPR,
-    # rex2 gets passed 0 for r/m register because the upper bit of
-    # r/m doesn't get decoded when in rip-relative addressing mode.
-    emit='''
-    PUT_OP(bits, rex2(0, out_reg0), sink);
-    modrm_riprel(out_reg0, sink);
-    // The addend adjusts for the difference between the end of the
-    // instruction and the beginning of the immediate field.
-    sink.reloc_external(Reloc::X86PCRel4,
-                        &func.dfg.ext_funcs[func_ref].name,
-                        -4);
-    sink.put4(0);
-    ''')
-
-got_fnaddr8 = TailRecipe(
-    'got_fnaddr8', FuncAddr, base_size=5, ins=(), outs=GPR,
-    # rex2 gets passed 0 for r/m register because the upper bit of
-    # r/m doesn't get decoded when in rip-relative addressing mode.
-    emit='''
-    PUT_OP(bits, rex2(0, out_reg0), sink);
-    modrm_riprel(out_reg0, sink);
-    // The addend adjusts for the difference between the end of the
-    // instruction and the beginning of the immediate field.
-    sink.reloc_external(Reloc::X86GOTPCRel4,
-                        &func.dfg.ext_funcs[func_ref].name,
-                        -4);
-    sink.put4(0);
-    ''')
-
-
-# XX+rd id with Abs4 globalsym relocation.
-gvaddr4 = TailRecipe(
-    'gvaddr4', UnaryGlobalValue, base_size=4, ins=(), outs=GPR,
-    emit='''
-    PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-    sink.reloc_external(Reloc::Abs4,
-                        &func.global_values[global_value].symbol_name(),
-                        0);
-    sink.put4(0);
-    ''')
-
-# XX+rd iq with Abs8 globalsym relocation.
-gvaddr8 = TailRecipe(
-    'gvaddr8', UnaryGlobalValue, base_size=8, ins=(), outs=GPR,
-    emit='''
-    PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-    sink.reloc_external(Reloc::Abs8,
-                        &func.global_values[global_value].symbol_name(),
-                        0);
-    sink.put8(0);
-    ''')
-
-# XX+rd iq with PCRel4 globalsym relocation.
-pcrel_gvaddr8 = TailRecipe(
-    'pcrel_gvaddr8', UnaryGlobalValue, base_size=5, ins=(), outs=GPR,
-    emit='''
-    PUT_OP(bits, rex2(0, out_reg0), sink);
-    modrm_rm(5, out_reg0, sink);
-    // The addend adjusts for the difference between the end of the
-    // instruction and the beginning of the immediate field.
-    sink.reloc_external(Reloc::X86PCRel4,
-                        &func.global_values[global_value].symbol_name(),
-                        -4);
-    sink.put4(0);
-    ''')
-
-# XX+rd iq with GOTPCRel4 globalsym relocation.
-got_gvaddr8 = TailRecipe(
-    'got_gvaddr8', UnaryGlobalValue, base_size=5, ins=(), outs=GPR,
-    emit='''
-    PUT_OP(bits, rex2(0, out_reg0), sink);
-    modrm_rm(5, out_reg0, sink);
-    // The addend adjusts for the difference between the end of the
-    // instruction and the beginning of the immediate field.
-    sink.reloc_external(Reloc::X86GOTPCRel4,
-                        &func.global_values[global_value].symbol_name(),
-                        -4);
-    sink.put4(0);
-    ''')
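The recurring `-4` addend above follows from where the relocation is applied (a hedged sketch of standard `S + A - P` resolution, not Cranelift code): the CPU adds the 4-byte displacement to the address of the end of the instruction, but the reloc is applied at the start of the displacement field, four bytes earlier:

def resolve_pcrel4(target, field_addr, addend=-4):
    # Linker-style S + A - P: symbol + addend - place.
    return target + addend - field_addr

# Field at 0x100 means the instruction ends at 0x104; reaching 0x180
# therefore needs a displacement of 0x180 - 0x104 = 0x7c.
assert resolve_pcrel4(0x180, 0x100) == 0x7c
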
-
-#
-# Stack addresses.
-#
-# TODO: Alternative forms for 8-bit immediates, when applicable.
-#
-
-spaddr4_id = TailRecipe(
-    'spaddr4_id', StackLoad, base_size=6, ins=(), outs=GPR,
-    emit='''
-    let sp = StackRef::sp(stack_slot, &func.stack_slots);
-    let base = stk_base(sp.base);
-    PUT_OP(bits, rex2(base, out_reg0), sink);
-    modrm_sib_disp32(out_reg0, sink);
-    sib_noindex(base, sink);
-    let imm : i32 = offset.into();
-    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
-    ''')
-
-spaddr8_id = TailRecipe(
-    'spaddr8_id', StackLoad, base_size=6, ins=(), outs=GPR,
-    emit='''
-    let sp = StackRef::sp(stack_slot, &func.stack_slots);
-    let base = stk_base(sp.base);
-    PUT_OP(bits, rex2(base, out_reg0), sink);
-    modrm_sib_disp32(out_reg0, sink);
-    sib_noindex(base, sink);
-    let imm : i32 = offset.into();
-    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
-    ''')
-
-
-#
-# Store recipes.
-#
-
-# XX /r register-indirect store with no offset.
-st = TailRecipe(
-    'st', Store, base_size=1, ins=(GPR, GPR), outs=(),
-    instp=IsEqual(Store.offset, 0),
-    clobbers_flags=False,
-    compute_size="size_plus_maybe_sib_or_offset_for_in_reg_1",
-    emit='''
-    if !flags.notrap() {
-        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-    }
-    PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
-    if needs_sib_byte(in_reg1) {
-        modrm_sib(in_reg0, sink);
-        sib_noindex(in_reg1, sink);
-    } else if needs_offset(in_reg1) {
-        modrm_disp8(in_reg1, in_reg0, sink);
-        sink.put1(0);
-    } else {
-        modrm_rm(in_reg1, in_reg0, sink);
-    }
-    ''')
-
-# XX /r register-indirect store with index and no offset.
-stWithIndex = TailRecipe(
-    'stWithIndex', StoreComplex, base_size=2,
-    ins=(GPR, GPR, GPR),
-    outs=(),
-    instp=IsEqual(StoreComplex.offset, 0),
-    clobbers_flags=False,
-    compute_size="size_plus_maybe_offset_for_in_reg_1",
-    emit='''
-    if !flags.notrap() {
-        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-    }
-    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-    // The else branch always inserts an SIB byte.
-    if needs_offset(in_reg1) {
-        modrm_sib_disp8(in_reg0, sink);
-        sib(0, in_reg2, in_reg1, sink);
-        sink.put1(0);
-    } else {
-        modrm_sib(in_reg0, sink);
-        sib(0, in_reg2, in_reg1, sink);
-    }
-    ''')
-
-# XX /r register-indirect store with no offset.
-# Only ABCD allowed for stored value. This is for byte stores with no REX.
-st_abcd = TailRecipe(
-    'st_abcd', Store, base_size=1, ins=(ABCD, GPR), outs=(),
-    instp=IsEqual(Store.offset, 0),
-    when_prefixed=st,
-    clobbers_flags=False,
-    compute_size="size_plus_maybe_sib_or_offset_for_in_reg_1",
-    emit='''
-    if !flags.notrap() {
-        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-    }
-    PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
-    if needs_sib_byte(in_reg1) {
-        modrm_sib(in_reg0, sink);
-        sib_noindex(in_reg1, sink);
-    } else if needs_offset(in_reg1) {
-        modrm_disp8(in_reg1, in_reg0, sink);
-        sink.put1(0);
-    } else {
-        modrm_rm(in_reg1, in_reg0, sink);
-    }
-    ''')
-
-# XX /r register-indirect store with index and no offset.
-# Only ABCD allowed for stored value. This is for byte stores with no REX.
-stWithIndex_abcd = TailRecipe(
-    'stWithIndex_abcd', StoreComplex, base_size=2,
-    ins=(ABCD, GPR, GPR),
-    outs=(),
-    instp=IsEqual(StoreComplex.offset, 0),
-    clobbers_flags=False,
-    compute_size="size_plus_maybe_offset_for_in_reg_1",
-    emit='''
-    if !flags.notrap() {
-        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-    }
-    PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-    // The else branch always inserts an SIB byte.
- if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - ''') - -# XX /r register-indirect store of FPR with no offset. -fst = TailRecipe( - 'fst', Store, base_size=1, ins=(FPR, GPR), outs=(), - instp=IsEqual(Store.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_sib_or_offset_for_in_reg_1", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else if needs_offset(in_reg1) { - modrm_disp8(in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg1, in_reg0, sink); - } - ''') -# XX /r register-indirect store with index and no offset of FPR. -fstWithIndex = TailRecipe( - 'fstWithIndex', StoreComplex, base_size=2, - ins=(FPR, GPR, GPR), outs=(), - instp=IsEqual(StoreComplex.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_offset_for_in_reg_1", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - } - ''') - -# XX /r register-indirect store with 8-bit offset. -stDisp8 = TailRecipe( - 'stDisp8', Store, base_size=2, ins=(GPR, GPR), outs=(), - instp=IsSignedInt(Store.offset, 8), - clobbers_flags=False, - compute_size="size_plus_maybe_sib_for_in_reg_1", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with index and 8-bit offset. -stWithIndexDisp8 = TailRecipe( - 'stWithIndexDisp8', StoreComplex, base_size=3, - ins=(GPR, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 8), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with 8-bit offset. -# Only ABCD allowed for stored value. This is for byte stores with no REX. -stDisp8_abcd = TailRecipe( - 'stDisp8_abcd', Store, base_size=2, ins=(ABCD, GPR), outs=(), - instp=IsSignedInt(Store.offset, 8), - when_prefixed=stDisp8, - clobbers_flags=False, - compute_size="size_plus_maybe_sib_for_in_reg_1", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with index and 8-bit offset. -# Only ABCD allowed for stored value. This is for byte stores with no REX. 
-stWithIndexDisp8_abcd = TailRecipe( - 'stWithIndexDisp8_abcd', StoreComplex, base_size=3, - ins=(ABCD, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 8), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with 8-bit offset of FPR. -fstDisp8 = TailRecipe( - 'fstDisp8', Store, base_size=2, ins=(FPR, GPR), outs=(), - instp=IsSignedInt(Store.offset, 8), - clobbers_flags=False, - compute_size='size_plus_maybe_sib_for_in_reg_1', - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp8(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp8(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with index and 8-bit offset of FPR. -fstWithIndexDisp8 = TailRecipe( - 'fstWithIndexDisp8', StoreComplex, base_size=3, - ins=(FPR, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 8), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp8(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r register-indirect store with 32-bit offset. -stDisp32 = TailRecipe( - 'stDisp32', Store, base_size=5, ins=(GPR, GPR), outs=(), - clobbers_flags=False, - compute_size='size_plus_maybe_sib_for_in_reg_1', - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# XX /r register-indirect store with index and 32-bit offset. -stWithIndexDisp32 = TailRecipe( - 'stWithIndexDisp32', StoreComplex, base_size=6, - ins=(GPR, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 32), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# XX /r register-indirect store with 32-bit offset. -# Only ABCD allowed for stored value. This is for byte stores with no REX. -stDisp32_abcd = TailRecipe( - 'stDisp32_abcd', Store, base_size=5, ins=(ABCD, GPR), outs=(), - when_prefixed=stDisp32, - clobbers_flags=False, - compute_size="size_plus_maybe_sib_for_in_reg_1", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# XX /r register-indirect store with index and 32-bit offset. 
-# Only ABCD allowed for stored value. This is for byte stores with no REX. -stWithIndexDisp32_abcd = TailRecipe( - 'stWithIndexDisp32_abcd', StoreComplex, base_size=6, - ins=(ABCD, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 32), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# XX /r register-indirect store with 32-bit offset of FPR. -fstDisp32 = TailRecipe( - 'fstDisp32', Store, base_size=5, ins=(FPR, GPR), outs=(), - clobbers_flags=False, - compute_size='size_plus_maybe_sib_for_in_reg_1', - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - if needs_sib_byte(in_reg1) { - modrm_sib_disp32(in_reg0, sink); - sib_noindex(in_reg1, sink); - } else { - modrm_disp32(in_reg1, in_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# XX /r register-indirect store with index and 32-bit offset of FPR. -fstWithIndexDisp32 = TailRecipe( - 'fstWithIndexDisp32', StoreComplex, base_size=6, - ins=(FPR, GPR, GPR), - outs=(), - instp=IsSignedInt(StoreComplex.offset, 32), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib_disp32(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# Unary spill with SIB and 32-bit displacement. -spillSib32 = TailRecipe( - 'spillSib32', Unary, base_size=6, ins=GPR, outs=StackGPR32, - clobbers_flags=False, - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - PUT_OP(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - ''') - -# Like spillSib32, but targeting an FPR rather than a GPR. -fspillSib32 = TailRecipe( - 'fspillSib32', Unary, base_size=6, ins=FPR, outs=StackFPR32, - clobbers_flags=False, - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let base = stk_base(out_stk0.base); - PUT_OP(bits, rex2(base, in_reg0), sink); - modrm_sib_disp32(in_reg0, sink); - sib_noindex(base, sink); - sink.put4(out_stk0.offset as u32); - ''') - -# Regspill using RSP-relative addressing. -regspill32 = TailRecipe( - 'regspill32', RegSpill, base_size=6, ins=GPR, outs=(), - clobbers_flags=False, - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - PUT_OP(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - ''') - -# Like regspill32, but targeting an FPR rather than a GPR. -fregspill32 = TailRecipe( - 'fregspill32', RegSpill, base_size=6, ins=FPR, outs=(), - clobbers_flags=False, - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - let dst = StackRef::sp(dst, &func.stack_slots); - let base = stk_base(dst.base); - PUT_OP(bits, rex2(base, src), sink); - modrm_sib_disp32(src, sink); - sib_noindex(base, sink); - sink.put4(dst.offset as u32); - ''') - -# -# Load recipes -# - -# XX /r load with no offset. 
-ld = TailRecipe( - 'ld', Load, base_size=1, ins=(GPR), outs=(GPR), - instp=IsEqual(Load.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_sib_or_offset_for_in_reg_0", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - ''') - -# XX /r load with index and no offset. -ldWithIndex = TailRecipe( - 'ldWithIndex', LoadComplex, base_size=2, - ins=(GPR, GPR), - outs=(GPR), - instp=IsEqual(LoadComplex.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_offset_for_in_reg_0", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - ''') - -# XX /r float load with no offset. -fld = TailRecipe( - 'fld', Load, base_size=1, ins=(GPR), outs=(FPR), - instp=IsEqual(Load.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_sib_or_offset_for_in_reg_0", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else if needs_offset(in_reg0) { - modrm_disp8(in_reg0, out_reg0, sink); - sink.put1(0); - } else { - modrm_rm(in_reg0, out_reg0, sink); - } - ''') - -# XX /r float load with index and no offset. -fldWithIndex = TailRecipe( - 'fldWithIndex', LoadComplex, base_size=2, - ins=(GPR, GPR), - outs=(FPR), - instp=IsEqual(LoadComplex.offset, 0), - clobbers_flags=False, - compute_size="size_plus_maybe_offset_for_in_reg_0", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - // The else branch always inserts an SIB byte. - if needs_offset(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - } - ''') - -# XX /r load with 8-bit offset. -ldDisp8 = TailRecipe( - 'ldDisp8', Load, base_size=2, ins=(GPR), outs=(GPR), - instp=IsSignedInt(Load.offset, 8), - clobbers_flags=False, - compute_size="size_plus_maybe_sib_for_in_reg_0", - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - if needs_sib_byte(in_reg0) { - modrm_sib_disp8(out_reg0, sink); - sib_noindex(in_reg0, sink); - } else { - modrm_disp8(in_reg0, out_reg0, sink); - } - let offset: i32 = offset.into(); - sink.put1(offset as u8); - ''') - -# XX /r load with index and 8-bit offset. 
-ldWithIndexDisp8 = TailRecipe(
- 'ldWithIndexDisp8', LoadComplex, base_size=3,
- ins=(GPR, GPR),
- outs=(GPR),
- instp=IsSignedInt(LoadComplex.offset, 8),
- clobbers_flags=False,
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
- modrm_sib_disp8(out_reg0, sink);
- sib(0, in_reg1, in_reg0, sink);
- let offset: i32 = offset.into();
- sink.put1(offset as u8);
- ''')
-
-# XX /r float load with 8-bit offset.
-fldDisp8 = TailRecipe(
- 'fldDisp8', Load, base_size=2, ins=(GPR), outs=(FPR),
- instp=IsSignedInt(Load.offset, 8),
- clobbers_flags=False,
- compute_size="size_plus_maybe_sib_for_in_reg_0",
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
- if needs_sib_byte(in_reg0) {
- modrm_sib_disp8(out_reg0, sink);
- sib_noindex(in_reg0, sink);
- } else {
- modrm_disp8(in_reg0, out_reg0, sink);
- }
- let offset: i32 = offset.into();
- sink.put1(offset as u8);
- ''')
-
-# XX /r float load with index and 8-bit offset.
-fldWithIndexDisp8 = TailRecipe(
- 'fldWithIndexDisp8', LoadComplex, base_size=3,
- ins=(GPR, GPR),
- outs=(FPR),
- instp=IsSignedInt(LoadComplex.offset, 8),
- clobbers_flags=False,
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
- modrm_sib_disp8(out_reg0, sink);
- sib(0, in_reg1, in_reg0, sink);
- let offset: i32 = offset.into();
- sink.put1(offset as u8);
- ''')
-
-# XX /r load with 32-bit offset.
-ldDisp32 = TailRecipe(
- 'ldDisp32', Load, base_size=5, ins=(GPR), outs=(GPR),
- instp=IsSignedInt(Load.offset, 32),
- clobbers_flags=False,
- compute_size='size_plus_maybe_sib_for_in_reg_0',
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
- if needs_sib_byte(in_reg0) {
- modrm_sib_disp32(out_reg0, sink);
- sib_noindex(in_reg0, sink);
- } else {
- modrm_disp32(in_reg0, out_reg0, sink);
- }
- let offset: i32 = offset.into();
- sink.put4(offset as u32);
- ''')
-
-# XX /r load with index and 32-bit offset.
-ldWithIndexDisp32 = TailRecipe(
- 'ldWithIndexDisp32', LoadComplex, base_size=6,
- ins=(GPR, GPR),
- outs=(GPR),
- instp=IsSignedInt(LoadComplex.offset, 32),
- clobbers_flags=False,
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
- modrm_sib_disp32(out_reg0, sink);
- sib(0, in_reg1, in_reg0, sink);
- let offset: i32 = offset.into();
- sink.put4(offset as u32);
- ''')
-
-# XX /r float load with 32-bit offset.
-fldDisp32 = TailRecipe(
- 'fldDisp32', Load, base_size=5, ins=(GPR), outs=(FPR),
- instp=IsSignedInt(Load.offset, 32),
- clobbers_flags=False,
- compute_size="size_plus_maybe_sib_for_in_reg_0",
- emit='''
- if !flags.notrap() {
- sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
- }
- PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
- if needs_sib_byte(in_reg0) {
- modrm_sib_disp32(out_reg0, sink);
- sib_noindex(in_reg0, sink);
- } else {
- modrm_disp32(in_reg0, out_reg0, sink);
- }
- let offset: i32 = offset.into();
- sink.put4(offset as u32);
- ''')
-
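The `needs_sib_byte`/`needs_offset` branches in the emit code above handle two quirks of the x86 ModR/M encoding: a base register whose low three encoding bits are 100 (RSP, R12) can only be expressed through an SIB byte, and mod=00 with rm=101 (RBP, R13) means "disp32, no base", forcing a dummy zero disp8 instead. A sketch of what those helpers presumably compute (they live in the Rust support code, not in this file):

def needs_sib_byte(reg):
    # rm=0b100 selects an SIB byte rather than naming a base register,
    # so RSP (4) and R12 (12) need the SIB escape.
    return reg & 7 == 4

def needs_offset(reg):
    # mod=00 with rm=0b101 means RIP-relative/absolute disp32, so RBP (5)
    # and R13 (13) must use mod=01 with an explicit zero displacement.
    return reg & 7 == 5

def size_plus_maybe_sib_or_offset_for_in_reg_0(base_size, in_reg0):
    # Sketch of the compute_size hook named by the recipes above: one
    # extra byte whenever the base register triggers either quirk.
    extra = 1 if needs_sib_byte(in_reg0) or needs_offset(in_reg0) else 0
    return base_size + extra

-# XX /r float load with index and 32-bit offset.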
-fldWithIndexDisp32 = TailRecipe( - 'fldWithIndexDisp32', LoadComplex, base_size=6, - ins=(GPR, GPR), - outs=(FPR), - instp=IsSignedInt(LoadComplex.offset, 32), - clobbers_flags=False, - emit=''' - if !flags.notrap() { - sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); - } - PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib_disp32(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); - let offset: i32 = offset.into(); - sink.put4(offset as u32); - ''') - -# Unary fill with SIB and 32-bit displacement. -fillSib32 = TailRecipe( - 'fillSib32', Unary, base_size=6, ins=StackGPR32, outs=GPR, - clobbers_flags=False, - emit=''' - let base = stk_base(in_stk0.base); - PUT_OP(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - ''') - -# Like fillSib32, but targeting an FPR rather than a GPR. -ffillSib32 = TailRecipe( - 'ffillSib32', Unary, base_size=6, ins=StackFPR32, outs=FPR, - clobbers_flags=False, - emit=''' - let base = stk_base(in_stk0.base); - PUT_OP(bits, rex2(base, out_reg0), sink); - modrm_sib_disp32(out_reg0, sink); - sib_noindex(base, sink); - sink.put4(in_stk0.offset as u32); - ''') - -# Regfill with RSP-relative 32-bit displacement. -regfill32 = TailRecipe( - 'regfill32', RegFill, base_size=6, ins=StackGPR32, outs=(), - clobbers_flags=False, - emit=''' - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - PUT_OP(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - ''') - -# Like regfill32, but targeting an FPR rather than a GPR. -fregfill32 = TailRecipe( - 'fregfill32', RegFill, base_size=6, ins=StackFPR32, outs=(), - clobbers_flags=False, - emit=''' - let src = StackRef::sp(src, &func.stack_slots); - let base = stk_base(src.base); - PUT_OP(bits, rex2(base, dst), sink); - modrm_sib_disp32(dst, sink); - sib_noindex(base, sink); - sink.put4(src.offset as u32); - ''') - -# -# Call/return -# -call_id = TailRecipe( - 'call_id', Call, base_size=4, ins=(), outs=(), - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - PUT_OP(bits, BASE_REX, sink); - // The addend adjusts for the difference between the end of the - // instruction and the beginning of the immediate field. 
- sink.reloc_external(Reloc::X86CallPCRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - ''') - -call_plt_id = TailRecipe( - 'call_plt_id', Call, base_size=4, ins=(), outs=(), - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - PUT_OP(bits, BASE_REX, sink); - sink.reloc_external(Reloc::X86CallPLTRel4, - &func.dfg.ext_funcs[func_ref].name, - -4); - sink.put4(0); - ''') - -call_r = TailRecipe( - 'call_r', CallIndirect, base_size=1, ins=GPR, outs=(), - emit=''' - sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - ''') - -ret = TailRecipe( - 'ret', MultiAry, base_size=0, ins=(), outs=(), - emit=''' - PUT_OP(bits, BASE_REX, sink); - ''') - -# -# Branches -# -jmpb = TailRecipe( - 'jmpb', Jump, base_size=1, ins=(), outs=(), - branch_range=8, - clobbers_flags=False, - emit=''' - PUT_OP(bits, BASE_REX, sink); - disp1(destination, func, sink); - ''') - -jmpd = TailRecipe( - 'jmpd', Jump, base_size=4, ins=(), outs=(), - branch_range=32, - clobbers_flags=False, - emit=''' - PUT_OP(bits, BASE_REX, sink); - disp4(destination, func, sink); - ''') - -brib = TailRecipe( - 'brib', BranchInt, base_size=1, ins=FLAG.rflags, outs=(), - branch_range=8, - clobbers_flags=False, - emit=''' - PUT_OP(bits | icc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - ''') - -brid = TailRecipe( - 'brid', BranchInt, base_size=4, ins=FLAG.rflags, outs=(), - branch_range=32, - clobbers_flags=False, - emit=''' - PUT_OP(bits | icc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - ''') - -brfb = TailRecipe( - 'brfb', BranchFloat, base_size=1, ins=FLAG.rflags, outs=(), - branch_range=8, - clobbers_flags=False, - instp=floatccs(BranchFloat), - emit=''' - PUT_OP(bits | fcc2opc(cond), BASE_REX, sink); - disp1(destination, func, sink); - ''') - -brfd = TailRecipe( - 'brfd', BranchFloat, base_size=4, ins=FLAG.rflags, outs=(), - branch_range=32, - clobbers_flags=False, - instp=floatccs(BranchFloat), - emit=''' - PUT_OP(bits | fcc2opc(cond), BASE_REX, sink); - disp4(destination, func, sink); - ''') - -indirect_jmp = TailRecipe( - 'indirect_jmp', IndirectJump, base_size=1, ins=GPR, outs=(), - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex1(in_reg0), sink); - modrm_r_bits(in_reg0, bits, sink); - ''') - -jt_entry = TailRecipe( - 'jt_entry', BranchTableEntry, base_size=2, - ins=(GPR, GPR), - outs=(GPR), - clobbers_flags=False, - instp=valid_scale(BranchTableEntry), - compute_size="size_plus_maybe_offset_for_in_reg_1", - emit=''' - PUT_OP(bits, rex3(in_reg1, out_reg0, in_reg0), sink); - if needs_offset(in_reg1) { - modrm_sib_disp8(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - sink.put1(0); - } else { - modrm_sib(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); - } - ''') - -jt_base = TailRecipe( - 'jt_base', BranchTableBase, base_size=5, ins=(), outs=(GPR), - clobbers_flags=False, - emit=''' - PUT_OP(bits, rex2(0, out_reg0), sink); - modrm_riprel(out_reg0, sink); - - // No reloc is needed here as the jump table is emitted directly after - // the function body. - jt_disp4(table, func, sink); - ''') - -# -# Test flags and set a register. -# -# These setCC instructions only set the low 8 bits, and they can only write -# ABCD registers without a REX prefix. -# -# Other instruction encodings accepting `b1` inputs have the same constraints -# and only look at the low 8 bits of the input register. 
-#
-
-seti = TailRecipe(
- 'seti', IntCond, base_size=1, ins=FLAG.rflags, outs=GPR,
- requires_prefix=True,
- clobbers_flags=False,
- emit='''
- PUT_OP(bits | icc2opc(cond), rex1(out_reg0), sink);
- modrm_r_bits(out_reg0, bits, sink);
- ''')
-seti_abcd = TailRecipe(
- 'seti_abcd', IntCond, base_size=1, ins=FLAG.rflags, outs=ABCD,
- when_prefixed=seti,
- clobbers_flags=False,
- emit='''
- PUT_OP(bits | icc2opc(cond), rex1(out_reg0), sink);
- modrm_r_bits(out_reg0, bits, sink);
- ''')
-
-setf = TailRecipe(
- 'setf', FloatCond, base_size=1, ins=FLAG.rflags, outs=GPR,
- requires_prefix=True,
- clobbers_flags=False,
- emit='''
- PUT_OP(bits | fcc2opc(cond), rex1(out_reg0), sink);
- modrm_r_bits(out_reg0, bits, sink);
- ''')
-setf_abcd = TailRecipe(
- 'setf_abcd', FloatCond, base_size=1, ins=FLAG.rflags, outs=ABCD,
- when_prefixed=setf,
- clobbers_flags=False,
- emit='''
- PUT_OP(bits | fcc2opc(cond), rex1(out_reg0), sink);
- modrm_r_bits(out_reg0, bits, sink);
- ''')
-
-#
-# Conditional move (a.k.a. integer select)
-# (maybe-REX.W) 0F 4x modrm(r,r)
-# 1 byte, modrm(r,r), is after the opcode
-#
-cmov = TailRecipe(
- 'cmov', IntSelect, base_size=1, ins=(FLAG.rflags, GPR, GPR), outs=2,
- requires_prefix=False,
- clobbers_flags=False,
- emit='''
- PUT_OP(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
- modrm_rr(in_reg1, in_reg2, sink);
- ''')
-
-#
-# Bit scan forwards and reverse
-#
-bsf_and_bsr = TailRecipe(
- 'bsf_and_bsr', Unary, base_size=1, ins=GPR, outs=(GPR, FLAG.rflags),
- requires_prefix=False,
- clobbers_flags=True,
- emit='''
- PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
- modrm_rr(in_reg0, out_reg0, sink);
- ''')
-
-#
-# Compare and set flags.
-#
-
-# XX /r, MR form. Compare two GPR registers and set flags.
-rcmp = TailRecipe(
- 'rcmp', Binary, base_size=1, ins=(GPR, GPR), outs=FLAG.rflags,
- emit='''
- PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
- modrm_rr(in_reg0, in_reg1, sink);
- ''')
-
-# XX /r, RM form. Compare two FPR registers and set flags.
-fcmp = TailRecipe(
- 'fcmp', Binary, base_size=1, ins=(FPR, FPR), outs=FLAG.rflags,
- emit='''
- PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
- modrm_rr(in_reg1, in_reg0, sink);
- ''')
-
-# XX /n, MI form with imm8.
-rcmp_ib = TailRecipe(
- 'rcmp_ib', BinaryImm, base_size=2, ins=GPR, outs=FLAG.rflags,
- instp=IsSignedInt(BinaryImm.imm, 8),
- emit='''
- PUT_OP(bits, rex1(in_reg0), sink);
- modrm_r_bits(in_reg0, bits, sink);
- let imm: i64 = imm.into();
- sink.put1(imm as u8);
- ''')
-
-# XX /n, MI form with imm32.
-rcmp_id = TailRecipe(
- 'rcmp_id', BinaryImm, base_size=5, ins=GPR, outs=FLAG.rflags,
- instp=IsSignedInt(BinaryImm.imm, 32),
- emit='''
- PUT_OP(bits, rex1(in_reg0), sink);
- modrm_r_bits(in_reg0, bits, sink);
- let imm: i64 = imm.into();
- sink.put4(imm as u32);
- ''')
-
-# Same as rcmp, but second operand is the stack pointer.
-rcmp_sp = TailRecipe(
- 'rcmp_sp', Unary, base_size=1, ins=GPR, outs=FLAG.rflags,
- emit='''
- PUT_OP(bits, rex2(in_reg0, RU::rsp.into()), sink);
- modrm_rr(in_reg0, RU::rsp.into(), sink);
- ''')
-
-# Test-and-branch.
-#
-# This recipe represents the macro fusion of a test and a conditional branch.
-# This serves two purposes:
-#
-# 1. Guarantee that the test and branch get scheduled next to each other so
-# macro fusion is guaranteed to be possible.
-# 2. Hide the status flags from Cranelift, which doesn't currently model flags.
-#
-# The encoding bits affect both the test and the branch instruction:
-#
-# Bits 0-7 are the Jcc opcode.
-# Bits 8-15 control the test instruction, which always has opcode byte 0x85.
-tjccb = TailRecipe(
- 'tjccb', Branch, base_size=1 + 2, ins=GPR, outs=(),
- branch_range=8,
- emit='''
- // test r, r.
- PUT_OP((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(bits as u8);
- disp1(destination, func, sink);
- ''')
-
-tjccd = TailRecipe(
- 'tjccd', Branch, base_size=1 + 6, ins=GPR, outs=(),
- branch_range=32,
- emit='''
- // test r, r.
- PUT_OP((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(0x0f);
- sink.put1(bits as u8);
- disp4(destination, func, sink);
- ''')
-
-# 8-bit test-and-branch.
-#
-# Same as tjccb, but only looks at the low 8 bits of the register, for b1
-# types.
-t8jccb = TailRecipe(
- 't8jccb', Branch, base_size=1 + 2, ins=GPR, outs=(),
- branch_range=8,
- requires_prefix=True,
- emit='''
- // test8 r, r.
- PUT_OP((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(bits as u8);
- disp1(destination, func, sink);
- ''')
-t8jccb_abcd = TailRecipe(
- 't8jccb_abcd', Branch, base_size=1 + 2, ins=ABCD, outs=(),
- branch_range=8,
- when_prefixed=t8jccb,
- emit='''
- // test8 r, r.
- PUT_OP((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(bits as u8);
- disp1(destination, func, sink);
- ''')
-
-t8jccd = TailRecipe(
- 't8jccd', Branch, base_size=1 + 6, ins=GPR, outs=(),
- branch_range=32,
- requires_prefix=True,
- emit='''
- // test8 r, r.
- PUT_OP((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(0x0f);
- sink.put1(bits as u8);
- disp4(destination, func, sink);
- ''')
-t8jccd_abcd = TailRecipe(
- 't8jccd_abcd', Branch, base_size=1 + 6, ins=ABCD, outs=(),
- branch_range=32,
- when_prefixed=t8jccd,
- emit='''
- // test8 r, r.
- PUT_OP((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
- modrm_rr(in_reg0, in_reg0, sink);
- // Jcc instruction.
- sink.put1(0x0f);
- sink.put1(bits as u8);
- disp4(destination, func, sink);
- ''')
-
-# Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
-# The register allocator can't handle a branch instruction with constrained
-# operands like the t8jccd_abcd above. This variant can accept the b1 operand
-# in any register, but it is larger because it uses a 32-bit test instruction
-# with a 0xff immediate.
-t8jccd_long = TailRecipe(
- 't8jccd_long', Branch, base_size=5 + 6, ins=GPR, outs=(),
- branch_range=32,
- emit='''
- // test32 r, 0xff.
- PUT_OP((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
- modrm_r_bits(in_reg0, bits, sink);
- sink.put4(0xff);
- // Jcc instruction.
- sink.put1(0x0f);
- sink.put1(bits as u8);
- disp4(destination, func, sink);
- ''')
-
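To make the bit-packing concrete: for a fused test-and-branch on `eax` with an "equal" condition, `bits` carries the short JE opcode (0x74) in its low byte, so the recipe emits TEST r/m32, r32 (opcode 0x85), a mod=11 ModR/M byte, the Jcc opcode, and the 8-bit displacement. A hedged byte-level sketch, ignoring REX prefixes and the real PUT_OP plumbing:

def modrm_rr(rm, reg):
    # ModR/M with mod=0b11 selects the register-to-register form.
    return 0xC0 | ((reg & 7) << 3) | (rm & 7)

def tjccb_bytes(bits, reg, disp8):
    # Illustrative encoding of the tjccb recipe: the high byte of `bits`
    # parameterizes the TEST instruction (opcode byte 0x85); the low
    # byte is the short Jcc opcode.
    return [0x85, modrm_rr(reg, reg), bits & 0xff, disp8 & 0xff]

# test %eax, %eax; je .+5  ->  85 C0 74 05
assert tjccb_bytes(0x74, 0, 5) == [0x85, 0xC0, 0x74, 0x05]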
-# Comparison that produces a `b1` result in a GPR.
-#
-# This is a macro of a `cmp` instruction followed by a `setCC` instruction.
-# This is not a great solution because:
-#
-# - The cmp+setcc combination is not recognized by the CPU's macro fusion.
-# - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
-# instructions may need a REX independently.
-# - Modeling CPU flags in the type system would be better.
-#
-# Since the `setCC` instructions only write an 8-bit register, we use that as
-# our `b1` representation: a `b1` value is represented as a GPR where the low 8
-# bits are known to be 0 or 1. The high bits are undefined.
-#
-# This bandaid macro doesn't support a REX prefix for the final `setCC`
-# instruction, so it is limited to the `ABCD` register class for booleans.
-# The omission of a `when_prefixed` alternative is deliberate here.
-icscc = TailRecipe(
- 'icscc', IntCompare, base_size=1 + 3, ins=(GPR, GPR), outs=ABCD,
- emit='''
- // Comparison instruction.
- PUT_OP(bits, rex2(in_reg0, in_reg1), sink);
- modrm_rr(in_reg0, in_reg1, sink);
- // `setCC` instruction, no REX.
- use crate::ir::condcodes::IntCC::*;
- let setcc = match cond {
- Equal => 0x94,
- NotEqual => 0x95,
- SignedLessThan => 0x9c,
- SignedGreaterThanOrEqual => 0x9d,
- SignedGreaterThan => 0x9f,
- SignedLessThanOrEqual => 0x9e,
- UnsignedLessThan => 0x92,
- UnsignedGreaterThanOrEqual => 0x93,
- UnsignedGreaterThan => 0x97,
- UnsignedLessThanOrEqual => 0x96,
- };
- sink.put1(0x0f);
- sink.put1(setcc);
- modrm_rr(out_reg0, 0, sink);
- ''')
-
-icscc_ib = TailRecipe(
- 'icscc_ib', IntCompareImm, base_size=2 + 3, ins=GPR, outs=ABCD,
- instp=IsSignedInt(IntCompareImm.imm, 8),
- emit='''
- // Comparison instruction.
- PUT_OP(bits, rex1(in_reg0), sink);
- modrm_r_bits(in_reg0, bits, sink);
- let imm: i64 = imm.into();
- sink.put1(imm as u8);
- // `setCC` instruction, no REX.
- use crate::ir::condcodes::IntCC::*;
- let setcc = match cond {
- Equal => 0x94,
- NotEqual => 0x95,
- SignedLessThan => 0x9c,
- SignedGreaterThanOrEqual => 0x9d,
- SignedGreaterThan => 0x9f,
- SignedLessThanOrEqual => 0x9e,
- UnsignedLessThan => 0x92,
- UnsignedGreaterThanOrEqual => 0x93,
- UnsignedGreaterThan => 0x97,
- UnsignedLessThanOrEqual => 0x96,
- };
- sink.put1(0x0f);
- sink.put1(setcc);
- modrm_rr(out_reg0, 0, sink);
- ''')
-
-icscc_id = TailRecipe(
- 'icscc_id', IntCompareImm, base_size=5 + 3, ins=GPR, outs=ABCD,
- instp=IsSignedInt(IntCompareImm.imm, 32),
- emit='''
- // Comparison instruction.
- PUT_OP(bits, rex1(in_reg0), sink);
- modrm_r_bits(in_reg0, bits, sink);
- let imm: i64 = imm.into();
- sink.put4(imm as u32);
- // `setCC` instruction, no REX.
- use crate::ir::condcodes::IntCC::*;
- let setcc = match cond {
- Equal => 0x94,
- NotEqual => 0x95,
- SignedLessThan => 0x9c,
- SignedGreaterThanOrEqual => 0x9d,
- SignedGreaterThan => 0x9f,
- SignedLessThanOrEqual => 0x9e,
- UnsignedLessThan => 0x92,
- UnsignedGreaterThanOrEqual => 0x93,
- UnsignedGreaterThan => 0x97,
- UnsignedLessThanOrEqual => 0x96,
- };
- sink.put1(0x0f);
- sink.put1(setcc);
- modrm_rr(out_reg0, 0, sink);
- ''')
-
-# Same thing for floating point: the `floatccs` helper makes a FloatCompare
-# instruction predicate with the supported condition codes.
-#
-# The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
-#
-# ZPC OSA
-# UN 111 000
-# GT 000 000
-# LT 001 000
-# EQ 100 000
-#
-# Not all floating point condition codes are supported.
-# The omission of a `when_prefixed` alternative is deliberate here.
-fcscc = TailRecipe(
- 'fcscc', FloatCompare, base_size=1 + 3, ins=(FPR, FPR), outs=ABCD,
- instp=floatccs(FloatCompare),
- emit='''
- // Comparison instruction.
- PUT_OP(bits, rex2(in_reg1, in_reg0), sink);
- modrm_rr(in_reg1, in_reg0, sink);
- // `setCC` instruction, no REX.
- use crate::ir::condcodes::FloatCC::*; - let setcc = match cond { - Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) - Unordered => 0x9a, // UN => setp (P=1) - OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), - UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) - GreaterThan => 0x97, // GT => seta (C=0&Z=0) - GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) - UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) - UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) - Equal | // EQ - NotEqual | // UN|LT|GT - LessThan | // LT - LessThanOrEqual | // LT|EQ - UnorderedOrGreaterThan | // UN|GT - UnorderedOrGreaterThanOrEqual // UN|GT|EQ - => panic!("{} not supported by fcscc", cond), - }; - sink.put1(0x0f); - sink.put1(setcc); - modrm_rr(out_reg0, 0, sink); - ''') - -TailRecipe.check_names(globals()) diff --git a/cranelift-codegen/meta-python/isa/x86/registers.py b/cranelift-codegen/meta-python/isa/x86/registers.py deleted file mode 100644 index 3cca0bc37..000000000 --- a/cranelift-codegen/meta-python/isa/x86/registers.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -x86 register banks. - -While the floating-point registers are straight-forward, the general purpose -register bank has a few quirks on x86. We have these encodings of the 8-bit -registers: - - I32 I64 | 16b 32b 64b - 000 AL AL | AX EAX RAX - 001 CL CL | CX ECX RCX - 010 DL DL | DX EDX RDX - 011 BL BL | BX EBX RBX - 100 AH SPL | SP ESP RSP - 101 CH BPL | BP EBP RBP - 110 DH SIL | SI ESI RSI - 111 BH DIL | DI EDI RDI - -Here, the I64 column refers to the registers you get with a REX prefix. Without -the REX prefix, you get the I32 registers. - -The 8-bit registers are not that useful since WebAssembly only has i32 and i64 -data types, and the H-registers even less so. Rather than trying to model the -H-registers accurately, we'll avoid using them in both I32 and I64 modes. -""" -from __future__ import absolute_import -from cdsl.registers import RegBank, RegClass, Stack -from .defs import ISA - - -IntRegs = RegBank( - 'IntRegs', ISA, - 'General purpose registers', - units=16, prefix='r', - names='rax rcx rdx rbx rsp rbp rsi rdi'.split()) - -FloatRegs = RegBank( - 'FloatRegs', ISA, - 'SSE floating point registers', - units=16, prefix='xmm') - -FlagRegs = RegBank( - 'FlagRegs', ISA, - 'Flag registers', - units=1, - pressure_tracking=False, - names=['rflags']) - -GPR = RegClass(IntRegs) -GPR8 = GPR[0:8] -ABCD = GPR[0:4] -FPR = RegClass(FloatRegs) -FPR8 = FPR[0:8] -FLAG = RegClass(FlagRegs) - -# Constraints for stack operands. - -# Stack operand with a 32-bit signed displacement from either RBP or RSP. -StackGPR32 = Stack(GPR) -StackFPR32 = Stack(FPR) - -RegClass.extract_names(globals()) diff --git a/cranelift-codegen/meta-python/isa/x86/settings.py b/cranelift-codegen/meta-python/isa/x86/settings.py deleted file mode 100644 index 74633349e..000000000 --- a/cranelift-codegen/meta-python/isa/x86/settings.py +++ /dev/null @@ -1,61 +0,0 @@ -""" -x86 settings. -""" -from __future__ import absolute_import -from cdsl.settings import SettingGroup, BoolSetting, Preset -from cdsl.predicates import And, Not -import base.settings as shared -from .defs import ISA - -ISA.settings = SettingGroup('x86', parent=shared.group) - -# The has_* settings here correspond to CPUID bits. 
- -# CPUID.01H:ECX -has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]") -has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]") -has_sse41 = BoolSetting("SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]") -has_sse42 = BoolSetting("SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]") -has_popcnt = BoolSetting("POPCNT: CPUID.01H:ECX.POPCNT[bit 23]") -has_avx = BoolSetting("AVX: CPUID.01H:ECX.AVX[bit 28]") - -# CPUID.(EAX=07H, ECX=0H):EBX -has_bmi1 = BoolSetting("BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]") -has_bmi2 = BoolSetting("BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]") - -# CPUID.EAX=80000001H:ECX -has_lzcnt = BoolSetting("LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]") - - -# The use_* settings here are used to determine if a feature can be used. - -use_sse41 = And(has_sse41) -use_sse42 = And(has_sse42, use_sse41) -use_popcnt = And(has_popcnt, has_sse42) -use_bmi1 = And(has_bmi1) -use_lzcnt = And(has_lzcnt) - -is_pic = And(shared.is_pic) -not_is_pic = Not(shared.is_pic) -all_ones_funcaddrs_and_not_is_pic = And(shared.allones_funcaddrs, - Not(shared.is_pic)) -not_all_ones_funcaddrs_and_not_is_pic = And(Not(shared.allones_funcaddrs), - Not(shared.is_pic)) - -# Presets corresponding to x86 CPUs. - -baseline = Preset() - -nehalem = Preset( - has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt) -haswell = Preset(nehalem, has_bmi1, has_bmi2, has_lzcnt) -broadwell = Preset(haswell) -skylake = Preset(broadwell) -cannonlake = Preset(skylake) -icelake = Preset(cannonlake) - -znver1 = Preset( - has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt, - has_bmi1, has_bmi2, has_lzcnt) - -ISA.settings.close(globals()) diff --git a/cranelift-codegen/meta-python/mypy.ini b/cranelift-codegen/meta-python/mypy.ini deleted file mode 100644 index 877e4c9ff..000000000 --- a/cranelift-codegen/meta-python/mypy.ini +++ /dev/null @@ -1,5 +0,0 @@ -[mypy] -disallow_untyped_defs = True -warn_unused_ignores = True -warn_return_any = True -strict_optional = False diff --git a/cranelift-codegen/meta-python/semantics/__init__.py b/cranelift-codegen/meta-python/semantics/__init__.py deleted file mode 100644 index 1ce6b4671..000000000 --- a/cranelift-codegen/meta-python/semantics/__init__.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Definitions for the semantics segment of the Cranelift language.""" -from cdsl.ti import TypeEnv, ti_rtl, get_type_env -from cdsl.operands import ImmediateKind -from cdsl.ast import Var - -try: - from typing import List, Dict, Tuple # noqa - from cdsl.ast import VarAtomMap # noqa - from cdsl.xform import XForm, Rtl # noqa - from cdsl.ti import VarTyping # noqa - from cdsl.instructions import Instruction, InstructionSemantics # noqa -except ImportError: - pass - - -def verify_semantics(inst, src, xforms): - # type: (Instruction, Rtl, InstructionSemantics) -> None - """ - Verify that the semantics transforms in xforms correctly describe the - instruction described by the src Rtl. 
This involves checking that:
- 0) src is a single instance of inst
- 1) For all x \\in xforms, x.src is a single instance of inst
- 2) For any concrete values V of Literals in inst:
- For all concrete typings T of inst:
- There exists a single x \\in xforms that applies to src
- concretized to V and T
- """
- # 0) The source rtl is always a single instance of inst
- assert len(src.rtl) == 1 and src.rtl[0].expr.inst == inst
-
- # 1) For all XForms x, x.src is a single instance of inst
- for x in xforms:
- assert len(x.src.rtl) == 1 and x.src.rtl[0].expr.inst == inst
-
- variants = [src] # type: List[Rtl]
-
- # 2) For all enumerated immediates, compute all the possible
- # versions of src with the concrete value filled in.
- for i in inst.imm_opnums:
- op = inst.ins[i]
- if not (isinstance(op.kind, ImmediateKind) and
- op.kind.is_enumerable()):
- continue
-
- new_variants = [] # type: List[Rtl]
- for rtl_var in variants:
- s = {v: v for v in rtl_var.vars()} # type: VarAtomMap
- arg = rtl_var.rtl[0].expr.args[i]
- assert isinstance(arg, Var)
- for val in op.kind.possible_values():
- s[arg] = val
- new_variants.append(rtl_var.copy(s))
- variants = new_variants
-
- # For any possible version of the src with concrete enumerated immediates
- for src in variants:
- # 2) Any possible typing should be covered by exactly ONE semantic
- # XForm
- src = src.copy({})
- typenv = get_type_env(ti_rtl(src, TypeEnv()))
- typenv.normalize()
- typenv = typenv.extract()
-
- for t in typenv.concrete_typings():
- matching_xforms = [] # type: List[XForm]
- for x in xforms:
- if src.substitution(x.src, {}) is None:
- continue
-
- # Translate t using x.symtab
- t = {x.symtab[str(v)]: tv for (v, tv) in t.items()}
- if (x.ti.permits(t)):
- matching_xforms.append(x)
-
- assert len(matching_xforms) == 1,\
- ("Possible typing {} of {} not matched by exactly one case " +
- ": {}").format(t, src.rtl[0], matching_xforms)
diff --git a/cranelift-codegen/meta-python/semantics/elaborate.py b/cranelift-codegen/meta-python/semantics/elaborate.py
deleted file mode 100644
index 1525b1dee..000000000
--- a/cranelift-codegen/meta-python/semantics/elaborate.py
+++ /dev/null
@@ -1,146 +0,0 @@
-"""
-Tools to elaborate a given Rtl with concrete types into its semantically
-equivalent primitive version. Its elaborated primitive version contains only
-primitive cranelift instructions, which map well to SMTLIB functions.
-"""
-from .primitives import GROUP as PRIMITIVES, prim_to_bv, prim_from_bv
-from cdsl.xform import Rtl
-from cdsl.ast import Var
-
-try:
- from typing import TYPE_CHECKING, Dict, Union, List, Set, Tuple # noqa
- from cdsl.xform import XForm # noqa
- from cdsl.ast import Def, VarAtomMap # noqa
- from cdsl.ti import VarTyping # noqa
-except ImportError:
- TYPE_CHECKING = False
-
-
-def find_matching_xform(d):
- # type: (Def) -> XForm
- """
- Given a concrete Def d, find the unique semantic XForm x in
- d.expr.inst.semantics that applies to it.
- """
- res = [] # type: List[XForm]
- typing = {v: v.get_typevar() for v in d.vars()} # type: VarTyping
-
- for x in d.expr.inst.semantics:
- subst = d.substitution(x.src.rtl[0], {})
-
- # There may not be a substitution if there are concrete Enumerator
- # values in the src pattern. (e.g. specifying the semantics of icmp.eq,
- # icmp.ge... as separate transforms)
- if (subst is None):
- continue
-
- inner_typing = {} # type: VarTyping
- for (v, tv) in typing.items():
- inner_v = subst[v]
- assert isinstance(inner_v, Var)
- inner_typing[inner_v] = tv
-
- if x.ti.permits(inner_typing):
- res.append(x)
-
- assert len(res) == 1, "Couldn't find semantic transform for {}".format(d)
- return res[0]
-
-
-def cleanup_semantics(r, outputs):
- # type: (Rtl, Set[Var]) -> Rtl
- """
- The elaboration process creates a lot of redundant prim_to_bv conversions.
- Clean up the following cases:
-
- 1) prim_to_bv/prim_from_bv pair:
- a.0 << prim_from_bv(bva.0)
- ...
- bva.1 << prim_to_bv(a.0) <-- redundant, replace by bva.0
- ...
-
- 2) prim_to_bv/prim_to_bv pair:
- bva.0 << prim_to_bv(a)
- ...
- bva.1 << prim_to_bv(a) <-- redundant, replace by bva.0
- ...
- """
- new_defs = [] # type: List[Def]
- subst_m = {v: v for v in r.vars()} # type: VarAtomMap
- definition = {} # type: Dict[Var, Def]
- prim_to_bv_map = {} # type: Dict[Var, Def]
-
- # Pass 1: Remove redundant prim_to_bv
- for d in r.rtl:
- inst = d.expr.inst
-
- if (inst == prim_to_bv):
- arg = d.expr.args[0]
- df = d.defs[0]
- assert isinstance(arg, Var)
-
- if arg in definition:
- def_loc = definition[arg]
- if def_loc.expr.inst == prim_from_bv:
- assert isinstance(def_loc.expr.args[0], Var)
- subst_m[df] = def_loc.expr.args[0]
- continue
-
- if arg in prim_to_bv_map:
- subst_m[df] = prim_to_bv_map[arg].defs[0]
- continue
-
- prim_to_bv_map[arg] = d
-
- new_def = d.copy(subst_m)
-
- for v in new_def.defs:
- assert v not in definition # Guaranteed by SSA
- definition[v] = new_def
-
- new_defs.append(new_def)
-
- # Pass 2: Remove dead prim_from_bv
- live = set(outputs) # type: Set[Var]
- for d in new_defs:
- live = live.union(d.uses())
-
- new_defs = [d for d in new_defs if not (d.expr.inst == prim_from_bv and
- d.defs[0] not in live)]
-
- return Rtl(*new_defs)
-
-
-def elaborate(r):
- # type: (Rtl) -> Rtl
- """
- Given a concrete Rtl r, return a semantically equivalent Rtl r1 containing
- only primitive instructions.
- """
- fp = False
- primitives = set(PRIMITIVES.instructions)
- idx = 0
-
- res = Rtl(*r.rtl)
- outputs = res.definitions()
-
- while not fp:
- assert res.is_concrete()
- new_defs = [] # type: List[Def]
- fp = True
-
- for d in res.rtl:
- inst = d.expr.inst
-
- if (inst not in primitives):
- t = find_matching_xform(d)
- transformed = t.apply(Rtl(d), str(idx))
- idx += 1
- new_defs.extend(transformed.rtl)
- fp = False
- else:
- new_defs.append(d)
-
- res.rtl = tuple(new_defs)
-
- return cleanup_semantics(res, outputs)
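As a concrete illustration of the fixpoint loop above, a scalar `iadd.i32` elaborates in a single pass (this mirrors the `test_elaborate_iadd_simple` case in test_elaborate.py; the Vars are illustrative):

from cdsl.ast import Var
from cdsl.xform import Rtl
from base.instructions import iadd

x, y, a = Var('x'), Var('y'), Var('a')
r = Rtl(a << iadd.i32(x, y))
r.cleanup_concrete_rtl()
sem = elaborate(r)
# sem is now equivalent to:
#     bvx << prim_to_bv.i32(x)
#     bvy << prim_to_bv.i32(y)
#     bva << bvadd.bv32(bvx, bvy)
#     a << prim_from_bv.i32(bva)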
diff --git a/cranelift-codegen/meta-python/semantics/macros.py b/cranelift-codegen/meta-python/semantics/macros.py
deleted file mode 100644
index 566bf92ea..000000000
--- a/cranelift-codegen/meta-python/semantics/macros.py
+++ /dev/null
@@ -1,45 +0,0 @@
-"""
-Useful semantics "macro" instructions built on top of
-the primitives.
-"""
-from __future__ import absolute_import
-from cdsl.operands import Operand
-from cdsl.typevar import TypeVar
-from cdsl.instructions import Instruction, InstructionGroup
-from base.types import b1
-from base.immediates import imm64
-from cdsl.ast import Var
-from cdsl.xform import Rtl
-from semantics.primitives import bv_from_imm64, bvite
-import base.formats # noqa
-
-GROUP = InstructionGroup("primitive_macros", "Semantic macros instruction set")
-AnyBV = TypeVar('AnyBV', bitvecs=True, doc="")
-x = Var('x')
-y = Var('y')
-imm = Var('imm')
-a = Var('a')
-
-#
-# Bool-to-bv1
-#
-BV1 = TypeVar("BV1", bitvecs=(1, 1), doc="")
-bv1_op = Operand('bv1_op', BV1, doc="")
-cond_op = Operand("cond", b1, doc="")
-bool2bv = Instruction(
- 'bool2bv', r"""Convert a b1 value to a 1-bit BV""",
- ins=cond_op, outs=bv1_op)
-
-v1 = Var('v1')
-v2 = Var('v2')
-bvone = Var('bvone')
-bvzero = Var('bvzero')
-bool2bv.set_semantics(
- v1 << bool2bv(v2),
- Rtl(
- bvone << bv_from_imm64(imm64(1)),
- bvzero << bv_from_imm64(imm64(0)),
- v1 << bvite(v2, bvone, bvzero)
- ))
-
-GROUP.close()
diff --git a/cranelift-codegen/meta-python/semantics/primitives.py b/cranelift-codegen/meta-python/semantics/primitives.py
deleted file mode 100644
index a3e498f4e..000000000
--- a/cranelift-codegen/meta-python/semantics/primitives.py
+++ /dev/null
@@ -1,133 +0,0 @@
-"""
-Cranelift primitive instruction set.
-
-This module defines a primitive instruction set, in terms of which the base set
-is described. Most instructions in this set correspond 1-1 with an SMTLIB
-bitvector function.
-"""
-from __future__ import absolute_import
-from cdsl.operands import Operand
-from cdsl.typevar import TypeVar
-from cdsl.instructions import Instruction, InstructionGroup
-from cdsl.ti import WiderOrEq
-from base.types import b1
-from base.immediates import imm64
-import base.formats # noqa
-
-GROUP = InstructionGroup("primitive", "Primitive instruction set")
-
-BV = TypeVar('BV', 'A bitvector type.', bitvecs=True)
-BV1 = TypeVar('BV1', 'A single bit bitvector.', bitvecs=(1, 1))
-Real = TypeVar('Real', 'Any real type.', ints=True, floats=True,
- bools=True, simd=True)
-
-x = Operand('x', BV, doc="A semantic value X")
-y = Operand('y', BV, doc="A semantic value Y (same width as X)")
-a = Operand('a', BV, doc="A semantic value A (same width as X)")
-cond = Operand('b', TypeVar.singleton(b1), doc='A b1 value')
-
-real = Operand('real', Real, doc="A real cranelift value")
-fromReal = Operand('fromReal', Real.to_bitvec(),
- doc="A real cranelift value converted to a BV")
-
-#
-# BV Conversion/Materialization
-#
-prim_to_bv = Instruction(
- 'prim_to_bv', r"""
- Convert an SSA Value to a flat bitvector.
- """,
- ins=(real), outs=(fromReal))
-
-prim_from_bv = Instruction(
- 'prim_from_bv', r"""
- Convert a flat bitvector to a real SSA Value.
- """,
- ins=(fromReal), outs=(real))
-
-N = Operand('N', imm64)
-bv_from_imm64 = Instruction(
- 'bv_from_imm64', r"""Materialize an imm64 as a bitvector.""",
- ins=(N), outs=a)
-
-#
-# Generics
-#
-bvite = Instruction(
- 'bvite', r"""Bitvector ternary operator""",
- ins=(cond, x, y), outs=a)
-
-
-xh = Operand('xh', BV.half_width(),
- doc="A semantic value representing the upper half of X")
-xl = Operand('xl', BV.half_width(),
- doc="A semantic value representing the lower half of X")
-bvsplit = Instruction(
- 'bvsplit', r"""
- Split a bitvector into its upper and lower halves.
- """,
- ins=(x), outs=(xh, xl))
-
-xy = Operand('xy', BV.double_width(),
- doc="A semantic value representing the concatenation of X and Y")
-bvconcat = Instruction(
- 'bvconcat', r"""
- Concatenate two bitvectors of the same width.
- """,
- ins=(x, y), outs=xy)
-
-bvadd = Instruction(
- 'bvadd', r"""
- Standard 2's complement addition. Equivalent to wrapping integer
- addition: :math:`a := x + y \pmod{2^B}`.
-
- This instruction does not depend on the signed/unsigned interpretation
- of the operands.
- """,
- ins=(x, y), outs=a)
-#
-# Bitvector comparisons
-#
-
-bveq = Instruction(
- 'bveq', r"""Bitvector equality""",
- ins=(x, y), outs=cond)
-bvne = Instruction(
- 'bvne', r"""Bitvector inequality""",
- ins=(x, y), outs=cond)
-bvsge = Instruction(
- 'bvsge', r"""Signed bitvector greater or equal""",
- ins=(x, y), outs=cond)
-bvsgt = Instruction(
- 'bvsgt', r"""Signed bitvector greater than""",
- ins=(x, y), outs=cond)
-bvsle = Instruction(
- 'bvsle', r"""Signed bitvector less than or equal""",
- ins=(x, y), outs=cond)
-bvslt = Instruction(
- 'bvslt', r"""Signed bitvector less than""",
- ins=(x, y), outs=cond)
-bvuge = Instruction(
- 'bvuge', r"""Unsigned bitvector greater or equal""",
- ins=(x, y), outs=cond)
-bvugt = Instruction(
- 'bvugt', r"""Unsigned bitvector greater than""",
- ins=(x, y), outs=cond)
-bvule = Instruction(
- 'bvule', r"""Unsigned bitvector less than or equal""",
- ins=(x, y), outs=cond)
-bvult = Instruction(
- 'bvult', r"""Unsigned bitvector less than""",
- ins=(x, y), outs=cond)
-
-# Extensions
-ToBV = TypeVar('ToBV', 'A bitvector type.', bitvecs=True)
-x1 = Operand('x1', ToBV, doc="")
-
-bvzeroext = Instruction(
- 'bvzeroext', r"""Unsigned bitvector extension""",
- ins=x, outs=x1, constraints=WiderOrEq(ToBV, BV))
-
-bvsignext = Instruction(
- 'bvsignext', r"""Signed bitvector extension""",
- ins=x, outs=x1, constraints=WiderOrEq(ToBV, BV))
-
-GROUP.close()
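Each comparison primitive corresponds to one SMTLIB operation; the only wrinkle, handled in smtlib.py below, is that z3 comparisons yield a Bool while these instructions produce a 1-bit bitvector, so the result has to be widened. A standalone sketch of the pattern using the z3 Python bindings:

from z3 import BitVec, BitVecVal, If, ULT

x, y = BitVec('x', 32), BitVec('y', 32)
# bvult is the *unsigned* comparison; note that plain `x < y` on z3
# bitvectors would be the signed bvslt instead.
bvult_bit = If(ULT(x, y), BitVecVal(1, 1), BitVecVal(0, 1))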
diff --git a/cranelift-codegen/meta-python/semantics/smtlib.py b/cranelift-codegen/meta-python/semantics/smtlib.py
deleted file mode 100644
index 9ae9fbfaa..000000000
--- a/cranelift-codegen/meta-python/semantics/smtlib.py
+++ /dev/null
@@ -1,241 +0,0 @@
-"""
-Tools to emit SMTLIB bitvector queries encoding concrete RTLs containing only
-primitive instructions.
-"""
-from .primitives import GROUP as PRIMITIVES, prim_from_bv, prim_to_bv, bvadd,\
- bvult, bvzeroext, bvsplit, bvconcat, bvsignext
-from cdsl.ast import Var
-from cdsl.types import BVType
-from .elaborate import elaborate
-from z3 import BitVec, ZeroExt, SignExt, And, Extract, Concat, Not, Solver,\
- unsat, BoolRef, BitVecVal, If, ULT
-from z3.z3core import Z3_mk_eq
-
-try:
- from typing import TYPE_CHECKING, Tuple, Dict, List # noqa
- from cdsl.xform import Rtl, XForm # noqa
- from cdsl.ast import VarAtomMap, Atom # noqa
- from cdsl.ti import VarTyping # noqa
- if TYPE_CHECKING:
- from z3 import ExprRef, BitVecRef # noqa
- Z3VarMap = Dict[Var, BitVecRef]
-except ImportError:
- TYPE_CHECKING = False
-
-
-# Use this for constructing a == b instead of == since MyPy doesn't
-# accept overloading of __eq__ that doesn't return bool
-def mk_eq(e1, e2):
- # type: (ExprRef, ExprRef) -> ExprRef
- """Return a z3 expression equivalent to e1 == e2"""
- return BoolRef(Z3_mk_eq(e1.ctx_ref(), e1.as_ast(), e2.as_ast()), e1.ctx)
-
-
-def to_smt(r):
- # type: (Rtl) -> Tuple[List[ExprRef], Z3VarMap]
- """
- Encode a concrete primitive Rtl r as a z3 query.
- Returns a tuple (query, var_m) where:
- - query is a list of z3 expressions
- - var_m is a map from Vars v with non-BVType to their corresponding z3
- bitvector variable.
- """
- assert r.is_concrete()
- # Should contain only primitives
- primitives = set(PRIMITIVES.instructions)
- assert set(d.expr.inst for d in r.rtl).issubset(primitives)
-
- q = [] # type: List[ExprRef]
- m = {} # type: Z3VarMap
-
- # Build declarations for any bitvector Vars
- var_to_bv = {} # type: Z3VarMap
- for v in r.vars():
- typ = v.get_typevar().singleton_type()
- if not isinstance(typ, BVType):
- continue
-
- var_to_bv[v] = BitVec(v.name, typ.bits)
-
- # Encode each instruction as an equality assertion
- for d in r.rtl:
- inst = d.expr.inst
-
- exp = None # type: ExprRef
- # For prim_to_bv/prim_from_bv just update var_m. No assertion needed.
- if inst == prim_to_bv:
- assert isinstance(d.expr.args[0], Var)
- m[d.expr.args[0]] = var_to_bv[d.defs[0]]
- continue
-
- if inst == prim_from_bv:
- assert isinstance(d.expr.args[0], Var)
- m[d.defs[0]] = var_to_bv[d.expr.args[0]]
- continue
-
- if inst in [bvadd, bvult]: # Binary instructions
- assert len(d.expr.args) == 2 and len(d.defs) == 1
- lhs = d.expr.args[0]
- rhs = d.expr.args[1]
- df = d.defs[0]
- assert isinstance(lhs, Var) and isinstance(rhs, Var)
-
- if inst == bvadd: # Normal binary - output type same as args
- exp = (var_to_bv[lhs] + var_to_bv[rhs])
- else:
- assert inst == bvult
- # Unsigned comparison; `<` on z3 bitvectors would be signed.
- exp = ULT(var_to_bv[lhs], var_to_bv[rhs])
- # Comparison binary - need to convert bool to BitVec 1
- exp = If(exp, BitVecVal(1, 1), BitVecVal(0, 1))
-
- exp = mk_eq(var_to_bv[df], exp)
- elif inst == bvzeroext:
- arg = d.expr.args[0]
- df = d.defs[0]
- assert isinstance(arg, Var)
- fromW = arg.get_typevar().singleton_type().width()
- toW = df.get_typevar().singleton_type().width()
-
- exp = mk_eq(var_to_bv[df], ZeroExt(toW-fromW, var_to_bv[arg]))
- elif inst == bvsignext:
- arg = d.expr.args[0]
- df = d.defs[0]
- assert isinstance(arg, Var)
- fromW = arg.get_typevar().singleton_type().width()
- toW = df.get_typevar().singleton_type().width()
-
- exp = mk_eq(var_to_bv[df], SignExt(toW-fromW, var_to_bv[arg]))
- elif inst == bvsplit:
- arg = d.expr.args[0]
- assert isinstance(arg, Var)
- arg_typ = arg.get_typevar().singleton_type()
- width = arg_typ.width()
- assert (width % 2 == 0)
-
- lo = d.defs[0]
- hi = d.defs[1]
-
- exp = And(mk_eq(var_to_bv[lo],
- Extract(width//2-1, 0, var_to_bv[arg])),
- mk_eq(var_to_bv[hi],
- Extract(width-1, width//2, var_to_bv[arg])))
- elif inst == bvconcat:
- assert isinstance(d.expr.args[0], Var) and \
- isinstance(d.expr.args[1], Var)
- lo = d.expr.args[0]
- hi = d.expr.args[1]
- df = d.defs[0]
-
- # Z3 Concat expects hi bits first, then lo bits
- exp = mk_eq(var_to_bv[df], Concat(var_to_bv[hi], var_to_bv[lo]))
- else:
- assert False, "Unknown primitive instruction {}".format(inst)
-
- q.append(exp)
-
- return (q, m)
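The (query, var_m) pair produced here feeds `equivalent` below, whose contract is that an unsatisfiable query means the two Rtls agree everywhere. A minimal sketch of the driver, mirroring `xform_correct` at the end of this file (prim_src/prim_dst are assumed to come from `elaborate`):

s = Solver()
s.add(*equivalent(prim_src, prim_dst, inp_m, out_m))
if s.check() == unsat:
    pass  # semantically equivalent for every input assignment
else:
    print(s.model())  # concrete values on which the two Rtls disagree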
- - # Rename the vars in r1 and r2 with unique suffixes to avoid conflicts - src_m = {v: Var(v.name + ".a", v.get_typevar()) for v in r1.vars()} # type: VarAtomMap # noqa - dst_m = {v: Var(v.name + ".b", v.get_typevar()) for v in r2.vars()} # type: VarAtomMap # noqa - r1 = r1.copy(src_m) - r2 = r2.copy(dst_m) - - def _translate(m, k_m, v_m): - # type: (VarAtomMap, VarAtomMap, VarAtomMap) -> VarAtomMap - """Obtain a new map from m, by mapping m's keys with k_m and m's values - with v_m""" - res = {} # type: VarAtomMap - for (k, v) in m1.items(): - new_k = k_m[k] - new_v = v_m[v] - assert isinstance(new_k, Var) - res[new_k] = new_v - - return res - - # Convert inp_m, out_m in terms of variables with the .a/.b suffixes - inp_m = _translate(inp_m, src_m, dst_m) - out_m = _translate(out_m, src_m, dst_m) - - # Encode r1 and r2 as SMT queries - (q1, m1) = to_smt(r1) - (q2, m2) = to_smt(r2) - - # Build an expression for the equality of real Cranelift inputs of - # r1 and r2 - args_eq_exp = [] # type: List[ExprRef] - - for (v1, v2) in inp_m.items(): - assert isinstance(v2, Var) - args_eq_exp.append(mk_eq(m1[v1], m2[v2])) - - # Build an expression for the equality of real Cranelift outputs of - # r1 and r2 - results_eq_exp = [] # type: List[ExprRef] - for (v1, v2) in out_m.items(): - assert isinstance(v2, Var) - results_eq_exp.append(mk_eq(m1[v1], m2[v2])) - - # Put the whole query together - return q1 + q2 + args_eq_exp + [Not(And(*results_eq_exp))] - - -def xform_correct(x, typing): - # type: (XForm, VarTyping) -> bool - """ - Given an XForm x and a concrete variable typing for x check whether x is - semantically preserving for the concrete typing. - """ - assert x.ti.permits(typing) - - # Create copies of the x.src and x.dst with their concrete types - src_m = {v: Var(v.name, typing[v]) for v in x.src.vars()} # type: VarAtomMap # noqa - src = x.src.copy(src_m) - dst = x.apply(src) - dst_m = x.dst.substitution(dst, {}) - - # Build maps for the inputs/outputs for src->dst - inp_m = {} # type: VarAtomMap - out_m = {} # type: VarAtomMap - - for v in x.src.vars(): - src_v = src_m[v] - assert isinstance(src_v, Var) - if v.is_input(): - inp_m[src_v] = dst_m[v] - elif v.is_output(): - out_m[src_v] = dst_m[v] - - # Get the primitive semantic Rtls for src and dst - prim_src = elaborate(src) - prim_dst = elaborate(dst) - asserts = equivalent(prim_src, prim_dst, inp_m, out_m) - - s = Solver() - s.add(*asserts) - return s.check() == unsat diff --git a/cranelift-codegen/meta-python/semantics/test_elaborate.py b/cranelift-codegen/meta-python/semantics/test_elaborate.py deleted file mode 100644 index 9ca938bc1..000000000 --- a/cranelift-codegen/meta-python/semantics/test_elaborate.py +++ /dev/null @@ -1,392 +0,0 @@ -from __future__ import absolute_import -from base.instructions import vselect, vsplit, vconcat, iconst, iadd, bint -from base.instructions import b1, icmp, ireduce, iadd_cout -from base.immediates import intcc, imm64 -from base.types import i64, i8, b32, i32, i16, f32 -from cdsl.typevar import TypeVar -from cdsl.ast import Var -from cdsl.xform import Rtl -from unittest import TestCase -from .elaborate import elaborate -from .primitives import prim_to_bv, bvsplit, prim_from_bv, bvconcat, bvadd, \ - bvult, bv_from_imm64, bvite -import base.semantics # noqa - - -def concrete_rtls_eq(r1, r2): - # type: (Rtl, Rtl) -> bool - """ - Check whether 2 concrete Rtls are equivalent. That is: - 1) They are structurally the same (i.e. 
there is a substitution between - them) - 2) Corresponding Vars between them have the same singleton type. - """ - assert r1.is_concrete() - assert r2.is_concrete() - - s = r1.substitution(r2, {}) - - if s is None: - return False - - for (v, v1) in s.items(): - if v.get_typevar().singleton_type() !=\ - v1.get_typevar().singleton_type(): - return False - - return True - - -class TestCleanupConcreteRtl(TestCase): - """ - Test cleanup_concrete_rtl(). cleanup_concrete_rtl() should take Rtls for - which we can infer a single concrete typing, and update the TypeVars - in-place to singleton TVs. - """ - def test_cleanup_concrete_rtl(self): - # type: () -> None - typ = i64.by(4) - x = Var('x') - lo = Var('lo') - hi = Var('hi') - - r = Rtl( - (lo, hi) << vsplit(x), - ) - r1 = r.copy({}) - s = r.substitution(r1, {}) - - s[x].set_typevar(TypeVar.singleton(typ)) - r1.cleanup_concrete_rtl() - assert s is not None - assert s[x].get_typevar().singleton_type() == typ - assert s[lo].get_typevar().singleton_type() == i64.by(2) - assert s[hi].get_typevar().singleton_type() == i64.by(2) - - def test_cleanup_concrete_rtl_fail(self): - # type: () -> None - x = Var('x') - lo = Var('lo') - hi = Var('hi') - r = Rtl( - (lo, hi) << vsplit(x), - ) - - with self.assertRaises(AssertionError): - r.cleanup_concrete_rtl() - - def test_cleanup_concrete_rtl_ireduce(self): - # type: () -> None - x = Var('x') - y = Var('y') - r = Rtl( - y << ireduce(x), - ) - r1 = r.copy({}) - s = r.substitution(r1, {}) - s[x].set_typevar(TypeVar.singleton(i8.by(2))) - r1.cleanup_concrete_rtl() - - assert s is not None - assert s[x].get_typevar().singleton_type() == i8.by(2) - assert s[y].get_typevar().singleton_type() == i8.by(2) - - def test_cleanup_concrete_rtl_ireduce_bad(self): - # type: () -> None - x = Var('x') - y = Var('y') - x.set_typevar(TypeVar.singleton(i16.by(1))) - r = Rtl( - y << ireduce(x), - ) - - with self.assertRaises(AssertionError): - r.cleanup_concrete_rtl() - - def test_vselect_icmpimm(self): - # type: () -> None - x = Var('x') - y = Var('y') - z = Var('z') - w = Var('w') - v = Var('v') - zeroes = Var('zeroes') - imm0 = Var("imm0") - - r = Rtl( - zeroes << iconst(imm0), - y << icmp(intcc.eq, x, zeroes), - v << vselect(y, z, w), - ) - r1 = r.copy({}) - - s = r.substitution(r1, {}) - s[zeroes].set_typevar(TypeVar.singleton(i32.by(4))) - s[z].set_typevar(TypeVar.singleton(f32.by(4))) - - r1.cleanup_concrete_rtl() - - assert s is not None - assert s[zeroes].get_typevar().singleton_type() == i32.by(4) - assert s[x].get_typevar().singleton_type() == i32.by(4) - assert s[y].get_typevar().singleton_type() == b32.by(4) - assert s[z].get_typevar().singleton_type() == f32.by(4) - assert s[w].get_typevar().singleton_type() == f32.by(4) - assert s[v].get_typevar().singleton_type() == f32.by(4) - - def test_bint(self): - # type: () -> None - x = Var('x') - y = Var('y') - z = Var('z') - w = Var('w') - v = Var('v') - u = Var('u') - - r = Rtl( - z << iadd(x, y), - w << bint(v), - u << iadd(z, w) - ) - r1 = r.copy({}) - s = r.substitution(r1, {}) - - s[x].set_typevar(TypeVar.singleton(i32.by(8))) - s[z].set_typevar(TypeVar.singleton(i32.by(8))) - # TODO: Relax this to simd=True - s[v].set_typevar(TypeVar('v', '', bools=(1, 1), simd=(8, 8))) - r1.cleanup_concrete_rtl() - - assert s is not None - assert s[x].get_typevar().singleton_type() == i32.by(8) - assert s[y].get_typevar().singleton_type() == i32.by(8) - assert s[z].get_typevar().singleton_type() == i32.by(8) - assert s[w].get_typevar().singleton_type() == i32.by(8) - assert 
s[u].get_typevar().singleton_type() == i32.by(8)
- assert s[v].get_typevar().singleton_type() == b1.by(8)
-
-
-class TestElaborate(TestCase):
- """
- Test semantics elaboration.
- """
- def setUp(self):
- # type: () -> None
- self.v0 = Var("v0")
- self.v1 = Var("v1")
- self.v2 = Var("v2")
- self.v3 = Var("v3")
- self.v4 = Var("v4")
- self.v5 = Var("v5")
- self.v6 = Var("v6")
- self.v7 = Var("v7")
- self.v8 = Var("v8")
- self.v9 = Var("v9")
- self.imm0 = Var("imm0")
- self.IxN_nonscalar = TypeVar("IxN_nonscalar", "", ints=True,
- scalars=False, simd=True)
- self.TxN = TypeVar("TxN", "", ints=True, bools=True, floats=True,
- scalars=False, simd=True)
- self.b1 = TypeVar.singleton(b1)
-
- def test_elaborate_vsplit(self):
- # type: () -> None
- i32.by(4) # Make sure i32x4 exists.
- i32.by(2) # Make sure i32x2 exists.
- r = Rtl(
- (self.v0, self.v1) << vsplit.i32x4(self.v2),
- )
- r.cleanup_concrete_rtl()
- sem = elaborate(r)
- bvx = Var('bvx')
- bvlo = Var('bvlo')
- bvhi = Var('bvhi')
- x = Var('x')
- lo = Var('lo')
- hi = Var('hi')
-
- exp = Rtl(
- bvx << prim_to_bv.i32x4(x),
- (bvlo, bvhi) << bvsplit.bv128(bvx),
- lo << prim_from_bv.i32x2(bvlo),
- hi << prim_from_bv.i32x2(bvhi)
- )
- exp.cleanup_concrete_rtl()
-
- assert concrete_rtls_eq(sem, exp)
-
- def test_elaborate_vconcat(self):
- # type: () -> None
- i32.by(4) # Make sure i32x4 exists.
- i32.by(2) # Make sure i32x2 exists.
- r = Rtl(
- self.v0 << vconcat.i32x2(self.v1, self.v2),
- )
- r.cleanup_concrete_rtl()
- sem = elaborate(r)
- bvx = Var('bvx')
- bvlo = Var('bvlo')
- bvhi = Var('bvhi')
- x = Var('x')
- lo = Var('lo')
- hi = Var('hi')
-
- exp = Rtl(
- bvlo << prim_to_bv.i32x2(lo),
- bvhi << prim_to_bv.i32x2(hi),
- bvx << bvconcat.bv64(bvlo, bvhi),
- x << prim_from_bv.i32x4(bvx)
- )
- exp.cleanup_concrete_rtl()
-
- assert concrete_rtls_eq(sem, exp)
-
- def test_elaborate_iadd_simple(self):
- # type: () -> None
- i32.by(2) # Make sure i32x2 exists.
- x = Var('x')
- y = Var('y')
- a = Var('a')
- bvx = Var('bvx')
- bvy = Var('bvy')
- bva = Var('bva')
- r = Rtl(
- a << iadd.i32(x, y),
- )
- r.cleanup_concrete_rtl()
- sem = elaborate(r)
- exp = Rtl(
- bvx << prim_to_bv.i32(x),
- bvy << prim_to_bv.i32(y),
- bva << bvadd.bv32(bvx, bvy),
- a << prim_from_bv.i32(bva)
- )
- exp.cleanup_concrete_rtl()
-
- assert concrete_rtls_eq(sem, exp)
-
- def test_elaborate_iadd_elaborate_1(self):
- # type: () -> None
- i32.by(2) # Make sure i32x2 exists.
- r = Rtl(
- self.v0 << iadd.i32x2(self.v1, self.v2),
- )
- r.cleanup_concrete_rtl()
- sem = elaborate(r)
- x = Var('x')
- y = Var('y')
- a = Var('a')
- bvx_1 = Var('bvx_1')
- bvx_2 = Var('bvx_2')
- bvx_5 = Var('bvx_5')
- bvlo_1 = Var('bvlo_1')
- bvlo_2 = Var('bvlo_2')
- bvhi_1 = Var('bvhi_1')
- bvhi_2 = Var('bvhi_2')
-
- bva_3 = Var('bva_3')
- bva_4 = Var('bva_4')
-
- exp = Rtl(
- bvx_1 << prim_to_bv.i32x2(x),
- (bvlo_1, bvhi_1) << bvsplit.bv64(bvx_1),
- bvx_2 << prim_to_bv.i32x2(y),
- (bvlo_2, bvhi_2) << bvsplit.bv64(bvx_2),
- bva_3 << bvadd.bv32(bvlo_1, bvlo_2),
- bva_4 << bvadd.bv32(bvhi_1, bvhi_2),
- bvx_5 << bvconcat.bv32(bva_3, bva_4),
- a << prim_from_bv.i32x2(bvx_5)
- )
- exp.cleanup_concrete_rtl()
-
- assert concrete_rtls_eq(sem, exp)
-
- def test_elaborate_iadd_elaborate_2(self):
- # type: () -> None
- i8.by(4) # Make sure i8x4 exists.
- r = Rtl( - self.v0 << iadd.i8x4(self.v1, self.v2), - ) - r.cleanup_concrete_rtl() - - sem = elaborate(r) - x = Var('x') - y = Var('y') - a = Var('a') - bvx_1 = Var('bvx_1') - bvx_2 = Var('bvx_2') - bvx_5 = Var('bvx_5') - bvx_10 = Var('bvx_10') - bvx_15 = Var('bvx_15') - - bvlo_1 = Var('bvlo_1') - bvlo_2 = Var('bvlo_2') - bvlo_6 = Var('bvlo_6') - bvlo_7 = Var('bvlo_7') - bvlo_11 = Var('bvlo_11') - bvlo_12 = Var('bvlo_12') - - bvhi_1 = Var('bvhi_1') - bvhi_2 = Var('bvhi_2') - bvhi_6 = Var('bvhi_6') - bvhi_7 = Var('bvhi_7') - bvhi_11 = Var('bvhi_11') - bvhi_12 = Var('bvhi_12') - - bva_8 = Var('bva_8') - bva_9 = Var('bva_9') - bva_13 = Var('bva_13') - bva_14 = Var('bva_14') - - exp = Rtl( - bvx_1 << prim_to_bv.i8x4(x), - (bvlo_1, bvhi_1) << bvsplit.bv32(bvx_1), - bvx_2 << prim_to_bv.i8x4(y), - (bvlo_2, bvhi_2) << bvsplit.bv32(bvx_2), - (bvlo_6, bvhi_6) << bvsplit.bv16(bvlo_1), - (bvlo_7, bvhi_7) << bvsplit.bv16(bvlo_2), - bva_8 << bvadd.bv8(bvlo_6, bvlo_7), - bva_9 << bvadd.bv8(bvhi_6, bvhi_7), - bvx_10 << bvconcat.bv8(bva_8, bva_9), - (bvlo_11, bvhi_11) << bvsplit.bv16(bvhi_1), - (bvlo_12, bvhi_12) << bvsplit.bv16(bvhi_2), - bva_13 << bvadd.bv8(bvlo_11, bvlo_12), - bva_14 << bvadd.bv8(bvhi_11, bvhi_12), - bvx_15 << bvconcat.bv8(bva_13, bva_14), - bvx_5 << bvconcat.bv16(bvx_10, bvx_15), - a << prim_from_bv.i8x4(bvx_5) - ) - exp.cleanup_concrete_rtl() - assert concrete_rtls_eq(sem, exp) - - def test_elaborate_iadd_cout_simple(self): - # type: () -> None - x = Var('x') - y = Var('y') - a = Var('a') - c_out = Var('c_out') - bvc_out = Var('bvc_out') - bc_out = Var('bc_out') - bvx = Var('bvx') - bvy = Var('bvy') - bva = Var('bva') - bvone = Var('bvone') - bvzero = Var('bvzero') - r = Rtl( - (a, c_out) << iadd_cout.i32(x, y), - ) - r.cleanup_concrete_rtl() - sem = elaborate(r) - exp = Rtl( - bvx << prim_to_bv.i32(x), - bvy << prim_to_bv.i32(y), - bva << bvadd.bv32(bvx, bvy), - bc_out << bvult.bv32(bva, bvx), - bvone << bv_from_imm64(imm64(1)), - bvzero << bv_from_imm64(imm64(0)), - bvc_out << bvite(bc_out, bvone, bvzero), - a << prim_from_bv.i32(bva), - c_out << prim_from_bv.b1(bvc_out) - ) - exp.cleanup_concrete_rtl() - assert concrete_rtls_eq(sem, exp) diff --git a/cranelift-codegen/meta-python/srcgen.py b/cranelift-codegen/meta-python/srcgen.py deleted file mode 100644 index df5794ced..000000000 --- a/cranelift-codegen/meta-python/srcgen.py +++ /dev/null @@ -1,277 +0,0 @@ -""" -Source code generator. - -The `srcgen` module contains generic helper routines and classes for generating -source code. - -""" -from __future__ import absolute_import -import sys -import os -from collections import OrderedDict - -try: - from typing import Any, List, Set, Tuple # noqa -except ImportError: - pass - - -class Formatter(object): - """ - Source code formatter class. - - - Collect source code to be written to a file. - - Keep track of indentation. 
- - Indentation example: - - >>> f = Formatter() - >>> f.line('Hello line 1') - >>> f.writelines() - Hello line 1 - >>> f.indent_push() - >>> f.comment('Nested comment') - >>> f.indent_pop() - >>> f.format('Back {} again', 'home') - >>> f.writelines() - Hello line 1 - // Nested comment - Back home again - - """ - - shiftwidth = 4 - - def __init__(self): - # type: () -> None - self.indent = '' - self.lines = [] # type: List[str] - - def indent_push(self): - # type: () -> None - """Increase current indentation level by one.""" - self.indent += ' ' * self.shiftwidth - - def indent_pop(self): - # type: () -> None - """Decrease indentation by one level.""" - assert self.indent != '', 'Already at top level indentation' - self.indent = self.indent[0:-self.shiftwidth] - - def line(self, s=None): - # type: (str) -> None - """Add an indented line.""" - if s: - self.lines.append('{}{}\n'.format(self.indent, s)) - else: - self.lines.append('\n') - - def outdented_line(self, s): - # type: (str) -> None - """ - Emit a line outdented one level. - - This is used for '} else {' and similar things inside a single indented - block. - """ - self.lines.append('{}{}\n'.format(self.indent[0:-self.shiftwidth], s)) - - def writelines(self, f=None): - # type: (Any) -> None - """Write all lines to `f`.""" - if not f: - f = sys.stdout - f.writelines(self.lines) - - def update_file(self, filename, directory): - # type: (str, str) -> None - if directory is not None: - filename = os.path.join(directory, filename) - with open(filename, 'w') as f: - self.writelines(f) - - class _IndentedScope(object): - def __init__(self, fmt, after): - # type: (Formatter, str) -> None - self.fmt = fmt - self.after = after - - def __enter__(self): - # type: () -> None - self.fmt.indent_push() - - def __exit__(self, t, v, tb): - # type: (object, object, object) -> None - self.fmt.indent_pop() - if self.after: - self.fmt.line(self.after) - - def indented(self, before=None, after=None): - # type: (str, str) -> Formatter._IndentedScope - """ - Return a scope object for use with a `with` statement: - - >>> f = Formatter() - >>> with f.indented('prefix {', '} suffix'): - ... f.line('hello') - >>> f.writelines() - prefix { - hello - } suffix - - The optional `before` and `after` parameters are surrounding lines - which are *not* indented. - """ - if before: - self.line(before) - return Formatter._IndentedScope(self, after) - - def format(self, fmt, *args): - # type: (str, *Any) -> None - self.line(fmt.format(*args)) - - def multi_line(self, s): - # type: (str) -> None - """Add one or more lines after stripping common indentation.""" - for l in parse_multiline(s): - self.line(l) - - def comment(self, s): - # type: (str) -> None - """Add a comment line.""" - self.line('// ' + s) - - def doc_comment(self, s): - # type: (str) -> None - """Add a (multi-line) documentation comment.""" - for l in parse_multiline(s): - self.line('/// ' + l if l else '///') - - def match(self, m): - # type: (Match) -> None - """ - Add a match expression. 
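The deleted _IndentedScope pairs every indent_push with an indent_pop (plus an optional closing line) via a context manager. A hedged Rust sketch of the same idea using a closure-scoped helper instead of a guard object (names are illustrative and do not refer to any actual Rust port of srcgen):

struct Formatter {
    indent: usize,
    lines: Vec<String>,
}

impl Formatter {
    const SHIFTWIDTH: usize = 4;

    fn new() -> Self {
        Formatter { indent: 0, lines: Vec::new() }
    }

    fn line(&mut self, s: &str) {
        let pad = " ".repeat(self.indent * Self::SHIFTWIDTH);
        self.lines.push(format!("{}{}\n", pad, s));
    }

    // Emit `before`, run `body` one level deeper, then emit `after`:
    // push and pop can never get unbalanced, just like the `with` statement.
    fn indented(&mut self, before: &str, after: &str, body: impl FnOnce(&mut Self)) {
        self.line(before);
        self.indent += 1;
        body(self);
        self.indent -= 1;
        self.line(after);
    }
}

fn main() {
    let mut f = Formatter::new();
    f.indented("prefix {", "} suffix", |f| f.line("hello"));
    assert_eq!(f.lines.concat(), "prefix {\n    hello\n} suffix\n");
}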
- - Example: - - >>> f = Formatter() - >>> m = Match('x') - >>> m.arm('Orange', ['a', 'b'], 'some body') - >>> m.arm('Yellow', ['a', 'b'], 'some body') - >>> m.arm('Green', ['a', 'b'], 'different body') - >>> m.arm('Blue', ['x', 'y'], 'some body') - >>> f.match(m) - >>> f.writelines() - match x { - Orange { a, b } | - Yellow { a, b } => { - some body - } - Green { a, b } => { - different body - } - Blue { x, y } => { - some body - } - } - - """ - with self.indented('match {} {{'.format(m.expr), '}'): - for (fields, body), names in m.arms.items(): - with self.indented('', '}'): - names_left = len(names) - for name in names.keys(): - fields_str = ', '.join(fields) - if len(fields) != 0: - fields_str = '{{ {} }} '.format(fields_str) - names_left -= 1 - if names_left > 0: - suffix = '|' - else: - suffix = '=> {' - self.outdented_line(name + ' ' + fields_str + suffix) - if names_left == 0: - self.multi_line(body) - - -def _indent(s): - # type: (str) -> int - """ - Compute the indentation of s, or None of an empty line. - - Example: - >>> _indent("foo") - 0 - >>> _indent(" bar") - 4 - >>> _indent(" ") - >>> _indent("") - """ - t = s.lstrip() - return len(s) - len(t) if t else None - - -def parse_multiline(s): - # type: (str) -> List[str] - """ - Given a multi-line string, split it into a sequence of lines after - stripping a common indentation, as described in the "trim" function - from PEP 257. This is useful for strings defined with doc strings: - >>> parse_multiline('\\n hello\\n world\\n') - ['hello', 'world'] - """ - if not s: - return [] - # Convert tabs to spaces (following the normal Python rules) - # and split into a list of lines: - lines = s.expandtabs().splitlines() - # Determine minimum indentation (first line doesn't count): - indent = sys.maxsize - for line in lines[1:]: - stripped = line.lstrip() - if stripped: - indent = min(indent, len(line) - len(stripped)) - # Remove indentation (first line is special): - trimmed = [lines[0].strip()] - if indent < sys.maxsize: - for line in lines[1:]: - trimmed.append(line[indent:].rstrip()) - # Strip off trailing and leading blank lines: - while trimmed and not trimmed[-1]: - trimmed.pop() - while trimmed and not trimmed[0]: - trimmed.pop(0) - return trimmed - - -class Match(object): - """ - Match formatting class. - - Match objects collect all the information needed to emit a Rust `match` - expression, automatically deduplicating overlapping identical arms. - - Example: - - >>> m = Match('x') - >>> m.arm('Orange', ['a', 'b'], 'some body') - >>> m.arm('Yellow', ['a', 'b'], 'some body') - >>> m.arm('Green', ['a', 'b'], 'different body') - >>> m.arm('Blue', ['x', 'y'], 'some body') - >>> assert(len(m.arms) == 3) - - Note that this class is ignorant of Rust types, and considers two fields - with the same name to be equivalent. 
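parse_multiline implements the PEP 257 docstring trim: the first line is merely stripped, the common indentation of later non-blank lines is removed, and blank lines at both ends are dropped. A standalone Rust rendering of the same rules (assumes ASCII indentation; illustrative only):

fn parse_multiline(s: &str) -> Vec<String> {
    let mut lines = s.lines();
    // The first line never contributes to the common indentation.
    let first = match lines.next() {
        Some(l) => l.trim().to_string(),
        None => return Vec::new(),
    };
    let rest: Vec<&str> = lines.collect();
    // Minimum indentation over the remaining non-blank lines.
    let indent = rest
        .iter()
        .filter(|l| !l.trim().is_empty())
        .map(|l| l.len() - l.trim_start().len())
        .min()
        .unwrap_or(0);
    let mut out = vec![first];
    for l in rest {
        if l.len() >= indent {
            out.push(l[indent..].trim_end().to_string());
        } else {
            out.push(String::new());
        }
    }
    // Strip trailing and leading blank lines.
    while out.last().map_or(false, |l| l.is_empty()) {
        out.pop();
    }
    while out.first().map_or(false, |l| l.is_empty()) {
        out.remove(0);
    }
    out
}

fn main() {
    assert_eq!(parse_multiline("\n    hello\n    world\n"), ["hello", "world"]);
}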
- """ - - def __init__(self, expr): - # type: (str) -> None - self.expr = expr - self.arms = OrderedDict() # type: OrderedDict[Tuple[Tuple[str, ...], str], OrderedDict[str, None]] # noqa - - def arm(self, name, fields, body): - # type: (str, List[str], str) -> None - key = (tuple(fields), body) - if key not in self.arms: - self.arms[key] = OrderedDict() - self.arms[key][name] = None diff --git a/cranelift-codegen/meta-python/stubs/z3/__init__.pyi b/cranelift-codegen/meta-python/stubs/z3/__init__.pyi deleted file mode 100644 index 2fd6c8341..000000000 --- a/cranelift-codegen/meta-python/stubs/z3/__init__.pyi +++ /dev/null @@ -1,151 +0,0 @@ -from typing import overload, Tuple, Any, List, Iterable, Union, TypeVar -from .z3types import Ast, ContextObj - -TExprRef = TypeVar("TExprRef", bound="ExprRef") - -class Context: - ... - -class Z3PPObject: - ... - -class AstRef(Z3PPObject): - @overload - def __init__(self, ast: Ast, ctx: Context) -> None: - self.ast: Ast = ... - self.ctx: Context= ... - - @overload - def __init__(self, ast: Ast) -> None: - self.ast: Ast = ... - self.ctx: Context= ... - def ctx_ref(self) -> ContextObj: ... - def as_ast(self) -> Ast: ... - def children(self) -> List[AstRef]: ... - -class SortRef(AstRef): - ... - -class FuncDeclRef(AstRef): - def arity(self) -> int: ... - def name(self) -> str: ... - -class ExprRef(AstRef): - def eq(self, other: ExprRef) -> ExprRef: ... - def sort(self) -> SortRef: ... - def decl(self) -> FuncDeclRef: ... - -class BoolSortRef(SortRef): - ... - -class BoolRef(ExprRef): - ... - - -def is_true(a: BoolRef) -> bool: ... -def is_false(a: BoolRef) -> bool: ... -def is_int_value(a: AstRef) -> bool: ... -def substitute(a: AstRef, *m: Tuple[AstRef, AstRef]) -> AstRef: ... - - -class ArithSortRef(SortRef): - ... - -class ArithRef(ExprRef): - def __neg__(self) -> ExprRef: ... - def __le__(self, other: ArithRef) -> ArithRef: ... - def __lt__(self, other: ArithRef) -> ArithRef: ... - def __ge__(self, other: ArithRef) -> ArithRef: ... - def __gt__(self, other: ArithRef) -> ArithRef: ... - def __add__(self, other: ArithRef) -> ArithRef: ... - def __sub__(self, other: ArithRef) -> ArithRef: ... - def __mul__(self, other: ArithRef) -> ArithRef: ... - def __div__(self, other: ArithRef) -> ArithRef: ... - def __mod__(self, other: ArithRef) -> ArithRef: ... - -class IntNumRef(ArithRef): - def as_long(self) -> int: ... - -class BitVecRef(ExprRef): - def __neg__(self) -> ExprRef: ... - def __le__(self, other: BitVecRef) -> ExprRef: ... - def __lt__(self, other: BitVecRef) -> ExprRef: ... - def __ge__(self, other: BitVecRef) -> ExprRef: ... - def __gt__(self, other: BitVecRef) -> ExprRef: ... - def __add__(self, other: BitVecRef) -> BitVecRef: ... - def __sub__(self, other: BitVecRef) -> BitVecRef: ... - def __mul__(self, other: BitVecRef) -> BitVecRef: ... - def __div__(self, other: BitVecRef) -> BitVecRef: ... - def __mod__(self, other: BitVecRef) -> BitVecRef: ... - -class BitVecNumRef(BitVecRef): - def as_long(self) -> int: ... - -class CheckSatResult: ... - -class ModelRef(Z3PPObject): - def __getitem__(self, k: FuncDeclRef) -> IntNumRef: ... - def decls(self) -> Iterable[FuncDeclRef]: ... - -class Solver(Z3PPObject): - @overload - def __init__(self) -> None: - self.ctx: Context = ... - @overload - def __init__(self, ctx:Context) -> None: - self.ctx: Context = ... - - def add(self, e:ExprRef) -> None: ... - def to_smt2(self) -> str: ... - def check(self) -> CheckSatResult: ... - def push(self) -> None: ... - def pop(self) -> None: ... 
- def model(self) -> ModelRef: ... - -sat: CheckSatResult = ... -unsat: CheckSatResult = ... - -@overload -def Int(name: str) -> ArithRef: ... -@overload -def Int(name: str, ctx: Context) -> ArithRef: ... - -@overload -def Bool(name: str) -> BoolRef: ... -@overload -def Bool(name: str, ctx: Context) -> BoolRef: ... - -def BitVec(name: str, width: int) -> BitVecRef: ... - -@overload -def parse_smt2_string(s: str) -> ExprRef: ... -@overload -def parse_smt2_string(s: str, ctx: Context) -> ExprRef: ... - -# Can't give more precise types here since func signature is -# a vararg list of ExprRef optionally followed by a Context -def Or(*args: Union[ExprRef, Context]) -> ExprRef: ... -def And(*args: Union[ExprRef, Context]) -> ExprRef: ... -@overload -def Not(p: ExprRef) -> ExprRef: ... -@overload -def Not(p: ExprRef, ctx: Context) -> ExprRef: ... -def Implies(a: ExprRef, b: ExprRef, ctx:Context) -> ExprRef: ... -def If(a: ExprRef, b:TExprRef, c:TExprRef) -> TExprRef: ... - -def ZeroExt(width: int, expr: BitVecRef) -> BitVecRef: ... -def SignExt(width: int, expr: BitVecRef) -> BitVecRef: ... -def Extract(hi: int, lo: int, expr: BitVecRef) -> BitVecRef: ... -def Concat(expr1: BitVecRef, expr2: BitVecRef) -> BitVecRef: ... - -def Function(name: str, *sig: Tuple[SortRef,...]) -> FuncDeclRef: ... - -def IntVal(val: int, ctx: Context) -> IntNumRef: ... -@overload -def BoolVal(val: bool, ctx: Context) -> BoolRef: ... -@overload -def BoolVal(val: bool) -> BoolRef: ... -@overload -def BitVecVal(val: int, bits: int, ctx: Context) -> BitVecNumRef: ... -@overload -def BitVecVal(val: int, bits: int) -> BitVecNumRef: ... diff --git a/cranelift-codegen/meta-python/stubs/z3/z3core.pyi b/cranelift-codegen/meta-python/stubs/z3/z3core.pyi deleted file mode 100644 index 36f1f8879..000000000 --- a/cranelift-codegen/meta-python/stubs/z3/z3core.pyi +++ /dev/null @@ -1,3 +0,0 @@ -from .z3types import Ast, ContextObj -def Z3_mk_eq(ctx: ContextObj, a: Ast, b: Ast) -> Ast: ... -def Z3_mk_div(ctx: ContextObj, a: Ast, b: Ast) -> Ast: ... diff --git a/cranelift-codegen/meta-python/stubs/z3/z3types.pyi b/cranelift-codegen/meta-python/stubs/z3/z3types.pyi deleted file mode 100644 index fa8fc446d..000000000 --- a/cranelift-codegen/meta-python/stubs/z3/z3types.pyi +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Any - -class Z3Exception(Exception): - def __init__(self, a: Any) -> None: - self.value = a - ... - -class ContextObj: - ... - -class Ast: - ... 
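The stub signatures above only hint at z3's width arithmetic for the bit-vector builders. A small standalone model of the width rules behind ZeroExt, SignExt, Extract, and Concat (plain integers standing in for z3 terms; this is illustrative arithmetic, not a z3 binding):

#[derive(Clone, Copy, Debug, PartialEq)]
struct BitVec {
    bits: u64,
    width: u32,
}

fn mask(width: u32) -> u64 {
    if width >= 64 { u64::MAX } else { (1u64 << width) - 1 }
}

// ZeroExt(extra, v): width grows, bits are unchanged.
fn zero_ext(extra: u32, v: BitVec) -> BitVec {
    BitVec { bits: v.bits, width: v.width + extra }
}

// SignExt(extra, v): the sign bit is replicated into the new upper bits.
fn sign_ext(extra: u32, v: BitVec) -> BitVec {
    let negative = (v.bits >> (v.width - 1)) & 1 == 1;
    let bits = if negative {
        v.bits | (mask(v.width + extra) & !mask(v.width))
    } else {
        v.bits
    };
    BitVec { bits, width: v.width + extra }
}

// Extract(hi, lo, v): yields a bit-vector of width hi - lo + 1.
fn extract(hi: u32, lo: u32, v: BitVec) -> BitVec {
    let width = hi - lo + 1;
    BitVec { bits: (v.bits >> lo) & mask(width), width }
}

// Concat(a, b): `a` supplies the upper bits, as in z3.
fn concat(a: BitVec, b: BitVec) -> BitVec {
    BitVec { bits: (a.bits << b.width) | b.bits, width: a.width + b.width }
}

fn main() {
    let v = BitVec { bits: 0b1010, width: 4 };
    assert_eq!(sign_ext(4, v).bits, 0b1111_1010);
    assert_eq!(zero_ext(4, v).bits, 0b0000_1010);
    assert_eq!(extract(3, 2, v).bits, 0b10);
    assert_eq!(concat(v, v).bits, 0b1010_1010);
}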
diff --git a/cranelift-codegen/meta-python/test_constant_hash.py b/cranelift-codegen/meta-python/test_constant_hash.py deleted file mode 100644 index e76f09aed..000000000 --- a/cranelift-codegen/meta-python/test_constant_hash.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -import doctest -import constant_hash - - -def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(constant_hash)) - return tests diff --git a/cranelift-codegen/meta-python/test_srcgen.py b/cranelift-codegen/meta-python/test_srcgen.py deleted file mode 100644 index 2fb5e0fb6..000000000 --- a/cranelift-codegen/meta-python/test_srcgen.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -import doctest -import srcgen - - -def load_tests(loader, tests, ignore): - tests.addTests(doctest.DocTestSuite(srcgen)) - return tests diff --git a/cranelift-codegen/meta-python/unique_table.py b/cranelift-codegen/meta-python/unique_table.py deleted file mode 100644 index d8482b3c7..000000000 --- a/cranelift-codegen/meta-python/unique_table.py +++ /dev/null @@ -1,80 +0,0 @@ -""" -Generate a table of unique items. - -The `UniqueTable` class collects items into an array, removing duplicates. Each -item is mapped to its offset in the final array. - -This is a compression technique for compile-time generated tables. -""" - -try: - from typing import Any, List, Dict, Tuple, Sequence # noqa -except ImportError: - pass - - -class UniqueTable: - """ - Collect items into the `table` list, removing duplicates. - """ - def __init__(self): - # type: () -> None - # List of items added in order. - self.table = list() # type: List[Any] - # Map item -> index. - self.index = dict() # type: Dict[Any, int] - - def add(self, item): - # type: (Any) -> int - """ - Add a single item to the table if it isn't already there. - - Return the offset into `self.table` of the item. - """ - if item in self.index: - return self.index[item] - - idx = len(self.table) - self.index[item] = idx - self.table.append(item) - return idx - - -class UniqueSeqTable: - """ - Collect sequences into the `table` list, removing duplicates. - - Sequences don't have to be of the same length. - """ - def __init__(self): - # type: () -> None - self.table = list() # type: List[Any] - # Map seq -> index. - self.index = dict() # type: Dict[Tuple[Any, ...], int] - - def add(self, seq): - # type: (Sequence[Any]) -> int - """ - Add a sequence of items to the table. If the table already contains the - items in `seq` in the same order, use those instead. - - Return the offset into `self.table` of the beginning of `seq`. - """ - if len(seq) == 0: - return 0 - tseq = tuple(seq) - if tseq in self.index: - return self.index[tseq] - - idx = len(self.table) - self.table.extend(tseq) - - # Add seq and all sub-sequences to `index`. 
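UniqueTable.add interns items: each distinct item occupies one slot in the flat table, and duplicates return the offset of the existing slot. A Rust sketch of the same interning (illustrative, not the meta crate's implementation):

use std::collections::HashMap;
use std::hash::Hash;

struct UniqueTable<T: Eq + Hash + Clone> {
    table: Vec<T>,
    index: HashMap<T, usize>,
}

impl<T: Eq + Hash + Clone> UniqueTable<T> {
    fn new() -> Self {
        UniqueTable { table: Vec::new(), index: HashMap::new() }
    }

    fn add(&mut self, item: T) -> usize {
        if let Some(&idx) = self.index.get(&item) {
            return idx; // Already interned: reuse the existing offset.
        }
        let idx = self.table.len();
        self.index.insert(item.clone(), idx);
        self.table.push(item);
        idx
    }
}

fn main() {
    let mut t = UniqueTable::new();
    assert_eq!(t.add("a"), 0);
    assert_eq!(t.add("b"), 1);
    assert_eq!(t.add("a"), 0); // a duplicate compresses to the first slot
    assert_eq!(t.table.len(), 2);
}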
- index = self.index # type: Dict[Tuple[Any, ...], int] - assert index is not None - for length in range(1, len(tseq) + 1): - for offset in range(len(tseq) - length + 1): - key = tseq[offset:offset+length] - index[key] = idx + offset - - return idx diff --git a/cranelift-codegen/meta/Cargo.toml b/cranelift-codegen/meta/Cargo.toml index d8a211e61..d869c412b 100644 --- a/cranelift-codegen/meta/Cargo.toml +++ b/cranelift-codegen/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-codegen-meta" authors = ["The Cranelift Project Developers"] -version = "0.31.0" +version = "0.43.1" description = "Metaprogram for cranelift-codegen code generator library" license = "Apache-2.0 WITH LLVM-exception" repository = "https://github.com/CraneStation/cranelift" @@ -9,7 +9,7 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-entity = { path = "../../cranelift-entity", version = "0.31.0", default-features = false } +cranelift-entity = { path = "../../cranelift-entity", version = "0.43.1", default-features = false } [badges] maintenance = { status = "experimental" } diff --git a/cranelift-codegen/meta/src/cdsl/ast.rs b/cranelift-codegen/meta/src/cdsl/ast.rs index f4ebea076..798ee29d0 100644 --- a/cranelift-codegen/meta/src/cdsl/ast.rs +++ b/cranelift-codegen/meta/src/cdsl/ast.rs @@ -8,10 +8,10 @@ use cranelift_entity::{entity_impl, PrimaryMap}; use std::fmt; +#[derive(Debug)] pub enum Expr { Var(VarIndex), Literal(Literal), - Apply(Apply), } impl Expr { @@ -39,7 +39,6 @@ impl Expr { match self { Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(), Expr::Literal(literal) => literal.to_rust_code(), - Expr::Apply(a) => a.to_rust_code(var_pool), } } } @@ -81,9 +80,6 @@ impl DefPool { pub fn get(&self, index: DefIndex) -> &Def { self.pool.get(index).unwrap() } - pub fn get_mut(&mut self, index: DefIndex) -> &mut Def { - self.pool.get_mut(index).unwrap() - } pub fn next_index(&self) -> DefIndex { self.pool.next_key() } @@ -368,6 +364,7 @@ impl VarPool { /// /// An `Apply` AST expression is created by using function call syntax on instructions. This /// applies to both bound and unbound polymorphic instructions. +#[derive(Debug)] pub struct Apply { pub inst: Instruction, pub args: Vec, @@ -376,11 +373,17 @@ pub struct Apply { impl Apply { pub fn new(target: InstSpec, args: Vec) -> Self { - let (inst, value_types) = match target.into() { + let (inst, value_types) = match target { InstSpec::Inst(inst) => (inst, Vec::new()), InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), }; + // Apply should only operate on concrete value types, not "any". + let value_types = value_types + .into_iter() + .map(|vt| vt.expect("shouldn't be Any")) + .collect(); + // Basic check on number of arguments. assert!( inst.operands_in.len() == args.len(), @@ -424,17 +427,7 @@ impl Apply { format!("{}({})", inst_name, args) } - fn to_rust_code(&self, var_pool: &VarPool) -> String { - let args = self - .args - .iter() - .map(|arg| arg.to_rust_code(var_pool)) - .collect::>() - .join(", "); - format!("{}({})", self.inst.name, args) - } - - fn inst_predicate( + pub fn inst_predicate( &self, format_registry: &FormatRegistry, var_pool: &VarPool, @@ -448,7 +441,8 @@ impl Apply { // Ignore free variables for now. 
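The loop finished just above is the heart of UniqueSeqTable: after appending a sequence, every contiguous sub-sequence is also indexed, so a later shorter sequence can resolve to an offset inside existing data instead of growing the table. A standalone Rust sketch (it keeps the first offset per key via or_insert, a minor simplification of the overwriting assignment in the Python loop):

use std::collections::HashMap;

struct UniqueSeqTable {
    table: Vec<u8>,
    index: HashMap<Vec<u8>, usize>,
}

impl UniqueSeqTable {
    fn new() -> Self {
        UniqueSeqTable { table: Vec::new(), index: HashMap::new() }
    }

    fn add(&mut self, seq: &[u8]) -> usize {
        if seq.is_empty() {
            return 0;
        }
        if let Some(&idx) = self.index.get(seq) {
            return idx;
        }
        let idx = self.table.len();
        self.table.extend_from_slice(seq);
        // Record every contiguous slice of the new sequence.
        for len in 1..=seq.len() {
            for offset in 0..=seq.len() - len {
                self.index
                    .entry(seq[offset..offset + len].to_vec())
                    .or_insert(idx + offset);
            }
        }
        idx
    }
}

fn main() {
    let mut t = UniqueSeqTable::new();
    assert_eq!(t.add(&[1, 2, 3, 4]), 0);
    assert_eq!(t.add(&[2, 3]), 1); // found inside the first sequence
    assert_eq!(t.table.len(), 4);  // no new storage needed
}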
continue; } - pred = pred.and(InstructionPredicate::new_is_field_equal( + pred = pred.and(InstructionPredicate::new_is_field_equal_ast( + iform, &format_field, arg.to_rust_code(var_pool), )); diff --git a/cranelift-codegen/meta/src/cdsl/cpu_modes.rs b/cranelift-codegen/meta/src/cdsl/cpu_modes.rs index 5e0e5b476..4b4406d2d 100644 --- a/cranelift-codegen/meta/src/cdsl/cpu_modes.rs +++ b/cranelift-codegen/meta/src/cdsl/cpu_modes.rs @@ -1,25 +1,34 @@ -use crate::cdsl::types::LaneType; -use crate::cdsl::xform::{TransformGroup, TransformGroupIndex, TransformGroups}; - -use std::collections::{HashMap, HashSet}; +use std::collections::{hash_map, HashMap, HashSet}; use std::iter::FromIterator; +use crate::cdsl::encodings::Encoding; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; + pub struct CpuMode { - _name: &'static str, + pub name: &'static str, default_legalize: Option, monomorphic_legalize: Option, - typed_legalize: HashMap, + typed_legalize: HashMap, + pub encodings: Vec, } impl CpuMode { pub fn new(name: &'static str) -> Self { Self { - _name: name, + name, default_legalize: None, monomorphic_legalize: None, typed_legalize: HashMap::new(), + encodings: Vec::new(), } } + + pub fn set_encodings(&mut self, encodings: Vec) { + assert!(self.encodings.is_empty(), "clobbering encodings"); + self.encodings = encodings; + } + pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { assert!(self.monomorphic_legalize.is_none()); self.monomorphic_legalize = Some(group.id); @@ -31,39 +40,28 @@ impl CpuMode { pub fn legalize_type(&mut self, lane_type: impl Into, group: &TransformGroup) { assert!(self .typed_legalize - .insert(lane_type.into().to_string(), group.id) + .insert(lane_type.into().into(), group.id) .is_none()); } - /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the - /// transitive set of TransformGroup this TargetIsa uses. - pub fn transitive_transform_groups( - &self, - all_groups: &TransformGroups, - ) -> Vec { - let mut roots = Vec::new(); - if let Some(i) = &self.default_legalize { - roots.push(*i); - } - if let Some(i) = &self.monomorphic_legalize { - roots.push(*i); - } - roots.extend(self.typed_legalize.values().cloned()); - - let mut set = HashSet::new(); - for root in roots { - set.insert(root); - let mut base = root; - // Follow the chain of chain_with. 
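A reduced model of the CpuMode legalization lookup introduced in this hunk (see get_legalize_code_for just below): a per-type action wins, monomorphic instructions fall back to monomorphic_legalize, and everything else falls back to default_legalize. Strings and usize stand in for ValueType and TransformGroupIndex:

use std::collections::HashMap;

struct Mode {
    default_legalize: Option<usize>,
    monomorphic_legalize: Option<usize>,
    typed_legalize: HashMap<&'static str, usize>,
}

impl Mode {
    fn get_legalize_code_for(&self, typ: Option<&'static str>) -> usize {
        let default = self.default_legalize.expect("a finished mode has a default");
        match typ {
            // Typed lookup first, then the default action.
            Some(t) => *self.typed_legalize.get(t).unwrap_or(&default),
            // No controlling type: the monomorphic action, then the default.
            None => self.monomorphic_legalize.unwrap_or(default),
        }
    }
}

fn main() {
    let mut typed = HashMap::new();
    typed.insert("i32", 2);
    let mode = Mode {
        default_legalize: Some(0),
        monomorphic_legalize: Some(1),
        typed_legalize: typed,
    };
    assert_eq!(mode.get_legalize_code_for(Some("i32")), 2);
    assert_eq!(mode.get_legalize_code_for(Some("f64")), 0);
    assert_eq!(mode.get_legalize_code_for(None), 1);
}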
- while let Some(chain_with) = &all_groups.get(base).chain_with { - set.insert(*chain_with); - base = *chain_with; - } + pub fn get_default_legalize_code(&self) -> TransformGroupIndex { + self.default_legalize + .expect("a finished CpuMode must have a default legalize code") + } + pub fn get_legalize_code_for(&self, typ: &Option) -> TransformGroupIndex { + match typ { + Some(typ) => self + .typed_legalize + .get(typ) + .map(|x| *x) + .unwrap_or_else(|| self.get_default_legalize_code()), + None => self + .monomorphic_legalize + .unwrap_or_else(|| self.get_default_legalize_code()), } - - let mut ret = Vec::from_iter(set); - ret.sort(); - ret + } + pub fn get_legalized_types(&self) -> hash_map::Keys { + self.typed_legalize.keys() } /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly diff --git a/cranelift-codegen/meta/src/cdsl/encodings.rs b/cranelift-codegen/meta/src/cdsl/encodings.rs new file mode 100644 index 000000000..cd05eb9b3 --- /dev/null +++ b/cranelift-codegen/meta/src/cdsl/encodings.rs @@ -0,0 +1,160 @@ +use std::rc::Rc; + +use crate::cdsl::instructions::{ + InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, + InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; + +/// Encoding for a concrete instruction. +/// +/// An `Encoding` object ties an instruction opcode with concrete type variables together with an +/// encoding recipe and encoding encbits. +/// +/// The concrete instruction can be in three different forms: +/// +/// 1. A naked opcode: `trap` for non-polymorphic instructions. +/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. +/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. +/// +/// If the instruction is polymorphic, all type variables must be provided. +pub struct EncodingContent { + /// The `Instruction` or `BoundInstruction` being encoded. + inst: InstSpec, + + /// The `EncodingRecipe` to use. + pub recipe: EncodingRecipeNumber, + + /// Additional encoding bits to be interpreted by `recipe`. + pub encbits: u16, + + /// An instruction predicate that must be true to allow selecting this encoding. + pub inst_predicate: Option, + + /// An ISA predicate that must be true to allow selecting this encoding. + pub isa_predicate: Option, + + /// The value type this encoding has been bound to, for encodings of polymorphic instructions. + pub bound_type: Option, +} + +impl EncodingContent { + pub fn inst(&self) -> &Instruction { + self.inst.inst() + } + pub fn to_rust_comment(&self, recipes: &Recipes) -> String { + format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) + } +} + +pub type Encoding = Rc; + +pub struct EncodingBuilder { + inst: InstSpec, + recipe: EncodingRecipeNumber, + encbits: u16, + inst_predicate: Option, + isa_predicate: Option, + bound_type: Option, +} + +impl EncodingBuilder { + pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { + let (inst_predicate, bound_type) = match &inst { + InstSpec::Bound(inst) => { + let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; + + assert!( + inst.value_types.len() == other_typevars.len() + 1, + "partially bound polymorphic instruction" + ); + + // Add secondary type variables to the instruction predicate. 
+ let value_types = &inst.value_types; + let mut inst_predicate = None; + for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { + let value_type = match value_type { + ValueTypeOrAny::Any => continue, + ValueTypeOrAny::ValueType(vt) => vt, + }; + let type_predicate = + InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); + inst_predicate = Some(type_predicate.into()); + } + + let ctrl_type = value_types[0] + .clone() + .expect("Controlling type shouldn't be Any"); + (inst_predicate, Some(ctrl_type)) + } + + InstSpec::Inst(inst) => { + assert!( + inst.polymorphic_info.is_none(), + "unbound polymorphic instruction" + ); + (None, None) + } + }; + + Self { + inst, + recipe, + encbits, + inst_predicate, + isa_predicate: None, + bound_type, + } + } + + pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { + let inst_predicate = Some(match self.inst_predicate { + Some(node) => node.and(inst_predicate), + None => inst_predicate.into(), + }); + self.inst_predicate = inst_predicate; + self + } + + pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(isa_predicate); + self + } + + pub fn build( + self, + recipes: &Recipes, + inst_pred_reg: &mut InstructionPredicateRegistry, + ) -> Encoding { + let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); + + let inst = self.inst.inst(); + assert!( + inst.format == recipes[self.recipe].format, + format!( + "Inst {} and recipe {} must have the same format!", + inst.name, recipes[self.recipe].name + ) + ); + + assert_eq!( + inst.is_branch && !inst.is_indirect_branch, + recipes[self.recipe].branch_range.is_some(), + "Inst {}'s is_branch contradicts recipe {} branch_range!", + inst.name, + recipes[self.recipe].name + ); + + Rc::new(EncodingContent { + inst: self.inst, + recipe: self.recipe, + encbits: self.encbits, + inst_predicate, + isa_predicate: self.isa_predicate, + bound_type: self.bound_type, + }) + } +} diff --git a/cranelift-codegen/meta/src/cdsl/formats.rs b/cranelift-codegen/meta/src/cdsl/formats.rs index cefcfa162..cf82da180 100644 --- a/cranelift-codegen/meta/src/cdsl/formats.rs +++ b/cranelift-codegen/meta/src/cdsl/formats.rs @@ -12,9 +12,6 @@ use cranelift_entity::{entity_impl, PrimaryMap}; /// data type. #[derive(Debug)] pub struct FormatField { - /// Immediate operand number in parent. - immnum: usize, - /// Immediate operand kind. 
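EncodingBuilder::inst_predicate above folds successive predicates into a single And node, starting from None. A reduced standalone model of that accumulation (Pred is a stand-in for InstructionPredicateNode):

#[derive(Debug, PartialEq)]
enum Pred {
    Leaf(&'static str),
    And(Vec<Pred>),
}

// Fold the next predicate into the accumulator: None becomes the leaf,
// an existing And grows, and a lone leaf is promoted to an And of two.
fn and(acc: Option<Pred>, next: Pred) -> Option<Pred> {
    Some(match acc {
        None => next,
        Some(Pred::And(mut nodes)) => {
            nodes.push(next);
            Pred::And(nodes)
        }
        Some(leaf) => Pred::And(vec![leaf, next]),
    })
}

fn main() {
    let mut p = None;
    for leaf in ["typeof(x) == i32", "is_signed_int(imm, 8, 0)"] {
        p = and(p, Pred::Leaf(leaf));
    }
    assert_eq!(
        p,
        Some(Pred::And(vec![
            Pred::Leaf("typeof(x) == i32"),
            Pred::Leaf("is_signed_int(imm, 8, 0)"),
        ]))
    );
}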
pub kind: OperandKind, @@ -53,7 +50,7 @@ pub struct InstructionFormat { impl fmt::Display for InstructionFormat { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { - let args = self + let imm_args = self .imm_fields .iter() .map(|field| format!("{}: {}", field.member, field.kind.name)) @@ -61,12 +58,26 @@ impl fmt::Display for InstructionFormat { .join(", "); fmt.write_fmt(format_args!( "{}(imms=({}), vals={})", - self.name, args, self.num_value_operands + self.name, imm_args, self.num_value_operands ))?; Ok(()) } } +impl InstructionFormat { + pub fn imm_by_name(&self, name: &'static str) -> &FormatField { + self.imm_fields + .iter() + .find(|&field| field.member == name) + .unwrap_or_else(|| { + panic!( + "unexpected immediate field named {} in instruction format {}", + name, self.name + ) + }) + } +} + pub struct InstructionFormatBuilder { name: &'static str, num_value_operands: usize, @@ -75,27 +86,6 @@ pub struct InstructionFormatBuilder { typevar_operand: Option, } -pub struct ImmParameter { - kind: OperandKind, - member: &'static str, -} -impl Into for (&'static str, &OperandKind) { - fn into(self) -> ImmParameter { - ImmParameter { - kind: self.1.clone(), - member: self.0, - } - } -} -impl Into for &OperandKind { - fn into(self) -> ImmParameter { - ImmParameter { - kind: self.clone(), - member: self.default_member.unwrap(), - } - } -} - impl InstructionFormatBuilder { pub fn new(name: &'static str) -> Self { Self { @@ -117,12 +107,19 @@ impl InstructionFormatBuilder { self } - pub fn imm(mut self, param: impl Into) -> Self { - let imm_param = param.into(); + pub fn imm(mut self, operand_kind: &OperandKind) -> Self { let field = FormatField { - immnum: self.imm_fields.len(), - kind: imm_param.kind, - member: imm_param.member, + kind: operand_kind.clone(), + member: operand_kind.default_member.unwrap(), + }; + self.imm_fields.push(field); + self + } + + pub fn imm_with_name(mut self, member: &'static str, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member, }; self.imm_fields.push(field); self @@ -187,7 +184,9 @@ impl FormatRegistry { if operand.is_value() { num_values += 1; } - has_varargs = has_varargs || operand.is_varargs(); + if !has_varargs { + has_varargs = operand.is_varargs(); + } if let Some(imm_key) = operand.kind.imm_key() { imm_keys.push(imm_key); } @@ -200,6 +199,14 @@ impl FormatRegistry { .expect("unknown InstructionFormat; please define it in shared/formats.rs first") } + pub fn by_name(&self, name: &str) -> InstructionFormatIndex { + self.map + .iter() + .find(|(_key, value)| value.name == name) + .unwrap_or_else(|| panic!("format with name '{}' doesn't exist", name)) + .0 + } + pub fn get(&self, index: InstructionFormatIndex) -> &InstructionFormat { self.map.get(index).unwrap() } diff --git a/cranelift-codegen/meta/src/cdsl/instructions.rs b/cranelift-codegen/meta/src/cdsl/instructions.rs index d57bcc254..2737099cc 100644 --- a/cranelift-codegen/meta/src/cdsl/instructions.rs +++ b/cranelift-codegen/meta/src/cdsl/instructions.rs @@ -1,47 +1,62 @@ +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::HashMap; +use std::fmt; +use std::ops; +use std::rc::Rc; + use crate::cdsl::camel_case; use crate::cdsl::formats::{ FormatField, FormatRegistry, InstructionFormat, InstructionFormatIndex, }; use crate::cdsl::operands::Operand; use crate::cdsl::type_inference::Constraint; -use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::types::{LaneType, ReferenceType, ValueType, 
VectorType}; use crate::cdsl::typevar::TypeVar; -use std::fmt; -use std::ops; -use std::rc::Rc; -use std::slice; +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct OpcodeNumber(u32); +entity_impl!(OpcodeNumber); -pub struct InstructionGroupBuilder<'format_reg> { +pub type AllInstructions = PrimaryMap; + +pub struct InstructionGroupBuilder<'format_reg, 'all_inst> { _name: &'static str, _doc: &'static str, format_registry: &'format_reg FormatRegistry, - instructions: Vec, + all_instructions: &'all_inst mut AllInstructions, + own_instructions: Vec, } -impl<'format_reg> InstructionGroupBuilder<'format_reg> { +impl<'format_reg, 'all_inst> InstructionGroupBuilder<'format_reg, 'all_inst> { pub fn new( name: &'static str, doc: &'static str, + all_instructions: &'all_inst mut AllInstructions, format_registry: &'format_reg FormatRegistry, ) -> Self { Self { _name: name, _doc: doc, format_registry, - instructions: Vec::new(), + all_instructions, + own_instructions: Vec::new(), } } pub fn push(&mut self, builder: InstructionBuilder) { - self.instructions.push(builder.build(self.format_registry)); + let opcode_number = OpcodeNumber(self.all_instructions.next_key().as_u32()); + let inst = builder.build(self.format_registry, opcode_number); + // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent. + self.own_instructions.push(inst.clone()); + self.all_instructions.push(inst); } pub fn build(self) -> InstructionGroup { InstructionGroup { _name: self._name, _doc: self._doc, - instructions: self.instructions, + instructions: self.own_instructions, } } } @@ -56,31 +71,30 @@ pub struct InstructionGroup { } impl InstructionGroup { - pub fn iter(&self) -> slice::Iter { - self.instructions.iter() - } - pub fn by_name(&self, name: &'static str) -> &Instruction { self.instructions .iter() - .find(|inst| inst.name == name) + .find(|inst| &inst.name == name) .expect(&format!("unexisting instruction with name {}", name)) } } +#[derive(Debug)] pub struct PolymorphicInfo { pub use_typevar_operand: bool, pub ctrl_typevar: TypeVar, pub other_typevars: Vec, } +#[derive(Debug)] pub struct InstructionContent { /// Instruction mnemonic, also becomes opcode name. pub name: String, pub camel_name: String, + pub opcode_number: OpcodeNumber, /// Documentation string. - doc: String, + pub doc: String, /// Input operands. This can be a mix of SSA value operands and other operand kinds. pub operands_in: Vec, @@ -96,9 +110,12 @@ pub struct InstructionContent { /// polymorphic, set otherwise. pub polymorphic_info: Option, + /// Indices in operands_in of input operands that are values. pub value_opnums: Vec, - pub value_results: Vec, + /// Indices in operands_in of input operands that are immediates or entities. pub imm_opnums: Vec, + /// Indices in operands_out of output operands that are values. + pub value_results: Vec, /// True for instructions that terminate the EBB. 
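InstructionGroupBuilder::push above derives each instruction's OpcodeNumber from all_instructions.next_key(), so numbering is global and dense across every instruction group. A minimal model of that arena behavior (a simplified stand-in for cranelift_entity::PrimaryMap):

struct PrimaryMap<T> {
    elems: Vec<T>,
}

impl<T> PrimaryMap<T> {
    fn new() -> Self {
        PrimaryMap { elems: Vec::new() }
    }
    // The key the next push will receive.
    fn next_key(&self) -> u32 {
        self.elems.len() as u32
    }
    fn push(&mut self, elem: T) -> u32 {
        let key = self.next_key();
        self.elems.push(elem);
        key
    }
}

fn main() {
    let mut all = PrimaryMap::new();
    // Two groups pushing into the same arena share one numbering space.
    let first = all.push("iadd");
    let second = all.push("x86_sdivmodx");
    assert_eq!((first, second), (0, 1));
}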
pub is_terminator: bool, @@ -124,7 +141,7 @@ pub struct InstructionContent { pub writes_cpu_flags: bool, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Instruction { content: Rc, } @@ -138,23 +155,13 @@ impl ops::Deref for Instruction { impl Instruction { pub fn snake_name(&self) -> &str { - if self.name == "return" { + if &self.name == "return" { "return_" } else { &self.name } } - pub fn doc_comment_first_line(&self) -> &str { - for line in self.doc.split("\n") { - let stripped = line.trim(); - if stripped.len() > 0 { - return stripped; - } - } - "" - } - pub fn all_typevars(&self) -> Vec<&TypeVar> { match &self.polymorphic_info { Some(poly) => { @@ -167,7 +174,28 @@ impl Instruction { } pub fn bind(&self, lane_type: impl Into) -> BoundInstruction { - bind(self.clone(), lane_type.into(), Vec::new()) + bind(self.clone(), Some(lane_type.into()), Vec::new()) + } + + pub fn bind_ref(&self, reference_type: impl Into) -> BoundInstruction { + bind_ref(self.clone(), Some(reference_type.into()), Vec::new()) + } + + pub fn bind_vector_from_lane( + &self, + lane_type: impl Into, + vector_size_in_bits: u64, + ) -> BoundInstruction { + bind_vector( + self.clone(), + lane_type.into(), + vector_size_in_bits, + Vec::new(), + ) + } + + pub fn bind_any(&self) -> BoundInstruction { + bind(self.clone(), None, Vec::new()) } } @@ -300,12 +328,10 @@ impl InstructionBuilder { self } - fn build(self, format_registry: &FormatRegistry) -> Instruction { + fn build(self, format_registry: &FormatRegistry, opcode_number: OpcodeNumber) -> Instruction { let operands_in = self.operands_in.unwrap_or_else(Vec::new); let operands_out = self.operands_out.unwrap_or_else(Vec::new); - let format_index = format_registry.lookup(&operands_in); - let mut value_opnums = Vec::new(); let mut imm_opnums = Vec::new(); for (i, op) in operands_in.iter().enumerate() { @@ -318,13 +344,13 @@ impl InstructionBuilder { } } - let mut value_results = Vec::new(); - for (i, op) in operands_out.iter().enumerate() { - if op.is_value() { - value_results.push(i); - } - } + let value_results = operands_out + .iter() + .enumerate() + .filter_map(|(i, op)| if op.is_value() { Some(i) } else { None }) + .collect(); + let format_index = format_registry.lookup(&operands_in); let format = format_registry.get(format_index); let polymorphic_info = verify_polymorphic(&operands_in, &operands_out, &format, &value_opnums); @@ -333,10 +359,12 @@ impl InstructionBuilder { let writes_cpu_flags = operands_out.iter().any(|op| op.is_cpu_flags()); let camel_name = camel_case(&self.name); + Instruction { content: Rc::new(InstructionContent { name: self.name, camel_name, + opcode_number, doc: self.doc, operands_in, operands_out, @@ -362,15 +390,52 @@ impl InstructionBuilder { } } +/// A thin wrapper like Option, but with more precise semantics. 
+#[derive(Clone)] +pub enum ValueTypeOrAny { + ValueType(ValueType), + Any, +} + +impl ValueTypeOrAny { + pub fn expect(self, msg: &str) -> ValueType { + match self { + ValueTypeOrAny::ValueType(vt) => vt, + ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)), + } + } +} + #[derive(Clone)] pub struct BoundInstruction { pub inst: Instruction, - pub value_types: Vec, + pub value_types: Vec, } impl BoundInstruction { pub fn bind(self, lane_type: impl Into) -> BoundInstruction { - bind(self.inst, lane_type.into(), self.value_types) + bind(self.inst, Some(lane_type.into()), self.value_types) + } + + pub fn bind_ref(self, reference_type: impl Into) -> BoundInstruction { + bind_ref(self.inst, Some(reference_type.into()), self.value_types) + } + + pub fn bind_vector_from_lane( + self, + lane_type: impl Into, + vector_size_in_bits: u64, + ) -> BoundInstruction { + bind_vector( + self.inst, + lane_type.into(), + vector_size_in_bits, + self.value_types, + ) + } + + pub fn bind_any(self) -> BoundInstruction { + bind(self.inst, None, self.value_types) } } @@ -394,12 +459,8 @@ fn verify_polymorphic( } // Verify the use of type variables. - let mut use_typevar_operand = false; - let mut ctrl_typevar = None; - let mut other_typevars = None; - let mut maybe_error_message = None; - let tv_op = format.typevar_operand; + let mut maybe_error_message = None; if let Some(tv_op) = tv_op { if tv_op < value_opnums.len() { let op_num = value_opnums[tv_op]; @@ -408,11 +469,13 @@ fn verify_polymorphic( if (free_typevar.is_some() && tv == &free_typevar.unwrap()) || tv.singleton_type().is_some() { - match verify_ctrl_typevar(tv, &value_opnums, &operands_in, &operands_out) { - Ok(typevars) => { - other_typevars = Some(typevars); - ctrl_typevar = Some(tv.clone()); - use_typevar_operand = true; + match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) { + Ok(other_typevars) => { + return Some(PolymorphicInfo { + use_typevar_operand: true, + ctrl_typevar: tv.clone(), + other_typevars, + }); } Err(error_message) => { maybe_error_message = Some(error_message); @@ -422,33 +485,32 @@ fn verify_polymorphic( } }; - if !use_typevar_operand { - if operands_out.len() == 0 { - match maybe_error_message { - Some(msg) => panic!(msg), - None => panic!("typevar_operand must be a free type variable"), - } - } - - let tv = operands_out[0].type_var().unwrap(); - let free_typevar = tv.free_typevar(); - if free_typevar.is_some() && tv != &free_typevar.unwrap() { - panic!("first result must be a free type variable"); + // If we reached here, it means the type variable indicated as the typevar operand couldn't + // control every other input and output type variable. We need to look at the result type + // variables. + if operands_out.len() == 0 { + // No result means no other possible type variable, so it's a type inference failure. + match maybe_error_message { + Some(msg) => panic!(msg), + None => panic!("typevar_operand must be a free type variable"), } + } - other_typevars = - Some(verify_ctrl_typevar(tv, &value_opnums, &operands_in, &operands_out).unwrap()); - ctrl_typevar = Some(tv.clone()); + // Otherwise, try to infer the controlling type variable by looking at the first result. + let tv = operands_out[0].type_var().unwrap(); + let free_typevar = tv.free_typevar(); + if free_typevar.is_some() && tv != &free_typevar.unwrap() { + panic!("first result must be a free type variable"); } - // rustc is not capable to determine this statically, so enforce it with options. 
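ValueTypeOrAny above is deliberately Option-like but self-describing: a binding slot holds either a concrete value type or an explicit Any, and expect documents why a concrete type was required when it panics. A reduced model (string type names are illustrative):

#[derive(Clone, Debug, PartialEq)]
enum ValueTypeOrAny {
    ValueType(&'static str),
    Any,
}

impl ValueTypeOrAny {
    // Like Option::expect, but the panic message explains the binding rule
    // that was violated rather than a generic unwrap failure.
    fn expect(self, msg: &str) -> &'static str {
        match self {
            ValueTypeOrAny::ValueType(vt) => vt,
            ValueTypeOrAny::Any => panic!("Unexpected Any: {}", msg),
        }
    }
}

fn main() {
    let bound = ValueTypeOrAny::ValueType("i32");
    assert_eq!(bound.expect("controlling type must be concrete"), "i32");
}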
- assert!(ctrl_typevar.is_some()); - assert!(other_typevars.is_some()); + // At this point, if the next unwrap() fails, it means the output type couldn't be used as a + // controlling type variable either; panicking is the right behavior. + let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap(); Some(PolymorphicInfo { - use_typevar_operand, - ctrl_typevar: ctrl_typevar.unwrap(), - other_typevars: other_typevars.unwrap(), + use_typevar_operand: false, + ctrl_typevar: tv.clone(), + other_typevars, }) } @@ -460,57 +522,51 @@ fn verify_polymorphic( /// /// All polymorphic results must be derived from `ctrl_typevar`. /// -/// Return a vector of other type variables used, or panics. -fn verify_ctrl_typevar( +/// Return a vector of other type variables used, or a string explaining what went wrong. +fn is_ctrl_typevar_candidate( ctrl_typevar: &TypeVar, - value_opnums: &Vec, operands_in: &Vec, operands_out: &Vec, ) -> Result, String> { let mut other_typevars = Vec::new(); // Check value inputs. - for &op_num in value_opnums { - let typ = operands_in[op_num].type_var(); + for input in operands_in { + if !input.is_value() { + continue; + } - let tv = if let Some(typ) = typ { - typ.free_typevar() - } else { - None - }; + let typ = input.type_var().unwrap(); + let free_typevar = typ.free_typevar(); // Non-polymorphic or derived from ctrl_typevar is OK. - let tv = match tv { - Some(tv) => { - if &tv == ctrl_typevar { - continue; - } - tv - } - None => continue, - }; + if free_typevar.is_none() { + continue; + } + let free_typevar = free_typevar.unwrap(); + if &free_typevar == ctrl_typevar { + continue; + } // No other derived typevars allowed. - if typ.is_some() && typ.unwrap() != &tv { + if typ != &free_typevar { return Err(format!( - "{:?}: type variable {} must be derived from {:?}", - operands_in[op_num], - typ.unwrap().name, - ctrl_typevar + "{:?}: type variable {} must be derived from {:?} while it is derived from {:?}", + input, typ.name, ctrl_typevar, free_typevar )); } // Other free type variables can only be used once each. for other_tv in &other_typevars { - if &tv == other_tv { + if &free_typevar == other_tv { return Err(format!( - "type variable {} can't be used more than once", - tv.name + "non-controlling type variable {} can't be used more than once", + free_typevar.name )); } } - other_typevars.push(tv); + other_typevars.push(free_typevar); } // Check outputs. @@ -520,10 +576,10 @@ fn verify_ctrl_typevar( } let typ = result.type_var().unwrap(); - let tv = typ.free_typevar(); + let free_typevar = typ.free_typevar(); - // Non-polymorphic or derived form ctrl_typevar is OK. - if tv.is_none() || &tv.unwrap() == ctrl_typevar { + // Non-polymorphic or derived from ctrl_typevar is OK. + if free_typevar.is_none() || &free_typevar.unwrap() == ctrl_typevar { continue; } @@ -533,12 +589,123 @@ fn verify_ctrl_typevar( Ok(other_typevars) } -/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. -pub enum InstructionPredicateNode { - /// Is the field member (first member) equal to the actual argument (which name is the second - /// field)? - IsFieldEqual(String, String), +#[derive(Clone, Hash, PartialEq, Eq)] +pub enum FormatPredicateKind { + /// Is the field member equal to the expected value (stored here)? + IsEqual(String), + + /// Is the immediate instruction format field representable as an n-bit two's complement + /// integer? (with width: first member, scale: second member). 
+ /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a + /// multiple of `2^scale`. + IsSignedInt(usize, usize), + + /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with + /// width: first member, scale: second member). + /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of + /// `2^scale`. + IsUnsignedInt(usize, usize), + + /// Is the immediate format field member an integer equal to zero? + IsZeroInt, + /// Is the immediate format field member equal to zero? (float32 version) + IsZero32BitFloat, + + /// Is the immediate format field member equal to zero? (float64 version) + IsZero64BitFloat, + + /// Has the value list (in member_name) the size specified in parameter? + LengthEquals(usize), + + /// Is the referenced function colocated? + IsColocatedFunc, + + /// Is the referenced data object colocated? + IsColocatedData, +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub struct FormatPredicateNode { + format_name: &'static str, + member_name: &'static str, + kind: FormatPredicateKind, +} + +impl FormatPredicateNode { + fn new( + format: &InstructionFormat, + field_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + let member_name = format.imm_by_name(field_name).member; + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn new_raw( + format: &InstructionFormat, + member_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn destructuring_member_name(&self) -> &'static str { + match &self.kind { + FormatPredicateKind::LengthEquals(_) => { + // Length operates on the argument value list. + assert!(self.member_name == "args"); + "ref args" + } + _ => self.member_name, + } + } + + fn rust_predicate(&self) -> String { + match &self.kind { + FormatPredicateKind::IsEqual(arg) => { + format!("predicates::is_equal({}, {})", self.member_name, arg) + } + FormatPredicateKind::IsSignedInt(width, scale) => format!( + "predicates::is_signed_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsUnsignedInt(width, scale) => format!( + "predicates::is_unsigned_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsZeroInt => { + format!("predicates::is_zero_int({})", self.member_name) + } + FormatPredicateKind::IsZero32BitFloat => { + format!("predicates::is_zero_32_bit_float({})", self.member_name) + } + FormatPredicateKind::IsZero64BitFloat => { + format!("predicates::is_zero_64_bit_float({})", self.member_name) + } + FormatPredicateKind::LengthEquals(num) => format!( + "predicates::has_length_of({}, {}, func)", + self.member_name, num + ), + FormatPredicateKind::IsColocatedFunc => { + format!("predicates::is_colocated_func({}, func)", self.member_name,) + } + FormatPredicateKind::IsColocatedData => { + format!("predicates::is_colocated_data({}, func)", self.member_name) + } + } + } +} +#[derive(Clone, Hash, PartialEq, Eq)] +pub enum TypePredicateNode { /// Is the value argument (at the index designated by the first member) the same type as the /// type name (second member)? TypeVarCheck(usize, String), @@ -546,43 +713,113 @@ pub enum InstructionPredicateNode { /// Is the controlling type variable the same type as the one designated by the type name /// (only member)? CtrlTypeVarCheck(String), - - /// A combination of two other predicates. 
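The doc comments above state the numeric rule exactly: a field satisfies (width, scale) iff it is a multiple of 2^scale and lies in the n-bit two's complement range (signed) or the n-bit unsigned range. A standalone check of that rule (assumes width < 64; this is not the generated predicates module):

fn is_signed_int(x: i64, width: u32, scale: u32) -> bool {
    let lo = -(1i64 << (width - 1));
    let hi = (1i64 << (width - 1)) - 1;
    lo <= x && x <= hi && (x & ((1i64 << scale) - 1)) == 0
}

fn is_unsigned_int(x: i64, width: u32, scale: u32) -> bool {
    let hi = (1i64 << width) - 1;
    0 <= x && x <= hi && (x & ((1i64 << scale) - 1)) == 0
}

fn main() {
    assert!(is_signed_int(-128, 8, 0));
    assert!(!is_signed_int(128, 8, 0));  // one past the signed range
    assert!(is_signed_int(-64, 8, 2));   // a multiple of 2^2
    assert!(!is_signed_int(-63, 8, 2));  // not a multiple of 2^2
    assert!(is_unsigned_int(255, 8, 0));
    assert!(!is_unsigned_int(-1, 8, 0));
}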
- And(Vec), } -impl InstructionPredicateNode { +impl TypePredicateNode { fn rust_predicate(&self) -> String { match self { - InstructionPredicateNode::IsFieldEqual(field_name, arg) => { - let new_args = vec![field_name.clone(), arg.clone()]; - format!("crate::predicates::is_equal({})", new_args.join(", ")) - } - InstructionPredicateNode::TypeVarCheck(index, value_type_name) => format!( + TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( "func.dfg.value_type(args[{}]) == {}", index, value_type_name ), - InstructionPredicateNode::CtrlTypeVarCheck(value_type_name) => { + TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { format!("func.dfg.ctrl_typevar(inst) == {}", value_type_name) } + } + } +} + +/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. +#[derive(Clone, Hash, PartialEq, Eq)] +pub enum InstructionPredicateNode { + FormatPredicate(FormatPredicateNode), + + TypePredicate(TypePredicateNode), + + /// An AND-combination of two or more other predicates. + And(Vec), + + /// An OR-combination of two or more other predicates. + Or(Vec), +} + +impl InstructionPredicateNode { + fn rust_predicate(&self) -> String { + match self { + InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), + InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(), InstructionPredicateNode::And(nodes) => nodes .iter() .map(|x| x.rust_predicate()) .collect::>() - .join(" &&\n"), + .join(" && "), + InstructionPredicateNode::Or(nodes) => nodes + .iter() + .map(|x| x.rust_predicate()) + .collect::>() + .join(" || "), + } + } + + pub fn format_destructuring_member_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => { + format_pred.destructuring_member_name() + } + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn format_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, + _ => panic!("Only for leaf format predicates"), } } + + pub fn is_type_predicate(&self) -> bool { + match self { + InstructionPredicateNode::FormatPredicate(_) + | InstructionPredicateNode::And(_) + | InstructionPredicateNode::Or(_) => false, + InstructionPredicateNode::TypePredicate(_) => true, + } + } + + fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + let mut ret = Vec::new(); + match self { + InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { + for node in nodes { + ret.extend(node.collect_leaves()); + } + } + _ => ret.push(self), + } + ret + } } +#[derive(Clone, Hash, PartialEq, Eq)] pub struct InstructionPredicate { node: Option, } +impl Into for InstructionPredicateNode { + fn into(self) -> InstructionPredicate { + InstructionPredicate { node: Some(self) } + } +} + impl InstructionPredicate { pub fn new() -> Self { Self { node: None } } + pub fn unwrap(self) -> InstructionPredicateNode { + self.node.unwrap() + } + pub fn new_typevar_check( inst: &Instruction, type_var: &TypeVar, @@ -596,18 +833,132 @@ impl InstructionPredicate { .next() .unwrap() .0; - InstructionPredicateNode::TypeVarCheck(index, value_type.rust_name()) + InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( + index, + value_type.rust_name(), + )) + } + + pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { + InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( + value_type.rust_name(), + )) } pub fn new_is_field_equal( - format_field: 
&FormatField, + format: &InstructionFormat, + field_name: &'static str, imm_value: String, ) -> InstructionPredicateNode { - InstructionPredicateNode::IsFieldEqual(format_field.member.into(), imm_value) + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsEqual(imm_value), + )) } - pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { - InstructionPredicateNode::CtrlTypeVarCheck(value_type.rust_name()) + /// Used only for the AST module, which directly passes in the format field. + pub fn new_is_field_equal_ast( + format: &InstructionFormat, + field: &FormatField, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + field.member, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + pub fn new_is_signed_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsSignedInt(width, scale), + )) + } + + pub fn new_is_unsigned_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsUnsignedInt(width, scale), + )) + } + + pub fn new_is_zero_int( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZeroInt, + )) + } + + pub fn new_is_zero_32bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero32BitFloat, + )) + } + + pub fn new_is_zero_64bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero64BitFloat, + )) + } + + pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { + assert!( + format.has_value_list, + "the format must be variadic in number of arguments" + ); + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + "args", + FormatPredicateKind::LengthEquals(size), + )) + } + + pub fn new_is_colocated_func( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsColocatedFunc, + )) + } + + pub fn new_is_colocated_data(format_registry: &FormatRegistry) -> InstructionPredicateNode { + let format = format_registry.get(format_registry.by_name("UnaryGlobalValue")); + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + "global_value", + FormatPredicateKind::IsColocatedData, + )) } pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { @@ -615,6 +966,9 @@ impl InstructionPredicate { let mut and_nodes = match node { Some(node) => match node { InstructionPredicateNode::And(nodes) => nodes, + InstructionPredicateNode::Or(_) => { + panic!("Can't mix and/or without implementing operator 
precedence!") + } _ => vec![node], }, _ => Vec::new(), @@ -624,12 +978,81 @@ impl InstructionPredicate { self } + pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut or_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::Or(nodes) => nodes, + InstructionPredicateNode::And(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + or_nodes.push(new_node); + self.node = Some(InstructionPredicateNode::Or(or_nodes)); + self + } + pub fn rust_predicate(&self) -> String { match &self.node { Some(root) => root.rust_predicate(), None => "true".into(), } } + + /// Returns true if the predicate only depends on type parameters (and not on an instruction + /// format). + pub fn is_type_predicate(&self) -> bool { + self.node.as_ref().unwrap().is_type_predicate() + } + + /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening + /// AND/OR). + pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + self.node.as_ref().unwrap().collect_leaves() + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct InstructionPredicateNumber(u32); +entity_impl!(InstructionPredicateNumber); + +pub type InstructionPredicateMap = PrimaryMap; + +/// A registry of predicates to help deduplicating them, during Encodings construction. When the +/// construction process is over, it needs to be extracted with `extract` and associated to the +/// TargetIsa. +pub struct InstructionPredicateRegistry { + /// Maps a predicate number to its actual predicate. + map: InstructionPredicateMap, + + /// Inverse map: maps a predicate to its predicate number. This is used before inserting a + /// predicate, to check whether it already exists. + inverted_map: HashMap, +} + +impl InstructionPredicateRegistry { + pub fn new() -> Self { + Self { + map: PrimaryMap::new(), + inverted_map: HashMap::new(), + } + } + pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { + match self.inverted_map.get(&predicate) { + Some(&found) => found, + None => { + let key = self.map.push(predicate.clone()); + self.inverted_map.insert(predicate, key); + key + } + } + } + pub fn extract(self) -> InstructionPredicateMap { + self.map + } } /// An instruction specification, containing an instruction that has bound types or not. @@ -645,6 +1068,19 @@ impl InstSpec { InstSpec::Bound(bound_inst) => &bound_inst.inst, } } + pub fn bind(&self, lane_type: impl Into) -> BoundInstruction { + match self { + InstSpec::Inst(inst) => inst.bind(lane_type), + InstSpec::Bound(inst) => inst.clone().bind(lane_type), + } + } + + pub fn bind_ref(&self, reference_type: impl Into) -> BoundInstruction { + match self { + InstSpec::Inst(inst) => inst.bind_ref(reference_type), + InstSpec::Bound(inst) => inst.clone().bind_ref(reference_type), + } + } } impl Into for &Instruction { @@ -661,11 +1097,65 @@ impl Into for BoundInstruction { /// Helper bind reused by {Bound,}Instruction::bind. fn bind( + inst: Instruction, + lane_type: Option, + mut value_types: Vec, +) -> BoundInstruction { + match lane_type { + Some(lane_type) => { + value_types.push(ValueTypeOrAny::ValueType(lane_type.into())); + } + None => { + value_types.push(ValueTypeOrAny::Any); + } + } + + verify_polymorphic_binding(&inst, &value_types); + + BoundInstruction { inst, value_types } +} + +/// Helper bind for reference types reused by {Bound,}Instruction::bind_ref. 
+fn bind_ref( + inst: Instruction, + reference_type: Option, + mut value_types: Vec, +) -> BoundInstruction { + match reference_type { + Some(reference_type) => { + value_types.push(ValueTypeOrAny::ValueType(reference_type.into())); + } + None => { + value_types.push(ValueTypeOrAny::Any); + } + } + + verify_polymorphic_binding(&inst, &value_types); + + BoundInstruction { inst, value_types } +} + +/// Helper bind for vector types reused by {Bound,}Instruction::bind. +fn bind_vector( inst: Instruction, lane_type: LaneType, - mut value_types: Vec, + vector_size_in_bits: u64, + mut value_types: Vec, ) -> BoundInstruction { - value_types.push(ValueType::from(lane_type)); + let num_lanes = vector_size_in_bits / lane_type.lane_bits(); + assert!( + num_lanes >= 2, + "Minimum lane number for bind_vector is 2, found {}.", + num_lanes, + ); + let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); + value_types.push(ValueTypeOrAny::ValueType(vector_type)); + verify_polymorphic_binding(&inst, &value_types); + BoundInstruction { inst, value_types } +} + +/// Helper to verify that binding types to the instruction does not violate polymorphic rules +fn verify_polymorphic_binding(inst: &Instruction, value_types: &Vec) { match &inst.polymorphic_info { Some(poly) => { assert!( @@ -680,5 +1170,4 @@ fn bind( )); } } - BoundInstruction { inst, value_types } } diff --git a/cranelift-codegen/meta/src/cdsl/isa.rs b/cranelift-codegen/meta/src/cdsl/isa.rs index dcac2605c..e561ce2e9 100644 --- a/cranelift-codegen/meta/src/cdsl/isa.rs +++ b/cranelift-codegen/meta/src/cdsl/isa.rs @@ -1,18 +1,26 @@ +use std::collections::HashSet; +use std::iter::FromIterator; + use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionGroup; +use crate::cdsl::instructions::{InstructionGroup, InstructionPredicateMap}; +use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::IsaRegs; use crate::cdsl::settings::SettingGroup; use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; -use std::collections::HashSet; -use std::iter::FromIterator; - pub struct TargetIsa { pub name: &'static str, pub instructions: InstructionGroup, pub settings: SettingGroup, pub regs: IsaRegs, + pub recipes: Recipes, pub cpu_modes: Vec, + pub encodings_predicates: InstructionPredicateMap, + + /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the + /// local array of transform groups that are directly used. We use this map to get this + /// information. + pub local_transform_groups: Vec, } impl TargetIsa { @@ -21,14 +29,35 @@ impl TargetIsa { instructions: InstructionGroup, settings: SettingGroup, regs: IsaRegs, + recipes: Recipes, cpu_modes: Vec, + encodings_predicates: InstructionPredicateMap, ) -> Self { + // Compute the local TransformGroup index. + let mut local_transform_groups = Vec::new(); + for cpu_mode in &cpu_modes { + let transform_groups = cpu_mode.direct_transform_groups(); + for group_index in transform_groups { + // find() is fine here: the number of transform group is < 5 as of June 2019. 
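The lane-count rule in bind_vector above: the requested vector width divided by the lane width must give at least two lanes. A standalone restatement (the divisibility assert is an extra assumption of this sketch, not a check in the code above):

fn vector_lanes(vector_size_in_bits: u64, lane_bits: u64) -> u64 {
    assert_eq!(
        vector_size_in_bits % lane_bits,
        0,
        "sketch assumption: size must be a whole number of lanes"
    );
    let num_lanes = vector_size_in_bits / lane_bits;
    assert!(
        num_lanes >= 2,
        "Minimum lane number for bind_vector is 2, found {}.",
        num_lanes
    );
    num_lanes
}

fn main() {
    assert_eq!(vector_lanes(64, 32), 2);  // i32x2
    assert_eq!(vector_lanes(128, 8), 16); // i8x16
}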
+ if local_transform_groups + .iter() + .find(|&val| group_index == *val) + .is_none() + { + local_transform_groups.push(group_index); + } + } + } + Self { name, instructions, settings, regs, + recipes, cpu_modes, + encodings_predicates, + local_transform_groups, } } @@ -39,9 +68,17 @@ impl TargetIsa { all_groups: &TransformGroups, ) -> Vec { let mut set = HashSet::new(); - for cpu_mode in &self.cpu_modes { - set.extend(cpu_mode.transitive_transform_groups(all_groups)); + + for &root in self.local_transform_groups.iter() { + set.insert(root); + let mut base = root; + // Follow the chain of chain_with. + while let Some(chain_with) = &all_groups.get(base).chain_with { + set.insert(*chain_with); + base = *chain_with; + } } + let mut vec = Vec::from_iter(set); vec.sort(); vec @@ -49,13 +86,14 @@ impl TargetIsa { /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly /// reachable set of TransformGroup this TargetIsa uses. - pub fn direct_transform_groups(&self) -> Vec { - let mut set = HashSet::new(); - for cpu_mode in &self.cpu_modes { - set.extend(cpu_mode.direct_transform_groups()); - } - let mut vec = Vec::from_iter(set); - vec.sort(); - vec + pub fn direct_transform_groups(&self) -> &Vec { + &self.local_transform_groups + } + + pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { + self.local_transform_groups + .iter() + .position(|&val| val == group_index) + .expect("TransformGroup unused by this TargetIsa!") } } diff --git a/cranelift-codegen/meta/src/cdsl/mod.rs b/cranelift-codegen/meta/src/cdsl/mod.rs index d3227e2dd..370b442ae 100644 --- a/cranelift-codegen/meta/src/cdsl/mod.rs +++ b/cranelift-codegen/meta/src/cdsl/mod.rs @@ -6,10 +6,12 @@ #[macro_use] pub mod ast; pub mod cpu_modes; +pub mod encodings; pub mod formats; pub mod instructions; pub mod isa; pub mod operands; +pub mod recipes; pub mod regs; pub mod settings; pub mod type_inference; diff --git a/cranelift-codegen/meta/src/cdsl/recipes.rs b/cranelift-codegen/meta/src/cdsl/recipes.rs new file mode 100644 index 000000000..8157e0886 --- /dev/null +++ b/cranelift-codegen/meta/src/cdsl/recipes.rs @@ -0,0 +1,297 @@ +use cranelift_entity::{entity_impl, PrimaryMap}; + +use crate::cdsl::formats::{FormatRegistry, InstructionFormatIndex}; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::regs::RegClassIndex; +use crate::cdsl::settings::SettingPredicateNumber; + +/// A specific register in a register class. +/// +/// A register is identified by the top-level register class it belongs to and +/// its first register unit. +/// +/// Specific registers are used to describe constraints on instructions where +/// some operands must use a fixed register. +/// +/// Register instances can be created with the constructor, or accessed as +/// attributes on the register class: `GPR.rcx`. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub struct Register { + pub regclass: RegClassIndex, + pub unit: u8, +} + +impl Register { + pub fn new(regclass: RegClassIndex, unit: u8) -> Self { + Self { regclass, unit } + } +} + +/// An operand that must be in a stack slot. +/// +/// A `Stack` object can be used to indicate an operand constraint for a value +/// operand that must live in a stack slot. 
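Aside: `local_transform_groups` keeps the first-seen order of the directly used groups, and `translate_group_index` later recovers a group's position in that local array. In other words, for any directly used group (the index value is illustrative):

    let global: TransformGroupIndex = isa.direct_transform_groups()[2];
    assert_eq!(isa.translate_group_index(global), 2);
    // Groups reachable only via `chain_with` are collected by
    // transitive_transform_groups() but are not in the local array,
    // so translating them would panic.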
+#[derive(Copy, Clone, Hash, PartialEq)]
+pub struct Stack {
+    pub regclass: RegClassIndex,
+}
+
+impl Stack {
+    pub fn new(regclass: RegClassIndex) -> Self {
+        Self { regclass }
+    }
+    pub fn stack_base_mask(&self) -> &'static str {
+        // TODO: Make this configurable instead of just using the SP.
+        "StackBaseMask(1)"
+    }
+}
+
+#[derive(Clone, Hash, PartialEq)]
+pub struct BranchRange {
+    pub inst_size: u64,
+    pub range: u64,
+}
+
+#[derive(Copy, Clone, Hash, PartialEq)]
+pub enum OperandConstraint {
+    RegClass(RegClassIndex),
+    FixedReg(Register),
+    TiedInput(usize),
+    Stack(Stack),
+}
+
+impl Into<OperandConstraint> for RegClassIndex {
+    fn into(self) -> OperandConstraint {
+        OperandConstraint::RegClass(self)
+    }
+}
+
+impl Into<OperandConstraint> for Register {
+    fn into(self) -> OperandConstraint {
+        OperandConstraint::FixedReg(self)
+    }
+}
+
+impl Into<OperandConstraint> for usize {
+    fn into(self) -> OperandConstraint {
+        OperandConstraint::TiedInput(self)
+    }
+}
+
+impl Into<OperandConstraint> for Stack {
+    fn into(self) -> OperandConstraint {
+        OperandConstraint::Stack(self)
+    }
+}
+
+/// A recipe for encoding instructions with a given format.
+///
+/// Many different instructions can be encoded by the same recipe, but they
+/// must all have the same instruction format.
+///
+/// The `operands_in` and `operands_out` arguments are tuples specifying the register
+/// allocation constraints for the value operands and results respectively. The
+/// possible constraints for an operand are:
+///
+/// - A `RegClass` specifying the set of allowed registers.
+/// - A `Register` specifying a fixed-register operand.
+/// - An integer indicating that this result is tied to a value operand, so
+///   they must use the same register.
+/// - A `Stack` specifying a value in a stack slot.
+///
+/// The `branch_range` argument must be provided for recipes that can encode
+/// branch instructions. It is an `(origin, bits)` tuple describing the exact
+/// range that can be encoded in a branch instruction.
+#[derive(Clone)]
+pub struct EncodingRecipe {
+    /// Short mnemonic name for this recipe.
+    pub name: String,
+
+    /// Associated instruction format.
+    pub format: InstructionFormatIndex,
+
+    /// Base number of bytes in the binary encoded instruction.
+    pub base_size: u64,
+
+    /// Tuple of register constraints for value operands.
+    pub operands_in: Vec<OperandConstraint>,
+
+    /// Tuple of register constraints for results.
+    pub operands_out: Vec<OperandConstraint>,
+
+    /// Function name to use when computing actual size.
+    pub compute_size: &'static str,
+
+    /// `(origin, bits)` range for branches.
+    pub branch_range: Option<BranchRange>,
+
+    /// This instruction clobbers `iflags` and `fflags`; true by default.
+    pub clobbers_flags: bool,
+
+    /// Instruction predicate.
+    pub inst_predicate: Option<InstructionPredicate>,
+
+    /// ISA predicate.
+    pub isa_predicate: Option<SettingPredicateNumber>,
+
+    /// Rust code for binary emission.
+    pub emit: Option<String>,
+}
+
+// Implement PartialEq ourselves: take all the fields into account but the name.
+impl PartialEq for EncodingRecipe {
+    fn eq(&self, other: &Self) -> bool {
+        self.format == other.format
+            && self.base_size == other.base_size
+            && self.operands_in == other.operands_in
+            && self.operands_out == other.operands_out
+            && self.compute_size == other.compute_size
+            && self.branch_range == other.branch_range
+            && self.clobbers_flags == other.clobbers_flags
+            && self.inst_predicate == other.inst_predicate
+            && self.isa_predicate == other.isa_predicate
+            && self.emit == other.emit
+    }
+}
+
+// To allow using it in a hashmap.
+impl Eq for EncodingRecipe {} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct EncodingRecipeNumber(u32); +entity_impl!(EncodingRecipeNumber); + +pub type Recipes = PrimaryMap; + +#[derive(Clone)] +pub struct EncodingRecipeBuilder { + pub name: String, + format: InstructionFormatIndex, + pub base_size: u64, + pub operands_in: Option>, + pub operands_out: Option>, + compute_size: Option<&'static str>, + pub branch_range: Option, + pub emit: Option, + clobbers_flags: Option, + inst_predicate: Option, + isa_predicate: Option, +} + +impl EncodingRecipeBuilder { + pub fn new(name: impl Into, format: InstructionFormatIndex, base_size: u64) -> Self { + Self { + name: name.into(), + format, + base_size, + operands_in: None, + operands_out: None, + compute_size: None, + branch_range: None, + emit: None, + clobbers_flags: None, + inst_predicate: None, + isa_predicate: None, + } + } + + // Setters. + pub fn operands_in(mut self, constraints: Vec>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn operands_out(mut self, constraints: Vec>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn clobbers_flags(mut self, flag: bool) -> Self { + assert!(self.clobbers_flags.is_none()); + self.clobbers_flags = Some(flag); + self + } + pub fn emit(mut self, code: impl Into) -> Self { + assert!(self.emit.is_none()); + self.emit = Some(code.into()); + self + } + pub fn branch_range(mut self, range: (u64, u64)) -> Self { + assert!(self.branch_range.is_none()); + self.branch_range = Some(BranchRange { + inst_size: range.0, + range: range.1, + }); + self + } + pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(pred); + self + } + pub fn inst_predicate(mut self, inst_predicate: impl Into) -> Self { + assert!(self.inst_predicate.is_none()); + self.inst_predicate = Some(inst_predicate.into()); + self + } + pub fn compute_size(mut self, compute_size: &'static str) -> Self { + assert!(self.compute_size.is_none()); + self.compute_size = Some(compute_size); + self + } + + pub fn build(self, formats: &FormatRegistry) -> EncodingRecipe { + let operands_in = self.operands_in.unwrap_or(Vec::new()); + let operands_out = self.operands_out.unwrap_or(Vec::new()); + + // The number of input constraints must match the number of format input operands. + if !formats.get(self.format).has_value_list { + let format = formats.get(self.format); + assert!( + operands_in.len() == format.num_value_operands, + format!( + "missing operand constraints for recipe {} (format {})", + self.name, format.name + ) + ); + } + + // Ensure tied inputs actually refer to existing inputs. 
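Aside: a hypothetical recipe definition using the builder above (the `binary_format` index, the `gpr` register class and the emit string are placeholders, not real ISA definitions):

    let recipe = EncodingRecipeBuilder::new("binop", binary_format, 2)
        .operands_in(vec![gpr, gpr])  // RegClassIndex converts via Into<OperandConstraint>
        .operands_out(vec![0usize])   // output tied to input operand 0
        .emit("put_op2(bits, in_reg0, in_reg1, sink);")
        .build(&formats);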
+        for constraint in operands_in.iter().chain(operands_out.iter()) {
+            if let OperandConstraint::TiedInput(n) = *constraint {
+                assert!(n < operands_in.len());
+            }
+        }
+
+        let compute_size = match self.compute_size {
+            Some(compute_size) => compute_size,
+            None => "base_size",
+        };
+
+        let clobbers_flags = self.clobbers_flags.unwrap_or(true);
+
+        EncodingRecipe {
+            name: self.name.into(),
+            format: self.format,
+            base_size: self.base_size,
+            operands_in,
+            operands_out,
+            compute_size,
+            branch_range: self.branch_range,
+            clobbers_flags,
+            inst_predicate: self.inst_predicate,
+            isa_predicate: self.isa_predicate,
+            emit: self.emit,
+        }
+    }
+}
diff --git a/cranelift-codegen/meta/src/cdsl/regs.rs b/cranelift-codegen/meta/src/cdsl/regs.rs
index 8a84d0361..920b1f542 100644
--- a/cranelift-codegen/meta/src/cdsl/regs.rs
+++ b/cranelift-codegen/meta/src/cdsl/regs.rs
@@ -11,6 +11,7 @@ pub struct RegBank {
     pub names: Vec<&'static str>,
     pub prefix: &'static str,
     pub pressure_tracking: bool,
+    pub pinned_reg: Option<u16>,
     pub toprcs: Vec<RegClassIndex>,
     pub classes: Vec<RegClassIndex>,
 }
@@ -23,6 +24,7 @@ impl RegBank {
         names: Vec<&'static str>,
         prefix: &'static str,
         pressure_tracking: bool,
+        pinned_reg: Option<u16>,
     ) -> Self {
         RegBank {
             name,
@@ -31,13 +33,45 @@ impl RegBank {
             names,
             prefix,
             pressure_tracking,
+            pinned_reg,
             toprcs: Vec::new(),
             classes: Vec::new(),
         }
     }
+
+    fn unit_by_name(&self, name: &'static str) -> u8 {
+        let unit = if let Some(found) = self.names.iter().position(|&reg_name| reg_name == name) {
+            found
+        } else {
+            // Try to match without the bank prefix.
+            assert!(name.starts_with(self.prefix));
+            let name_without_prefix = &name[self.prefix.len()..];
+            if let Some(found) = self
+                .names
+                .iter()
+                .position(|&reg_name| reg_name == name_without_prefix)
+            {
+                found
+            } else {
+                // Last resort: try to parse a number and use it as an index into the array, e.g. r15 on x86.
+                if let Ok(as_num) = name_without_prefix.parse::<u8>() {
+                    assert!(
+                        (as_num - self.first_unit) < self.units,
+                        "trying to get {}, but bank only has {} registers!",
+                        name,
+                        self.units
+                    );
+                    (as_num - self.first_unit) as usize
+                } else {
+                    panic!("invalid register name {}", name);
+                }
+            }
+        };
+        self.first_unit + (unit as u8)
+    }
 }

-#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
 pub struct RegClassIndex(u32);
 entity_impl!(RegClassIndex);

@@ -152,6 +186,7 @@ pub struct RegBankBuilder {
     pub names: Vec<&'static str>,
     pub prefix: &'static str,
     pub pressure_tracking: Option<bool>,
+    pub pinned_reg: Option<u16>,
 }

 impl RegBankBuilder {
@@ -162,6 +197,7 @@ impl RegBankBuilder {
             names: vec![],
             prefix,
             pressure_tracking: None,
+            pinned_reg: None,
         }
     }
     pub fn units(mut self, units: u8) -> Self {
@@ -176,6 +212,11 @@
         self.pressure_tracking = Some(track);
         self
     }
+    pub fn pinned_reg(mut self, unit: u16) -> Self {
+        assert!(unit < (self.units as u16));
+        self.pinned_reg = Some(unit);
+        self
+    }
 }

 pub struct IsaRegsBuilder {
@@ -215,6 +256,7 @@ impl IsaRegsBuilder {
                 builder
                     .pressure_tracking
                     .expect("Pressure tracking must be explicitly set"),
+                builder.pinned_reg,
             ))
         }

@@ -352,4 +394,17 @@ impl IsaRegs {
     ) -> Self {
         Self { banks, classes }
     }
+
+    pub fn class_by_name(&self, name: &str) -> RegClassIndex {
+        self.classes
+            .values()
+            .find(|&class| class.name == name)
+            .expect(&format!("register class {} not found", name))
+            .index
+    }
+
+    pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 {
+        let bank_index = self.classes.get(class_index).unwrap().bank;
+        self.banks.get(bank_index).unwrap().unit_by_name(name)
+    }
 }
diff --git a/cranelift-codegen/meta/src/cdsl/settings.rs b/cranelift-codegen/meta/src/cdsl/settings.rs
index 61677a71f..5a45d9fb9 100644
--- a/cranelift-codegen/meta/src/cdsl/settings.rs
+++ b/cranelift-codegen/meta/src/cdsl/settings.rs
@@ -148,6 +148,14 @@ impl SettingGroup {
         }
         panic!("Should have found bool setting by name.");
     }
+
+    pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber {
+        self.predicates
+            .iter()
+            .find(|pred| pred.name == name)
+            .unwrap_or_else(|| panic!("unknown predicate {}", name))
+            .number
+    }
 }

 /// This is the basic information needed to track the specific parts of a setting when building
@@ -209,10 +217,12 @@ struct ProtoPredicate {
     node: PredicateNode,
 }

+pub type SettingPredicateNumber = u8;
+
 pub struct Predicate {
     pub name: &'static str,
     node: PredicateNode,
-    pub number: u8,
+    pub number: SettingPredicateNumber,
 }

 impl Predicate {
diff --git a/cranelift-codegen/meta/src/cdsl/type_inference.rs b/cranelift-codegen/meta/src/cdsl/type_inference.rs
index e3a1e9bb0..a56d81463 100644
--- a/cranelift-codegen/meta/src/cdsl/type_inference.rs
+++ b/cranelift-codegen/meta/src/cdsl/type_inference.rs
@@ -4,7 +4,7 @@
 use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar};
 use std::collections::{HashMap, HashSet};
 use std::iter::FromIterator;

-#[derive(Hash, PartialEq, Eq)]
+#[derive(Debug, Hash, PartialEq, Eq)]
 pub enum Constraint {
     /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at
     /// runtime. This requires that:
@@ -364,7 +364,6 @@ impl TypeEnvironment {
         // Sanity check: translated constraints should refer only to real variables.
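Aside: `unit_by_name` resolves a register name in three steps. For a hypothetical bank with prefix "r" and `first_unit` 0: an exact match against `names` wins; otherwise the prefix is stripped and matched again; otherwise the remainder must parse as a unit number:

    // "r15" is not in `names`, nor is "15" after stripping the prefix,
    // but "15" parses: unit = 15 - first_unit, bounds-checked against `units`.
    let unit = regs.regunit_by_name(gpr_class, "r15"); // `gpr_class` is illustrative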
for arg in constraint.typevar_args() { - assert!(vars_tv.contains(arg)); let arg_free_tv = arg.free_typevar(); assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); } diff --git a/cranelift-codegen/meta/src/cdsl/types.rs b/cranelift-codegen/meta/src/cdsl/types.rs index a8aa4020d..92b9ab3a2 100644 --- a/cranelift-codegen/meta/src/cdsl/types.rs +++ b/cranelift-codegen/meta/src/cdsl/types.rs @@ -11,12 +11,14 @@ use crate::shared::types as shared_types; // // 0: Void // 0x01-0x6f: Special types -// 0x70-0x7f: Lane types +// 0x70-0x7d: Lane types +// 0x7e-0x7f: Reference types // 0x80-0xff: Vector types // // Vector types are encoded with the lane type in the low 4 bits and log2(lanes) // in the high 4 bits, giving a range of 2-256 lanes. static LANE_BASE: u8 = 0x70; +static REFERENCE_BASE: u8 = 0x7E; // Rust name prefix used for the `rust_name` method. static _RUST_NAME_PREFIX: &'static str = "ir::types::"; @@ -27,10 +29,11 @@ static _RUST_NAME_PREFIX: &'static str = "ir::types::"; /// /// All SSA values have a type that is described by an instance of `ValueType` /// or one of its subclasses. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum ValueType { BV(BVType), Lane(LaneType), + Reference(ReferenceType), Special(SpecialType), Vector(VectorType), } @@ -46,11 +49,16 @@ impl ValueType { SpecialTypeIterator::new() } + pub fn all_reference_types() -> ReferenceTypeIterator { + ReferenceTypeIterator::new() + } + /// Return a string containing the documentation comment for this type. pub fn doc(&self) -> String { match *self { ValueType::BV(ref b) => b.doc(), ValueType::Lane(l) => l.doc(), + ValueType::Reference(r) => r.doc(), ValueType::Special(s) => s.doc(), ValueType::Vector(ref v) => v.doc(), } @@ -61,6 +69,7 @@ impl ValueType { match *self { ValueType::BV(ref b) => b.lane_bits(), ValueType::Lane(l) => l.lane_bits(), + ValueType::Reference(r) => r.lane_bits(), ValueType::Special(s) => s.lane_bits(), ValueType::Vector(ref v) => v.lane_bits(), } @@ -84,6 +93,7 @@ impl ValueType { match *self { ValueType::BV(_) => None, ValueType::Lane(l) => Some(l.number()), + ValueType::Reference(r) => Some(r.number()), ValueType::Special(s) => Some(s.number()), ValueType::Vector(ref v) => Some(v.number()), } @@ -112,6 +122,7 @@ impl fmt::Display for ValueType { match *self { ValueType::BV(ref b) => b.fmt(f), ValueType::Lane(l) => l.fmt(f), + ValueType::Reference(r) => r.fmt(f), ValueType::Special(s) => s.fmt(f), ValueType::Vector(ref v) => v.fmt(f), } @@ -132,6 +143,13 @@ impl From for ValueType { } } +/// Create a ValueType from a given reference type. +impl From for ValueType { + fn from(reference: ReferenceType) -> Self { + ValueType::Reference(reference) + } +} + /// Create a ValueType from a given special type. impl From for ValueType { fn from(spec: SpecialType) -> Self { @@ -147,7 +165,7 @@ impl From for ValueType { } /// A concrete scalar type that can appear as a vector lane too. 
-#[derive(Clone, Copy, PartialEq)] +#[derive(Clone, Copy, PartialEq, Eq, Hash)] pub enum LaneType { BoolType(shared_types::Bool), FloatType(shared_types::Float), @@ -197,12 +215,14 @@ impl LaneType { LaneType::BoolType(shared_types::Bool::B16) => 2, LaneType::BoolType(shared_types::Bool::B32) => 3, LaneType::BoolType(shared_types::Bool::B64) => 4, - LaneType::IntType(shared_types::Int::I8) => 5, - LaneType::IntType(shared_types::Int::I16) => 6, - LaneType::IntType(shared_types::Int::I32) => 7, - LaneType::IntType(shared_types::Int::I64) => 8, - LaneType::FloatType(shared_types::Float::F32) => 9, - LaneType::FloatType(shared_types::Float::F64) => 10, + LaneType::BoolType(shared_types::Bool::B128) => 5, + LaneType::IntType(shared_types::Int::I8) => 6, + LaneType::IntType(shared_types::Int::I16) => 7, + LaneType::IntType(shared_types::Int::I32) => 8, + LaneType::IntType(shared_types::Int::I64) => 9, + LaneType::IntType(shared_types::Int::I128) => 10, + LaneType::FloatType(shared_types::Float::F32) => 11, + LaneType::FloatType(shared_types::Float::F64) => 12, } } @@ -213,6 +233,7 @@ impl LaneType { 16 => shared_types::Bool::B16, 32 => shared_types::Bool::B32, 64 => shared_types::Bool::B64, + 128 => shared_types::Bool::B128, _ => unreachable!("unxpected num bits for bool"), }) } @@ -223,6 +244,7 @@ impl LaneType { 16 => shared_types::Int::I16, 32 => shared_types::Int::I32, 64 => shared_types::Int::I64, + 128 => shared_types::Int::I128, _ => unreachable!("unxpected num bits for int"), }) } @@ -242,6 +264,27 @@ impl LaneType { ValueType::Vector(VectorType::new(*self, lanes.into())) } } + + pub fn is_float(&self) -> bool { + match self { + LaneType::FloatType(_) => true, + _ => false, + } + } + + pub fn is_int(&self) -> bool { + match self { + LaneType::IntType(_) => true, + _ => false, + } + } + + pub fn is_bool(&self) -> bool { + match self { + LaneType::BoolType(_) => true, + _ => false, + } + } } impl fmt::Display for LaneType { @@ -327,7 +370,7 @@ impl Iterator for LaneTypeIterator { /// /// A vector type has a lane type which is an instance of `LaneType`, /// and a positive number of lanes. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq, Hash)] pub struct VectorType { base: LaneType, lanes: u64, @@ -393,7 +436,7 @@ impl fmt::Debug for VectorType { } /// A flat bitvector type. Used for semantics description only. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Eq, Hash)] pub struct BVType { bits: u64, } @@ -515,3 +558,83 @@ impl Iterator for SpecialTypeIterator { } } } + +/// Reference type is scalar type, but not lane type. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct ReferenceType(pub shared_types::Reference); + +impl ReferenceType { + /// Return a string containing the documentation comment for this reference type. + pub fn doc(self) -> String { + format!("An opaque reference type with {} bits.", self.lane_bits()) + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self.0 { + shared_types::Reference::R32 => 32, + shared_types::Reference::R64 => 64, + } + } + + /// Find the unique number associated with this reference type. 
+ pub fn number(self) -> u8 { + REFERENCE_BASE + + match self { + ReferenceType(shared_types::Reference::R32) => 0, + ReferenceType(shared_types::Reference::R64) => 1, + } + } + + pub fn ref_from_bits(num_bits: u16) -> ReferenceType { + ReferenceType(match num_bits { + 32 => shared_types::Reference::R32, + 64 => shared_types::Reference::R64, + _ => unreachable!("unexpected number of bits for a reference type"), + }) + } +} + +impl fmt::Display for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "r{}", self.lane_bits()) + } +} + +impl fmt::Debug for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ReferenceType(bits={})", self.lane_bits()) + } +} + +/// Create a ReferenceType from a given reference variant. +impl From for ReferenceType { + fn from(r: shared_types::Reference) -> Self { + ReferenceType(r) + } +} + +/// An iterator for different reference types. +pub struct ReferenceTypeIterator { + reference_iter: shared_types::ReferenceIterator, +} + +impl ReferenceTypeIterator { + /// Create a new reference type iterator. + fn new() -> Self { + Self { + reference_iter: shared_types::ReferenceIterator::new(), + } + } +} + +impl Iterator for ReferenceTypeIterator { + type Item = ReferenceType; + fn next(&mut self) -> Option { + if let Some(r) = self.reference_iter.next() { + Some(ReferenceType::from(r)) + } else { + None + } + } +} diff --git a/cranelift-codegen/meta/src/cdsl/typevar.rs b/cranelift-codegen/meta/src/cdsl/typevar.rs index 22640b6b9..78e702cac 100644 --- a/cranelift-codegen/meta/src/cdsl/typevar.rs +++ b/cranelift-codegen/meta/src/cdsl/typevar.rs @@ -6,10 +6,11 @@ use std::iter::FromIterator; use std::ops; use std::rc::Rc; -use crate::cdsl::types::{BVType, LaneType, SpecialType, ValueType}; +use crate::cdsl::types::{BVType, LaneType, ReferenceType, SpecialType, ValueType}; const MAX_LANES: u16 = 256; -const MAX_BITS: u16 = 64; +const MAX_BITS: u16 = 128; +const MAX_FLOAT_BITS: u16 = 64; const MAX_BITVEC: u16 = MAX_BITS * MAX_LANES; /// Type variables can be used in place of concrete types when defining @@ -64,6 +65,10 @@ impl TypeVar { ValueType::Special(special_type) => { return TypeVar::new(name, doc, builder.specials(vec![special_type]).build()); } + ValueType::Reference(ReferenceType(reference_type)) => { + let bits = reference_type as RangeBound; + return TypeVar::new(name, doc, builder.refs(bits..bits).build()); + } ValueType::Lane(lane_type) => (lane_type, 1), ValueType::Vector(vec_type) => { (vec_type.lane_type(), vec_type.lane_count() as RangeBound) @@ -173,7 +178,7 @@ impl TypeVar { "can't double all integer types" ); assert!( - ts.floats.len() == 0 || *ts.floats.iter().max().unwrap() < MAX_BITS, + ts.floats.len() == 0 || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, "can't double all float types" ); assert!( @@ -406,6 +411,7 @@ pub struct TypeSet { pub ints: NumSet, pub floats: NumSet, pub bools: NumSet, + pub refs: NumSet, pub bitvecs: NumSet, pub specials: Vec, } @@ -416,6 +422,7 @@ impl TypeSet { ints: NumSet, floats: NumSet, bools: NumSet, + refs: NumSet, bitvecs: NumSet, specials: Vec, ) -> Self { @@ -424,6 +431,7 @@ impl TypeSet { ints, floats, bools, + refs, bitvecs, specials, } @@ -432,7 +440,11 @@ impl TypeSet { /// Return the number of concrete types represented by this typeset. 
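Aside: with `REFERENCE_BASE` at 0x7E, the two reference types land exactly in the 0x7e-0x7f slot reserved in the type-number space comment earlier:

    assert_eq!(ReferenceType(shared_types::Reference::R32).number(), 0x7e);
    assert_eq!(ReferenceType(shared_types::Reference::R64).number(), 0x7f);
    assert_eq!(ReferenceType::ref_from_bits(64).to_string(), "r64");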
pub fn size(&self) -> usize { self.lanes.len() - * (self.ints.len() + self.floats.len() + self.bools.len() + self.bitvecs.len()) + * (self.ints.len() + + self.floats.len() + + self.bools.len() + + self.refs.len() + + self.bitvecs.len()) + self.specials.len() } @@ -462,6 +474,7 @@ impl TypeSet { let mut copy = self.clone(); copy.ints = NumSet::new(); copy.floats = NumSet::new(); + copy.refs = NumSet::new(); copy.bitvecs = NumSet::new(); if (&self.lanes - &num_set![1]).len() > 0 { copy.bools = &self.ints | &self.floats; @@ -491,7 +504,7 @@ impl TypeSet { copy.floats = NumSet::from_iter( self.floats .iter() - .filter(|&&x| x < MAX_BITS) + .filter(|&&x| x < MAX_FLOAT_BITS) .map(|&x| x * 2), ); copy.bools = NumSet::from_iter( @@ -544,6 +557,7 @@ impl TypeSet { copy.ints = NumSet::new(); copy.bools = NumSet::new(); copy.floats = NumSet::new(); + copy.refs = NumSet::new(); copy.bitvecs = self .lanes .iter() @@ -568,6 +582,9 @@ impl TypeSet { for &bits in &self.bools { ret.push(LaneType::bool_from_bits(bits).by(num_lanes)); } + for &bits in &self.refs { + ret.push(ReferenceType::ref_from_bits(bits).into()); + } for &bits in &self.bitvecs { assert_eq!(num_lanes, 1); ret.push(BVType::new(bits).into()); @@ -605,7 +622,7 @@ impl TypeSet { let mut copy = self.clone(); copy.bitvecs = NumSet::new(); if self.bools.contains(&1) { - copy.ints = NumSet::from_iter(vec![8, 16, 32, 64]); + copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); copy.floats = NumSet::from_iter(vec![32, 64]); } else { copy.ints = &self.bools - &NumSet::from_iter(vec![1]); @@ -630,6 +647,7 @@ impl TypeSet { let mut ints = range_to_set(Some(8..MAX_BITS)); let mut floats = range_to_set(Some(32..64)); let mut bools = range_to_set(Some(1..MAX_BITS)); + let refs = range_to_set(Some(32..64)); for &l in &all_lanes { for &i in &all_ints { @@ -654,7 +672,7 @@ impl TypeSet { let bitvecs = NumSet::new(); let specials = Vec::new(); - TypeSet::new(lanes, ints, floats, bools, bitvecs, specials) + TypeSet::new(lanes, ints, floats, bools, refs, bitvecs, specials) } } } @@ -664,6 +682,7 @@ impl TypeSet { self.ints = &self.ints & &other.ints; self.floats = &self.floats & &other.floats; self.bools = &self.bools & &other.bools; + self.refs = &self.refs & &other.refs; self.bitvecs = &self.bitvecs & &other.bitvecs; let mut new_specials = Vec::new(); @@ -680,6 +699,7 @@ impl TypeSet { && self.ints.is_subset(&other.ints) && self.floats.is_subset(&other.floats) && self.bools.is_subset(&other.bools) + && self.refs.is_subset(&other.refs) && self.bitvecs.is_subset(&other.bitvecs) && { let specials: HashSet = HashSet::from_iter(self.specials.clone()); @@ -692,12 +712,14 @@ impl TypeSet { set_wider_or_equal(&self.ints, &other.ints) && set_wider_or_equal(&self.floats, &other.floats) && set_wider_or_equal(&self.bools, &other.bools) + && set_wider_or_equal(&self.refs, &other.refs) } pub fn is_narrower(&self, other: &TypeSet) -> bool { set_narrower(&self.ints, &other.ints) && set_narrower(&self.floats, &other.floats) && set_narrower(&self.bools, &other.bools) + && set_narrower(&self.refs, &other.refs) } } @@ -738,6 +760,12 @@ impl fmt::Debug for TypeSet { Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ") )); } + if !self.refs.is_empty() { + subsets.push(format!( + "refs={{{}}}", + Vec::from_iter(self.refs.iter().map(|x| x.to_string())).join(", ") + )); + } if !self.bitvecs.is_empty() { subsets.push(format!( "bitvecs={{{}}}", @@ -760,6 +788,7 @@ pub struct TypeSetBuilder { ints: Interval, floats: Interval, bools: Interval, + refs: Interval, 
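Aside: a quick check of the updated `size()` arithmetic with the new `refs` member, using only scalar lanes (so `lanes = {1}`):

    let ts = TypeSetBuilder::new()
        .ints(Interval::All)  // {8, 16, 32, 64, 128} after the I128 addition
        .refs(Interval::All)  // {32, 64}
        .build();
    assert_eq!(ts.size(), 7); // 1 lane count * (5 ints + 2 refs)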
bitvecs: Interval, includes_scalars: bool, simd_lanes: Interval, @@ -772,6 +801,7 @@ impl TypeSetBuilder { ints: Interval::None, floats: Interval::None, bools: Interval::None, + refs: Interval::None, bitvecs: Interval::None, includes_scalars: true, simd_lanes: Interval::None, @@ -794,6 +824,11 @@ impl TypeSetBuilder { self.bools = interval.into(); self } + pub fn refs(mut self, interval: impl Into) -> Self { + assert!(self.refs == Interval::None); + self.refs = interval.into(); + self + } pub fn includes_scalars(mut self, includes_scalars: bool) -> Self { self.includes_scalars = includes_scalars; self @@ -816,7 +851,7 @@ impl TypeSetBuilder { pub fn build(self) -> TypeSet { let min_lanes = if self.includes_scalars { 1 } else { 2 }; -; + let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None)) .into_iter() .filter(legal_bool) @@ -827,6 +862,7 @@ impl TypeSetBuilder { range_to_set(self.ints.to_range(8..MAX_BITS, None)), range_to_set(self.floats.to_range(32..64, None)), bools, + range_to_set(self.refs.to_range(32..64, None)), range_to_set(self.bitvecs.to_range(1..MAX_BITVEC, None)), self.specials, ) @@ -837,6 +873,7 @@ impl TypeSetBuilder { .ints(Interval::All) .floats(Interval::All) .bools(Interval::All) + .refs(Interval::All) .simd_lanes(Interval::All) .bitvecs(Interval::All) .specials(ValueType::all_special_types().collect()) @@ -914,7 +951,7 @@ fn test_typevar_builder() { let type_set = TypeSetBuilder::new().ints(Interval::All).build(); assert_eq!(type_set.lanes, num_set![1]); assert!(type_set.floats.is_empty()); - assert_eq!(type_set.ints, num_set![8, 16, 32, 64]); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); assert!(type_set.bools.is_empty()); assert!(type_set.bitvecs.is_empty()); assert!(type_set.specials.is_empty()); @@ -923,7 +960,7 @@ fn test_typevar_builder() { assert_eq!(type_set.lanes, num_set![1]); assert!(type_set.floats.is_empty()); assert!(type_set.ints.is_empty()); - assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64]); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); assert!(type_set.bitvecs.is_empty()); assert!(type_set.specials.is_empty()); @@ -1065,7 +1102,7 @@ fn test_forward_images() { ); assert_eq!( TypeSetBuilder::new().ints(32..64).build().double_width(), - TypeSetBuilder::new().ints(64..64).build() + TypeSetBuilder::new().ints(64..128).build() ); assert_eq!( TypeSetBuilder::new().floats(32..32).build().double_width(), @@ -1081,7 +1118,7 @@ fn test_forward_images() { ); assert_eq!( TypeSetBuilder::new().bools(32..64).build().double_width(), - TypeSetBuilder::new().bools(64..64).build() + TypeSetBuilder::new().bools(64..128).build() ); } @@ -1109,7 +1146,7 @@ fn test_backward_images() { assert_eq!( TypeSetBuilder::new() .simd_lanes(1..4) - .bools(1..64) + .bools(1..128) .build() .preimage(DerivedFunc::AsBool), TypeSetBuilder::new() @@ -1169,9 +1206,9 @@ fn test_backward_images() { // Half width. 
    assert_eq!(
        TypeSetBuilder::new()
-            .ints(64..64)
+            .ints(128..128)
             .floats(64..64)
-            .bools(64..64)
+            .bools(128..128)
             .build()
             .preimage(DerivedFunc::HalfWidth)
             .size(),
@@ -1185,7 +1222,7 @@
             .preimage(DerivedFunc::HalfWidth),
         TypeSetBuilder::new()
             .simd_lanes(64..256)
-            .bools(16..64)
+            .bools(16..128)
             .build(),
     );

diff --git a/cranelift-codegen/meta/src/cdsl/xform.rs b/cranelift-codegen/meta/src/cdsl/xform.rs
index 0cc0c6a03..b90d552b9 100644
--- a/cranelift-codegen/meta/src/cdsl/xform.rs
+++ b/cranelift-codegen/meta/src/cdsl/xform.rs
@@ -183,7 +183,14 @@ fn rewrite_expr(
     assert_eq!(
         apply_target.inst().operands_in.len(),
         dummy_args.len(),
-        "number of arguments in instruction is incorrect"
+        "number of arguments in instruction {} is incorrect\nexpected: {:?}",
+        apply_target.inst().name,
+        apply_target
+            .inst()
+            .operands_in
+            .iter()
+            .map(|operand| format!("{}: {}", operand.name, operand.kind.name))
+            .collect::<Vec<_>>(),
     );

     let mut args = Vec::new();
@@ -384,9 +391,6 @@ impl TransformGroups {
     pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup {
         &self.groups[id]
     }
-    pub fn get_mut(&mut self, id: TransformGroupIndex) -> &mut TransformGroup {
-        self.groups.get_mut(id).unwrap()
-    }
     fn next_key(&self) -> TransformGroupIndex {
         self.groups.next_key()
     }
@@ -404,11 +408,12 @@
 #[should_panic]
 fn test_double_custom_legalization() {
     use crate::cdsl::formats::{FormatRegistry, InstructionFormatBuilder};
-    use crate::cdsl::instructions::{InstructionBuilder, InstructionGroupBuilder};
+    use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder};

+    let mut dummy_all = AllInstructions::new();
     let mut format = FormatRegistry::new();
     format.insert(InstructionFormatBuilder::new("nullary"));
-    let mut inst_group = InstructionGroupBuilder::new("test", "", &format);
+    let mut inst_group = InstructionGroupBuilder::new("test", "", &mut dummy_all, &format);
     inst_group.push(InstructionBuilder::new("dummy", "doc"));
     let inst_group = inst_group.build();
     let dummy_inst = inst_group.by_name("dummy");
diff --git a/cranelift-codegen/meta/src/constant_hash.rs b/cranelift-codegen/meta/src/constant_hash.rs
index 7f629d0d8..4090e4d77 100644
--- a/cranelift-codegen/meta/src/constant_hash.rs
+++ b/cranelift-codegen/meta/src/constant_hash.rs
@@ -1,3 +1,5 @@
+use std::iter;
+
 pub fn simple_hash(s: &str) -> usize {
     let mut h: u32 = 5381;
     for c in s.chars() {
@@ -9,8 +11,12 @@
 /// Compute an open addressed, quadratically probed hash table containing
 /// `items`. The returned table is a list containing the elements of the
 /// iterable `items` and `None` in unused slots.
-pub fn generate_table<T: Clone, H: Fn(&T) -> usize>(items: &Vec<T>, hash_function: H) -> Vec<Option<T>> {
-    let size = (1.20 * items.len() as f64) as usize;
+pub fn generate_table<'cont, T, I: iter::Iterator<Item = &'cont T>, H: Fn(&T) -> usize>(
+    items: I,
+    num_items: usize,
+    hash_function: H,
+) -> Vec<Option<&'cont T>> {
+    let size = (1.20 * num_items as f64) as usize;
     // TODO do we really need the multiply by two here?
let size = if size.is_power_of_two() { size * 2 @@ -18,10 +24,10 @@ pub fn generate_table usize>(items: &Vec, hash_function: H) - size.next_power_of_two() }; - let mut table: Vec> = vec![None; size]; + let mut table = vec![None; size]; for i in items { - let mut h = hash_function(i) % size; + let mut h = hash_function(&i) % size; let mut s = 0; while table[h].is_some() { s += 1; @@ -36,7 +42,7 @@ pub fn generate_table usize>(items: &Vec, hash_function: H) - #[test] fn test_generate_table() { let v = vec!["Hello".to_string(), "world".to_string()]; - let table = generate_table(&v, |s| simple_hash(&s)); + let table = generate_table(v.iter(), v.len(), |s| simple_hash(&s)); assert_eq!( table, vec![ diff --git a/cranelift-codegen/meta/src/default_map.rs b/cranelift-codegen/meta/src/default_map.rs new file mode 100644 index 000000000..da6d47143 --- /dev/null +++ b/cranelift-codegen/meta/src/default_map.rs @@ -0,0 +1,19 @@ +use std::collections::HashMap; +use std::hash::Hash; + +pub trait MapWithDefault { + fn get_or_default(&mut self, k: K) -> &mut V; +} + +impl MapWithDefault for HashMap { + fn get_or_default(&mut self, k: K) -> &mut V { + self.entry(k).or_insert_with(|| V::default()) + } +} + +#[test] +fn test_default() { + let mut hash_map = HashMap::new(); + hash_map.insert(42, "hello"); + assert_eq!(*hash_map.get_or_default(43), ""); +} diff --git a/cranelift-codegen/meta/src/gen_binemit.rs b/cranelift-codegen/meta/src/gen_binemit.rs new file mode 100644 index 000000000..53929fe8c --- /dev/null +++ b/cranelift-codegen/meta/src/gen_binemit.rs @@ -0,0 +1,225 @@ +//! Generate binary emission code for each ISA. + +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::formats::FormatRegistry; +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes}; + +/// Generate code to handle a single recipe. +/// +/// - Unpack the instruction data, knowing the format. +/// - Determine register locations for operands with register constraints. +/// - Determine stack slot locations for operands with stack constraints. +/// - Call hand-written code for the actual emission. +fn gen_recipe(formats: &FormatRegistry, recipe: &EncodingRecipe, fmt: &mut Formatter) { + let inst_format = formats.get(recipe.format); + let num_value_ops = inst_format.num_value_operands; + + // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value list. + let want_args = inst_format.has_value_list + || recipe.operands_in.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list); + + let want_outs = recipe.operands_out.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + + let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name); + + // Unpack the instruction data. 
+ fmtln!(fmt, "if let &InstructionData::{} {{", inst_format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + for f in &inst_format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + if want_args { + if inst_format.has_value_list || num_value_ops > 1 { + fmt.line("ref args,"); + } else { + fmt.line("arg,"); + } + } + fmt.line(".."); + + fmt.outdented_line("} = inst_data {"); + + // Pass recipe arguments in this order: inputs, imm_fields, outputs. + let mut args = String::new(); + + if want_args && !is_regmove { + if inst_format.has_value_list { + fmt.line("let args = args.as_slice(&func.dfg.value_lists);"); + } else if num_value_ops == 1 { + fmt.line("let args = [arg];"); + } + args += &unwrap_values(&recipe.operands_in, "in", "args", fmt); + } + + for f in &inst_format.imm_fields { + args += &format!(", {}", f.member); + } + + // Unwrap interesting output arguments. + if want_outs { + if recipe.operands_out.len() == 1 { + fmt.line("let results = [func.dfg.first_result(inst)];") + } else { + fmt.line("let results = func.dfg.inst_results(inst);"); + } + args += &unwrap_values(&recipe.operands_out, "out", "results", fmt); + } + + // Optimization: Only update the register diversion tracker for regmove instructions. + if is_regmove { + fmt.line("divert.apply(inst_data);") + } + + match &recipe.emit { + Some(emit) => { + fmt.multi_line(emit); + fmt.line("return;"); + } + None => { + fmtln!( + fmt, + "return recipe_{}(func, inst, sink, bits{});", + recipe.name.to_lowercase(), + args + ); + } + } + }); + fmt.line("}"); +} + +/// Emit code that unwraps values living in registers or stack slots. +/// +/// :param args: Input or output constraints. +/// :param prefix: Prefix to be used for the generated local variables. +/// :param values: Name of slice containing the values to be unwrapped. +/// :returns: Comma separated list of the generated variables +fn unwrap_values( + args: &[OperandConstraint], + prefix: &str, + values_slice: &str, + fmt: &mut Formatter, +) -> String { + let mut varlist = String::new(); + for (i, cst) in args.iter().enumerate() { + match cst { + OperandConstraint::RegClass(_reg_class) => { + let v = format!("{}_reg{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!( + fmt, + "let {} = divert.reg({}[{}], &func.locations);", + v, + values_slice, + i + ); + } + OperandConstraint::Stack(stack) => { + let v = format!("{}_stk{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!(fmt, "let {} = StackRef::masked(", v); + fmt.indent(|fmt| { + fmtln!( + fmt, + "divert.stack({}[{}], &func.locations),", + values_slice, + i + ); + fmt.line(format!("{},", stack.stack_base_mask())); + fmt.line("&func.stack_slots,"); + }); + fmt.line(").unwrap();"); + } + _ => {} + } + } + varlist +} + +fn gen_isa(formats: &FormatRegistry, isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) { + fmt.doc_comment(format!( + "Emit binary machine code for `inst` for the {} ISA.", + isa_name + )); + + if recipes.is_empty() { + fmt.line("pub fn emit_inst("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("_divert: &mut RegDiversions,"); + fmt.line("_sink: &mut CS,"); + fmt.line("_isa: &dyn TargetIsa,"); + }); + fmt.line(") {"); + fmt.indent(|fmt| { + // No encoding recipes: Emit a stub. 
+ fmt.line("bad_encoding(func, inst)"); + }); + fmt.line("}"); + return; + } + + fmt.line("#[allow(unused_variables, unreachable_code)]"); + fmt.line("pub fn emit_inst("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("divert: &mut RegDiversions,"); + fmt.line("sink: &mut CS,"); + fmt.line("isa: &dyn TargetIsa,") + }); + + fmt.line(") {"); + fmt.indent(|fmt| { + fmt.line("let encoding = func.encodings[inst];"); + fmt.line("let bits = encoding.bits();"); + fmt.line("let inst_data = &func.dfg[inst];"); + fmt.line("match encoding.recipe() {"); + fmt.indent(|fmt| { + for (i, recipe) in recipes.iter() { + fmt.comment(format!("Recipe {}", recipe.name)); + fmtln!(fmt, "{} => {{", i.index()); + fmt.indent(|fmt| { + gen_recipe(formats, recipe, fmt); + }); + fmt.line("}"); + } + fmt.line("_ => {},"); + }); + fmt.line("}"); + + // Allow for unencoded ghost instructions. The verifier will check details. + fmt.line("if encoding.is_legal() {"); + fmt.indent(|fmt| { + fmt.line("bad_encoding(func, inst);"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +pub fn generate( + formats: &FormatRegistry, + isa_name: &str, + recipes: &Recipes, + binemit_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(formats, isa_name, recipes, &mut fmt); + fmt.update_file(binemit_filename, out_dir)?; + Ok(()) +} diff --git a/cranelift-codegen/meta/src/gen_encodings.rs b/cranelift-codegen/meta/src/gen_encodings.rs new file mode 100644 index 000000000..cfaa63f08 --- /dev/null +++ b/cranelift-codegen/meta/src/gen_encodings.rs @@ -0,0 +1,1139 @@ +//! Generate sources for instruction encoding. +//! +//! The tables and functions generated here support the `TargetISA::encode()` function which +//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a +//! *recipe* and some *encoding* bits. +//! +//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a +//! corresponding hand-written function to do that after registers are allocated. +//! +//! This is the information available to us: +//! +//! - The instruction to be encoded as an `InstructionData` reference. +//! - The controlling type variable. +//! - The data-flow graph giving us access to the types of all values involved. This is needed for +//! testing any secondary type variables. +//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. +//! - The currently active CPU mode is determined by the ISA. +//! +//! ## Level 1 table lookup +//! +//! The CPU mode provides the first table. The key is the instruction's controlling type variable. +//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values +//! are level 2 tables. +//! +//! ## Level 2 table lookup +//! +//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. +//! +//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. +//! Code in any given function usually only uses a few different types, so many of the level 2 +//! tables will be cold. +//! +//! ## Encoding lists +//! +//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: +//! +//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. +//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. +//! 
Otherwise, stop with the default legalization code. +//! 3. Stop with legalization code. +//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. +//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. +//! +//! The instruction predicate is also used to distinguish between polymorphic instructions with +//! different types for secondary type variables. + +use std::collections::btree_map; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::convert::TryFrom; +use std::iter::FromIterator; + +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::encodings::Encoding; +use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register}; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use crate::cdsl::xform::TransformGroupIndex; + +use crate::shared::Definitions as SharedDefinitions; + +use crate::constant_hash::generate_table; +use crate::default_map::MapWithDefault; +use crate::unique_table::UniqueSeqTable; + +/// Emit code for matching an instruction predicate against an `InstructionData` reference called +/// `inst`. +/// +/// The generated code is an `if let` pattern match that falls through if the instruction has an +/// unexpected format. This should lead to a panic. +fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) { + if instp.is_type_predicate() { + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + fmt.line(instp.rust_predicate()); + return; + } + + let leaves = instp.collect_leaves(); + + let mut has_type_check = false; + let mut format_name = None; + let mut field_names = HashSet::new(); + + for leaf in leaves { + if leaf.is_type_predicate() { + has_type_check = true; + } else { + field_names.insert(leaf.format_destructuring_member_name()); + let leaf_format_name = leaf.format_name(); + match format_name { + None => format_name = Some(leaf_format_name), + Some(previous_format_name) => { + assert!( + previous_format_name == leaf_format_name, + format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name + )); + } + } + } + } + + let mut fields = Vec::from_iter(field_names); + fields.sort(); + let fields = fields.join(", "); + + let format_name = format_name.expect("There should be a format name!"); + + fmtln!( + fmt, + "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{", + format_name, + fields + ); + fmt.indent(|fmt| { + if has_type_check { + // We could implement this. + assert!(has_func, "recipe predicates can't check type variables."); + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + } else if has_func { + // Silence dead argument. + fmt.line("let _ = func;"); + } + fmtln!(fmt, "return {};", instp.rust_predicate()); + }); + fmtln!(fmt, "}"); + + fmt.line("unreachable!();"); +} + +/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES` +/// array indexed by recipe number. +/// +/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many +/// recipes have identical predicates. 
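Aside: a toy model of the two-level lookup described above; the real tables are generated, open-addressed, and keyed by type and opcode numbers rather than strings:

    use std::collections::HashMap;

    let mut level1: HashMap<&str, HashMap<&str, Vec<u16>>> = HashMap::new();
    level1
        .entry("i32")          // controlling type variable; INVALID if monomorphic
        .or_default()
        .insert("iadd", vec![/* encoding-list words */]);
    let enc_list = &level1["i32"]["iadd"]; // walked by the bytecode interpreter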
+fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + let mut predicate_names = HashMap::new(); + + fmt.comment(format!("{} recipe predicates.", isa.name)); + for recipe in isa.recipes.values() { + let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => continue, + (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue, + (isap, instp) => (isap, instp), + }; + + let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase()); + predicate_names.insert((isap, instp), func_name.clone()); + + // Generate the predicate function. + fmtln!( + fmt, + "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{", + func_name, + if let Some(_) = isap { "isap" } else { "_" }, + if let Some(_) = instp { "inst" } else { "_" } + ); + fmt.indent(|fmt| { + match (isap, instp) { + (Some(isap), None) => { + fmtln!(fmt, "isap.test({})", isap); + } + (None, Some(instp)) => { + emit_instp(instp, /* has func */ false, fmt); + } + (Some(isap), Some(instp)) => { + fmtln!(fmt, "isap.test({}) &&", isap); + emit_instp(instp, /* has func */ false, fmt); + } + _ => panic!("skipped above"), + } + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} recipe predicate table. + + One entry per recipe, set to Some only when the recipe is guarded by a predicate."#, + isa.name + )); + fmtln!( + fmt, + "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => fmt.line("None,"), + key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()), + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit private functions for matching instruction predicates as well as a static +/// `INST_PREDICATES` array indexed by predicate number. +fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.comment(format!("{} instruction predicates.", isa.name)); + for (id, instp) in isa.encodings_predicates.iter() { + fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index()); + fmt.indent(|fmt| { + emit_instp(instp, /* has func */ true, fmt); + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} instruction predicate table. + + One entry per instruction predicate, so the encoding bytecode can embed indexes into this + table."#, + isa.name + )); + fmtln!( + fmt, + "pub static INST_PREDICATES: [InstPredicate; {}] = [", + isa.encodings_predicates.len() + ); + fmt.indent(|fmt| { + for id in isa.encodings_predicates.keys() { + fmtln!(fmt, "inst_predicate_{},", id.index()); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe names keyed by recipe number. +/// +/// This is used for pretty-printing encodings. 
+fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe names, using the same recipe index spaces as the one specified by the + corresponding binemit file."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_NAMES: [&str; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmtln!(fmt, r#""{}","#, recipe.name); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Returns a set of all the registers involved in fixed register constraints. +fn get_fixed_registers(operands_in: &Vec) -> HashSet { + HashSet::from_iter( + operands_in + .iter() + .map(|constraint| { + if let OperandConstraint::FixedReg(reg) = &constraint { + Some(reg.clone()) + } else { + None + } + }) + .filter(|opt| opt.is_some()) + .map(|opt| opt.unwrap()), + ) +} + +/// Emit a struct field initializer for an array of operand constraints. +/// +/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on +/// inputs, fixed_registers must contain the fixed output registers). +fn emit_operand_constraints( + registers: &IsaRegs, + recipe: &EncodingRecipe, + constraints: &Vec, + field_name: &'static str, + tied_operands: &HashMap, + fixed_registers: &HashSet, + fmt: &mut Formatter, +) { + if constraints.len() == 0 { + fmtln!(fmt, "{}: &[],", field_name); + return; + } + + fmtln!(fmt, "{}: &[", field_name); + fmt.indent(|fmt| { + for (n, constraint) in constraints.iter().enumerate() { + fmt.line("OperandConstraint {"); + fmt.indent(|fmt| { + match constraint { + OperandConstraint::RegClass(reg_class) => { + if let Some(tied_input) = tied_operands.get(&n) { + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + } else { + fmt.line("kind: ConstraintKind::Reg,"); + } + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[*reg_class].name + ); + } + OperandConstraint::FixedReg(reg) => { + assert!(!tied_operands.contains_key(&n), "can't tie fixed registers"); + let constraint_kind = if fixed_registers.contains(®) { + "FixedTied" + } else { + "FixedReg" + }; + fmtln!( + fmt, + "kind: ConstraintKind::{}({}),", + constraint_kind, + reg.unit + ); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[reg.regclass].name + ); + } + OperandConstraint::TiedInput(tied_input) => { + // This is a tied output constraint. It should never happen + // for input constraints. + assert!( + tied_input == tied_operands.get(&n).unwrap(), + "invalid tied constraint" + ); + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + + let tied_class = if let OperandConstraint::RegClass(tied_class) = + recipe.operands_in[*tied_input] + { + tied_class + } else { + panic!("tied constraints relate only to register inputs"); + }; + + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[tied_class].name + ); + } + OperandConstraint::Stack(stack) => { + assert!(!tied_operands.contains_key(&n), "can't tie stack operand"); + fmt.line("kind: ConstraintKind::Stack,"); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[stack.regclass].name + ); + } + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "],"); +} + +/// Emit a table of encoding recipe operand constraints keyed by recipe number. +/// +/// These are used by the register allocator to pick registers that can be properly encoded. 
+fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe constraints list, using the same recipe index spaces as the one + specified by the corresponding binemit file. These constraints are used by register + allocation to select the right location to use for input and output values."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + // Compute a mapping of tied operands in both directions (input tied to outputs and + // conversely). + let mut tied_in_to_out = HashMap::new(); + let mut tied_out_to_in = HashMap::new(); + for (out_index, constraint) in recipe.operands_out.iter().enumerate() { + if let OperandConstraint::TiedInput(in_index) = &constraint { + tied_in_to_out.insert(*in_index, out_index); + tied_out_to_in.insert(out_index, *in_index); + } + } + + // Find the sets of registers involved in fixed register constraints. + let fixed_inputs = get_fixed_registers(&recipe.operands_in); + let fixed_outputs = get_fixed_registers(&recipe.operands_out); + + fmt.comment(format!("Constraints for recipe {}:", recipe.name)); + fmt.line("RecipeConstraints {"); + fmt.indent(|fmt| { + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_in, + "ins", + &tied_in_to_out, + &fixed_outputs, + fmt, + ); + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_out, + "outs", + &tied_out_to_in, + &fixed_inputs, + fmt, + ); + fmtln!( + fmt, + "fixed_ins: {},", + if !fixed_inputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "fixed_outs: {},", + if !fixed_outputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "tied_ops: {},", + if !tied_in_to_out.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "clobbers_flags: {},", + if recipe.clobbers_flags { + "true" + } else { + "false" + } + ); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe code size information. +fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe sizing descriptors, using the same recipe index spaces as the one + specified by the corresponding binemit file. These are used to compute the final size of an + instruction, as well as to compute the range of branches."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_SIZING: [RecipeSizing; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmt.comment(format!("Code size information for recipe {}:", recipe.name)); + fmt.line("RecipeSizing {"); + fmt.indent(|fmt| { + fmtln!(fmt, "base_size: {},", recipe.base_size); + fmtln!(fmt, "compute_size: {},", recipe.compute_size); + if let Some(range) = &recipe.branch_range { + fmtln!( + fmt, + "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),", + range.inst_size, + range.range + ); + } else { + fmt.line("branch_range: None,"); + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Level 1 table mapping types to `Level2` objects. 
+struct Level1Table<'cpu_mode> { + cpu_mode: &'cpu_mode CpuMode, + legalize_code: TransformGroupIndex, + + table_map: HashMap, usize>, + table_vec: Vec, +} + +impl<'cpu_mode> Level1Table<'cpu_mode> { + fn new(cpu_mode: &'cpu_mode CpuMode) -> Self { + Self { + cpu_mode, + legalize_code: cpu_mode.get_default_legalize_code(), + table_map: HashMap::new(), + table_vec: Vec::new(), + } + } + + /// Returns the level2 table for the given type; None means monomorphic, in this context. + fn l2table_for(&mut self, typ: Option) -> &mut Level2Table { + let cpu_mode = &self.cpu_mode; + let index = match self.table_map.get(&typ) { + Some(&index) => index, + None => { + let legalize_code = cpu_mode.get_legalize_code_for(&typ); + let table = Level2Table::new(typ.clone(), legalize_code); + let index = self.table_vec.len(); + self.table_map.insert(typ, index); + self.table_vec.push(table); + index + } + }; + self.table_vec.get_mut(index).unwrap() + } + + fn l2tables(&mut self) -> Vec<&mut Level2Table> { + self.table_vec + .iter_mut() + .filter(|table| !table.is_empty()) + .collect::>() + } +} + +struct Level2HashTableEntry { + inst_name: String, + offset: usize, +} + +/// Level 2 table mapping instruction opcodes to `EncList` objects. +/// +/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`. +struct Level2Table { + typ: Option, + legalize_code: TransformGroupIndex, + inst_to_encodings: BTreeMap, + hash_table_offset: Option, + hash_table_len: Option, +} + +impl Level2Table { + fn new(typ: Option, legalize_code: TransformGroupIndex) -> Self { + Self { + typ, + legalize_code, + inst_to_encodings: BTreeMap::new(), + hash_table_offset: None, + hash_table_len: None, + } + } + + fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList { + let copied_typ = self.typ.clone(); + self.inst_to_encodings + .entry(inst.name.clone()) + .or_insert_with(|| EncodingList::new(inst, copied_typ)) + } + + fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> { + self.inst_to_encodings.values_mut() + } + + fn is_empty(&self) -> bool { + self.inst_to_encodings.is_empty() + } + + fn layout_hashtable( + &mut self, + level2_hashtables: &mut Vec>, + level2_doc: &mut HashMap>, + ) { + let hash_table = generate_table( + self.inst_to_encodings.values(), + self.inst_to_encodings.len(), + // TODO the Python code wanted opcode numbers to start from 1. + |enc_list| enc_list.inst.opcode_number.index() + 1, + ); + + let hash_table_offset = level2_hashtables.len(); + let hash_table_len = hash_table.len(); + + assert!(self.hash_table_offset.is_none()); + assert!(self.hash_table_len.is_none()); + self.hash_table_offset = Some(hash_table_offset); + self.hash_table_len = Some(hash_table_len); + + level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| { + opt_enc_list.map(|enc_list| Level2HashTableEntry { + inst_name: enc_list.inst.camel_name.clone(), + offset: enc_list.offset.unwrap(), + }) + })); + + let typ_comment = match &self.typ { + Some(ty) => ty.to_string(), + None => "typeless".into(), + }; + + level2_doc.get_or_default(hash_table_offset).push(format!( + "{:06x}: {}, {} entries", + hash_table_offset, typ_comment, hash_table_len + )); + } +} + +/// The u16 values in an encoding list entry are interpreted as follows: +/// +/// NR = len(all_recipes) +/// +/// entry < 2*NR +/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. +/// If bit 0 is set, stop with the default legalization code. +/// If bit 0 is clear, keep going down the list. 
+/// The u16 values in an encoding list entry are interpreted as follows:
+///
+/// NR = len(all_recipes)
+///
+/// entry < 2*NR
+///     Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied.
+///     If bit 0 is set, stop with the default legalization code.
+///     If bit 0 is clear, keep going down the list.
+/// entry < PRED_START
+///     Stop with legalization code `entry - 2*NR`.
+///
+/// Remaining entries are interpreted as (skip, pred) pairs, where:
+///
+///     skip = (entry - PRED_START) >> PRED_BITS
+///     pred = (entry - PRED_START) & PRED_MASK
+///
+/// If the predicate is satisfied, keep going. Otherwise skip over the next
+/// `skip` entries. If skip == 0, stop with the default legalization code.
+///
+/// The `pred` predicate number is interpreted as an instruction predicate if it
+/// is in range, otherwise an ISA predicate.
+
+/// Encoding lists are represented as u16 arrays.
+const CODE_BITS: usize = 16;
+
+/// Beginning of the predicate code words.
+const PRED_START: u16 = 0x1000;
+
+/// Number of bits used to hold a predicate number (instruction + ISA predicates).
+const PRED_BITS: usize = 12;
+
+/// Mask for extracting the predicate number.
+const PRED_MASK: usize = (1 << PRED_BITS) - 1;
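Since the format above is a tiny bytecode, the reading side may help fix intuition. The sketch below decodes a list according to the documented rules; it is an illustration only, with predicate evaluation stubbed out as closures, not the actual interpreter that ships in cranelift-codegen. Note that `skip` counts u16 words and each recipe entry occupies two of them.

    /// Illustrative decoder for the list format documented above.
    fn walk_enclist(
        words: &[u16],
        num_recipes: u16,                // NR
        recipe_ok: impl Fn(u16) -> bool, // recipe predicate, by recipe index
        pred_ok: impl Fn(u16) -> bool,   // instruction or ISA predicate, by number
    ) -> Option<(u16, u16)> {
        const PRED_START: u16 = 0x1000;
        const PRED_BITS: u16 = 12;
        const PRED_MASK: u16 = (1 << PRED_BITS) - 1;

        let mut i = 0;
        while i < words.len() {
            let entry = words[i];
            if entry < 2 * num_recipes {
                // Recipe entry: recipe index in the high bits, stop flag in
                // bit 0, encoding bits in the following word.
                if recipe_ok(entry / 2) {
                    return Some((entry / 2, words[i + 1]));
                }
                if entry & 1 == 1 {
                    return None; // stop with the default legalization code
                }
                i += 2;
            } else if entry < PRED_START {
                return None; // stop with legalization code entry - 2*NR
            } else {
                let skip = (entry - PRED_START) >> PRED_BITS;
                let pred = (entry - PRED_START) & PRED_MASK;
                if pred_ok(pred) {
                    i += 1; // satisfied: keep going
                } else if skip == 0 {
                    return None; // stop with the default legalization code
                } else {
                    i += 1 + skip as usize; // skip the guarded words
                }
            }
        }
        None
    }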
+/// Encoder for the list format above.
+struct Encoder {
+    num_instruction_predicates: usize,
+
+    /// u16 encoding list words.
+    words: Vec<u16>,
+
+    /// Documentation comments: Index into `words` + comment.
+    docs: Vec<(usize, String)>,
+}
+
+impl Encoder {
+    fn new(num_instruction_predicates: usize) -> Self {
+        Self {
+            num_instruction_predicates,
+            words: Vec::new(),
+            docs: Vec::new(),
+        }
+    }
+
+    /// Add a recipe+bits entry to the list.
+    fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) {
+        let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16;
+        assert!(code < PRED_START);
+
+        let doc = format!(
+            "--> {}{}",
+            enc.to_rust_comment(recipes),
+            if is_final { " and stop" } else { "" }
+        );
+        self.docs.push((self.words.len(), doc));
+
+        self.words.push(code);
+        self.words.push(enc.encbits);
+    }
+
+    /// Add a predicate entry.
+    fn pred(&mut self, pred_comment: String, skip: usize, n: usize) {
+        assert!(n <= PRED_MASK);
+        let entry = (PRED_START as usize) + (n | (skip << PRED_BITS));
+        assert!(entry < (1 << CODE_BITS));
+        let entry = entry as u16;
+
+        let doc = if skip == 0 {
+            "stop".to_string()
+        } else {
+            format!("skip {}", skip)
+        };
+        let doc = format!("{} unless {}", doc, pred_comment);
+
+        self.docs.push((self.words.len(), doc));
+        self.words.push(entry);
+    }
+
+    /// Add an instruction predicate entry.
+    fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) {
+        let number = pred.index();
+        let pred_comment = format!("inst_predicate_{}", number);
+        self.pred(pred_comment, skip, number);
+    }
+
+    /// Add an ISA predicate entry.
+    fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) {
+        // ISA predicates follow the instruction predicates.
+        let n = self.num_instruction_predicates + (pred as usize);
+        let pred_comment = format!("PredicateView({})", pred);
+        self.pred(pred_comment, skip, n);
+    }
+}
+
+/// List of instructions for encoding a given type + opcode pair.
+///
+/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16
+/// values.
+struct EncodingList {
+    inst: Instruction,
+    typ: Option<ValueType>,
+    encodings: Vec<Encoding>,
+    offset: Option<usize>,
+}
+
+impl EncodingList {
+    fn new(inst: &Instruction, typ: Option<ValueType>) -> Self {
+        Self {
+            inst: inst.clone(),
+            typ,
+            encodings: Default::default(),
+            offset: None,
+        }
+    }
+
+    /// Encode this list as a sequence of u16 numbers.
+    ///
+    /// Adds the sequence to `enc_lists` and records the returned offset as
+    /// `self.offset`.
+    ///
+    /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets.
+    fn encode(
+        &mut self,
+        isa: &TargetIsa,
+        cpu_mode: &CpuMode,
+        enc_lists: &mut UniqueSeqTable<u16>,
+        enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+    ) {
+        assert!(!self.encodings.is_empty());
+
+        let mut encoder = Encoder::new(isa.encodings_predicates.len());
+
+        let mut index = 0;
+        while index < self.encodings.len() {
+            let encoding = &self.encodings[index];
+
+            // Try to see how many encodings are following and have the same ISA predicate and
+            // instruction predicate, so as to reduce the number of tests carried out by the
+            // encoding list interpreter.
+            //
+            // Encodings with similar tests are hereby called a group. The group includes the
+            // current encoding we're looking at.
+            let (isa_predicate, inst_predicate) =
+                (&encoding.isa_predicate, &encoding.inst_predicate);
+
+            let group_size = {
+                let mut group_size = 1;
+                while index + group_size < self.encodings.len() {
+                    let next_encoding = &self.encodings[index + group_size];
+                    if &next_encoding.inst_predicate != inst_predicate
+                        || &next_encoding.isa_predicate != isa_predicate
+                    {
+                        break;
+                    }
+                    group_size += 1;
+                }
+                group_size
+            };
+
+            let is_last_group = index + group_size == self.encodings.len();
+
+            // The number of entries to skip when a predicate isn't satisfied is the size of both
+            // predicates + the size of the group, minus one (for this predicate). Each recipe
+            // entry has a size of two u16 (recipe index + bits).
+            let mut skip = if is_last_group {
+                0
+            } else {
+                let isap_size = match isa_predicate {
+                    Some(_) => 1,
+                    None => 0,
+                };
+                let instp_size = match inst_predicate {
+                    Some(_) => 1,
+                    None => 0,
+                };
+                isap_size + instp_size + group_size * 2 - 1
+            };
+
+            if let Some(pred) = isa_predicate {
+                encoder.isa_predicate(*pred, skip);
+                if !is_last_group {
+                    skip -= 1;
+                }
+            }
+
+            if let Some(pred) = inst_predicate {
+                encoder.inst_predicate(*pred, skip);
+                // No need to update skip, it's dead after this point.
+            }
+
+            for i in 0..group_size {
+                let encoding = &self.encodings[index + i];
+                let is_last_encoding = index + i == self.encodings.len() - 1;
+                encoder.recipe(&isa.recipes, encoding, is_last_encoding);
+            }
+
+            index += group_size;
+        }
+
+        assert!(self.offset.is_none());
+        let offset = enc_lists.add(&encoder.words);
+        self.offset = Some(offset);
+
+        // Doc comments.
+        let recipe_typ_mode_name = format!(
+            "{}{} ({})",
+            self.inst.name,
+            if let Some(typ) = &self.typ {
+                format!(".{}", typ.to_string())
+            } else {
+                "".into()
+            },
+            cpu_mode.name
+        );
+
+        enc_lists_doc
+            .get_or_default(offset)
+            .push(format!("{:06x}: {}", offset, recipe_typ_mode_name));
+        for (pos, doc) in encoder.docs {
+            enc_lists_doc.get_or_default(offset + pos).push(doc);
+        }
+        enc_lists_doc
+            .get_or_default(offset + encoder.words.len())
+            .insert(0, format!("end of {}", recipe_typ_mode_name));
+    }
+}
+
+fn make_tables(cpu_mode: &CpuMode) -> Level1Table {
+    let mut table = Level1Table::new(cpu_mode);
+
+    for encoding in &cpu_mode.encodings {
+        table
+            .l2table_for(encoding.bound_type.clone())
+            .enclist_for(encoding.inst())
+            .encodings
+            .push(encoding.clone());
+    }
+
+    // Ensure there are level 1 table entries for all types with a custom legalize action.
+    for value_type in cpu_mode.get_legalized_types() {
+        table.l2table_for(Some(value_type.clone()));
+    }
+    // ... and also for monomorphic instructions.
+    table.l2table_for(None);
+
+    table
+}
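A worked check of the skip arithmetic in `encode` above, with illustrative numbers rather than generated output: a non-final group of two encodings guarded by both an ISA and an instruction predicate occupies 1 + 1 + 2*2 = 6 words, so the ISA predicate is emitted with skip = 6 - 1 = 5, covering everything after its own word (the instruction-predicate word plus two two-word recipe entries). After the decrement, the instruction predicate carries skip = 4, exactly the four recipe words it guards:

    isa_pred(skip=5)  inst_pred(skip=4)  recipeA bitsA  recipeB bitsB  | next group ...

A skip of 0 means "stop with the default legalization code", which is why the last group always encodes 0.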
+/// Compute encodings and doc comments for encoding lists in `level1`.
+fn encode_enclists(
+    isa: &TargetIsa,
+    cpu_mode: &CpuMode,
+    level1: &mut Level1Table,
+    enc_lists: &mut UniqueSeqTable<u16>,
+    enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+) {
+    for level2 in level1.l2tables() {
+        for enclist in level2.enclists() {
+            enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc);
+        }
+    }
+}
+
+fn encode_level2_hashtables<'a>(
+    level1: &'a mut Level1Table,
+    level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>,
+    level2_doc: &mut HashMap<usize, Vec<String>>,
+) {
+    for level2 in level1.l2tables() {
+        level2.layout_hashtable(level2_hashtables, level2_doc);
+    }
+}
+
+fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) {
+    // Level 1 tables, one per CPU mode.
+    let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new();
+
+    // Single table containing all the level2 hash tables.
+    let mut level2_hashtables = Vec::new();
+    let mut level2_doc: HashMap<usize, Vec<String>> = HashMap::new();
+
+    // Tables for encoding lists with comments.
+    let mut enc_lists = UniqueSeqTable::new();
+    let mut enc_lists_doc = HashMap::new();
+
+    for cpu_mode in &isa.cpu_modes {
+        level2_doc
+            .get_or_default(level2_hashtables.len())
+            .push(cpu_mode.name.into());
+
+        let mut level1 = make_tables(cpu_mode);
+
+        encode_enclists(
+            isa,
+            cpu_mode,
+            &mut level1,
+            &mut enc_lists,
+            &mut enc_lists_doc,
+        );
+        encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc);
+
+        level1_tables.insert(cpu_mode.name, level1);
+    }
+
+    // Compute an appropriate Rust integer type to use for offsets into a table of the given length.
+    let offset_type = |length: usize| {
+        if length <= 0x10000 {
+            "u16"
+        } else {
+            assert!(u32::try_from(length).is_ok(), "table too big!");
+            "u32"
+        }
+    };
+
+    let level1_offset_type = offset_type(level2_hashtables.len());
+    let level2_offset_type = offset_type(enc_lists.len());
+
+    // Emit encoding lists.
+    fmt.doc_comment(
+        format!(r#"{} encoding lists.
+
+        This contains the entire encodings bytecode for every single instruction; the encodings
+        interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2
+        table entries below."#, isa.name)
+    );
+    fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len());
+    fmt.indent(|fmt| {
+        let mut line = Vec::new();
+        for (index, entry) in enc_lists.iter().enumerate() {
+            if let Some(comments) = enc_lists_doc.get(&index) {
+                if !line.is_empty() {
+                    fmtln!(fmt, "{},", line.join(", "));
+                    line.clear();
+                }
+                for comment in comments {
+                    fmt.comment(comment);
+                }
+            }
+            line.push(format!("{:#06x}", entry));
+        }
+        if !line.is_empty() {
+            fmtln!(fmt, "{},", line.join(", "));
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+    // Emit the full concatenation of level 2 hash tables.
+    fmt.doc_comment(format!(
+        r#"{} level 2 hash tables.
+
+        This hash table, keyed by instruction opcode, contains all the starting offsets for the
+        encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the
+        instruction's controlling type in the level 1 hash table."#,
+        isa.name
+    ));
+    fmtln!(
+        fmt,
+        "pub static LEVEL2: [Level2Entry<{}>; {}] = [",
+        level2_offset_type,
+        level2_hashtables.len()
+    );
+    fmt.indent(|fmt| {
+        for (offset, entry) in level2_hashtables.iter().enumerate() {
+            if let Some(comments) = level2_doc.get(&offset) {
+                for comment in comments {
+                    fmt.comment(comment);
+                }
+            }
+            if let Some(entry) = entry {
+                fmtln!(
+                    fmt,
+                    "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},",
+                    entry.inst_name,
+                    entry.offset
+                );
+            } else {
+                fmt.line("Level2Entry { opcode: None, offset: 0 },");
+            }
+        }
+    });
+    fmtln!(fmt, "];");
+    fmt.empty_line();
+
+    // Emit a level 1 hash table for each CPU mode.
+    for cpu_mode in &isa.cpu_modes {
+        let level1 = &level1_tables.get(cpu_mode.name).unwrap();
+        let hash_table = generate_table(
+            level1.table_vec.iter(),
+            level1.table_vec.len(),
+            |level2_table| {
+                if let Some(typ) = &level2_table.typ {
+                    typ.number().expect("type without a number") as usize
+                } else {
+                    0
+                }
+            },
+        );
+
+        fmt.doc_comment(format!(
+            r#"{} level 1 hash table for the CPU mode {}.
+
+            This hash table, keyed by instruction controlling type, contains all the level 2
+            hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating
+            which legalization scheme to apply when the instruction doesn't have any valid encoding for
+            this CPU mode.
+            "#,
+            isa.name, cpu_mode.name
+        ));
+        fmtln!(
+            fmt,
+            "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [",
+            cpu_mode.name.to_uppercase(),
+            level1_offset_type,
+            hash_table.len()
+        );
+        fmt.indent(|fmt| {
+            for opt_level2 in hash_table {
+                let level2 = match opt_level2 {
+                    None => {
+                        // Empty hash table entry. Include the default legalization action.
+                        fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},",
+                            isa.translate_group_index(level1.legalize_code));
+                        continue;
+                    }
+                    Some(level2) => level2,
+                };
+
+                let legalize_comment = defs.transform_groups.get(level2.legalize_code).name;
+                let legalize_code = isa.translate_group_index(level2.legalize_code);
+
+                let typ_name = if let Some(typ) = &level2.typ {
+                    typ.rust_name()
+                } else {
+                    "ir::types::INVALID".into()
+                };
+
+                if level2.is_empty() {
+                    // Empty level 2 table: Only a specialized legalization action, no actual
+                    // table.
+                    // Set an offset that is out of bounds, but make sure it doesn't overflow its
+                    // type when adding `1<<log2len`.
+                    fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: 0, offset: !0 - 1, legalize: {} }}, // {}",
+                        typ_name, legalize_code, legalize_comment);
+                    continue;
+                }
+
+                // Proper level 2 hash table.
+                let l2l = (level2.hash_table_len.unwrap() as f64).log2() as usize;
+                assert!(l2l > 0, "Level2 hash table was too small.");
+                fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}",
+                    typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment);
+            }
+        });
+        fmtln!(fmt, "];");
+        fmt.empty_line();
+    }
+}
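The generated LEVEL1_*/LEVEL2 arrays are open-addressed hash tables with power-of-two sizes, which is what the log2len fields record. The probe below is a generic illustration of how such a table is searched; cranelift's actual hash function and probe sequence live in its constant_hash module and are an assumption here, not something this patch shows.

    /// Generic open-addressing lookup over a power-of-two table.
    fn probe<T>(table: &[Option<T>], hash: usize, matches: impl Fn(&T) -> bool) -> Option<&T> {
        debug_assert!(table.len().is_power_of_two());
        let mask = table.len() - 1;
        let mut step = 0;
        let mut pos = hash & mask;
        loop {
            match &table[pos] {
                Some(entry) if matches(entry) => return Some(entry),
                // A vacant slot means the key is absent; otherwise keep probing.
                None => return None,
                Some(_) => {
                    step += 1;
                    if step > table.len() {
                        return None; // table exhausted
                    }
                    pos = (pos + step) & mask;
                }
            }
        }
    }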
+ fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); + fmt.indent(|fmt| { + fmt.line("constraints: &RECIPE_CONSTRAINTS,"); + fmt.line("sizing: &RECIPE_SIZING,"); + fmt.line("names: &RECIPE_NAMES,"); + }); + fmt.line("};"); +} + +pub(crate) fn generate( + defs: &SharedDefinitions, + isa: &TargetIsa, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(defs, isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/cranelift-codegen/meta/src/gen_inst.rs b/cranelift-codegen/meta/src/gen_inst.rs index 613987421..7d8353dad 100644 --- a/cranelift-codegen/meta/src/gen_inst.rs +++ b/cranelift-codegen/meta/src/gen_inst.rs @@ -1,15 +1,20 @@ +use std::fmt; + +use cranelift_entity::EntityRef; + use crate::cdsl::camel_case; use crate::cdsl::formats::{FormatRegistry, InstructionFormat}; -use crate::cdsl::instructions::{Instruction, InstructionGroup}; +use crate::cdsl::instructions::{AllInstructions, Instruction}; use crate::cdsl::operands::Operand; use crate::cdsl::typevar::{TypeSet, TypeVar}; + +use crate::shared::Definitions as SharedDefinitions; + use crate::constant_hash; use crate::error; use crate::srcgen::{Formatter, Match}; use crate::unique_table::{UniqueSeqTable, UniqueTable}; -use std::fmt; - // TypeSet indexes are encoded in 8 bits, with `0xff` reserved. const TYPESET_LIMIT: usize = 0xff; @@ -368,7 +373,7 @@ fn gen_instruction_data_impl(registry: &FormatRegistry, fmt: &mut Formatter) { } fn gen_bool_accessor bool>( - instruction_groups: &Vec<&InstructionGroup>, + all_inst: &AllInstructions, get_attr: T, name: &'static str, doc: &'static str, @@ -378,11 +383,9 @@ fn gen_bool_accessor bool>( fmtln!(fmt, "pub fn {}(self) -> bool {{", name); fmt.indent(|fmt| { let mut m = Match::new("self"); - for group in instruction_groups.iter() { - for inst in group.iter() { - if get_attr(inst) { - m.arm_no_fields(format!("Opcode::{}", inst.camel_name), "true"); - } + for inst in all_inst.values() { + if get_attr(inst) { + m.arm_no_fields(format!("Opcode::{}", inst.camel_name), "true"); } } m.arm_no_fields("_", "false"); @@ -392,18 +395,7 @@ fn gen_bool_accessor bool>( fmt.empty_line(); } -fn gen_opcodes<'a>( - formats: &FormatRegistry, - igroups: &Vec<&'a InstructionGroup>, - fmt: &mut Formatter, -) -> Vec<&'a Instruction> { - let mut all_inst = Vec::new(); - for group in igroups { - for inst in group.iter() { - all_inst.push(inst); - } - } - +fn gen_opcodes<'a>(all_inst: &AllInstructions, formats: &FormatRegistry, fmt: &mut Formatter) { fmt.doc_comment( r#" An instruction opcode. @@ -420,10 +412,7 @@ fn gen_opcodes<'a>( fmt.line("pub enum Opcode {"); fmt.indent(|fmt| { let mut is_first_opcode = true; - for inst in &all_inst { - // TODO we might need to set an instruction number here. Probably can do in the - // InstructionGroup itself when adding instruction (would need to remember last - // instruction number in the SharedDefinitions or somewhere else). + for inst in all_inst.values() { let format = formats.get(inst.format); fmt.doc_comment(format!("`{}`. ({})", inst, format.name)); @@ -440,6 +429,8 @@ fn gen_opcodes<'a>( // Enum variant itself. if is_first_opcode { + assert!(inst.opcode_number.index() == 0); + // TODO the python crate requires opcode numbers to start from one. 
fmtln!(fmt, "{} = 1,", inst.camel_name); is_first_opcode = false; } else { @@ -453,77 +444,77 @@ fn gen_opcodes<'a>( fmt.line("impl Opcode {"); fmt.indent(|fmt| { gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_terminator, "is_terminator", "True for instructions that terminate the EBB", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_branch, "is_branch", "True for all branch or jump instructions.", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_indirect_branch, "is_indirect_branch", "True for all indirect branch or jump instructions.", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_call, "is_call", "Is this a call instruction?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_return, "is_return", "Is this a return instruction?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.is_ghost, "is_ghost", "Is this a ghost instruction?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.can_load, "can_load", "Can this instruction read from memory?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.can_store, "can_store", "Can this instruction write to memory?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.can_trap, "can_trap", "Can this instruction cause a trap?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.other_side_effects, "other_side_effects", "Does this instruction have other side effects besides can_* flags?", fmt, ); gen_bool_accessor( - igroups, + all_inst, |inst| inst.writes_cpu_flags, "writes_cpu_flags", "Does this instruction write to CPU flags?", @@ -540,7 +531,7 @@ fn gen_opcodes<'a>( all_inst.len() ); fmt.indent(|fmt| { - for inst in &all_inst { + for inst in all_inst.values() { let format = formats.get(inst.format); fmtln!(fmt, "InstructionFormat::{}, // {}", format.name, inst.name); } @@ -552,7 +543,7 @@ fn gen_opcodes<'a>( fmt.line("fn opcode_name(opc: Opcode) -> &\'static str {"); fmt.indent(|fmt| { let mut m = Match::new("opc"); - for inst in &all_inst { + for inst in all_inst.values() { m.arm_no_fields( format!("Opcode::{}", inst.camel_name), format!("\"{}\"", inst.name), @@ -564,8 +555,9 @@ fn gen_opcodes<'a>( fmt.empty_line(); // Generate an opcode hash table for looking up opcodes by name. - let hash_table = - constant_hash::generate_table(&all_inst, |inst| constant_hash::simple_hash(&inst.name)); + let hash_table = constant_hash::generate_table(all_inst.values(), all_inst.len(), |inst| { + constant_hash::simple_hash(&inst.name) + }); fmtln!( fmt, "const OPCODE_HASH_TABLE: [Option; {}] = [", @@ -581,8 +573,6 @@ fn gen_opcodes<'a>( }); fmtln!(fmt, "];"); fmt.empty_line(); - - all_inst } /// Get the value type constraint for an SSA value operand, where @@ -660,6 +650,9 @@ fn typeset_to_string(ts: &TypeSet) -> String { if ts.specials.len() > 0 { result += &format!(", specials=[{}]", iterable_to_string(&ts.specials)); } + if ts.refs.len() > 0 { + result += &format!(", refs={}", iterable_to_string(&ts.refs)); + } result += ")"; result } @@ -687,6 +680,7 @@ pub fn gen_typesets_table(type_sets: &UniqueTable, fmt: &mut Formatter) gen_bitset(&ts.ints, "ints", 8, fmt); gen_bitset(&ts.floats, "floats", 8, fmt); gen_bitset(&ts.bools, "bools", 8, fmt); + gen_bitset(&ts.refs, "refs", 8, fmt); }); fmt.line("},"); } @@ -698,7 +692,7 @@ pub fn gen_typesets_table(type_sets: &UniqueTable, fmt: &mut Formatter) /// - Emit a compact constant table of ValueTypeSet objects. /// - Emit a compact constant table of OperandConstraint objects. 
/// - Emit an opcode-indexed table of instruction constraints. -fn gen_type_constraints(all_inst: &Vec<&Instruction>, fmt: &mut Formatter) { +fn gen_type_constraints(all_inst: &AllInstructions, fmt: &mut Formatter) { // Table of TypeSet instances. let mut type_sets = UniqueTable::new(); @@ -719,7 +713,7 @@ fn gen_type_constraints(all_inst: &Vec<&Instruction>, fmt: &mut Formatter) { all_inst.len() ); fmt.indent(|fmt| { - for inst in all_inst { + for inst in all_inst.values() { let (ctrl_typevar, ctrl_typeset) = if let Some(poly) = &inst.polymorphic_info { let index = type_sets.add(&*poly.ctrl_typevar.get_raw_typeset()); (Some(&poly.ctrl_typevar), index) @@ -940,7 +934,7 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo rtype ); - fmt.doc_comment(format!("`{}`\n\n{}", inst, inst.doc_comment_first_line())); + fmt.doc_comment(&inst.doc); fmt.line("#[allow(non_snake_case)]"); fmtln!(fmt, "fn {} {{", proto); fmt.indent(|fmt| { @@ -1035,7 +1029,7 @@ fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Fo } /// Generate a Builder trait with methods for all instructions. -fn gen_builder(instructions: &Vec<&Instruction>, formats: &FormatRegistry, fmt: &mut Formatter) { +fn gen_builder(instructions: &AllInstructions, formats: &FormatRegistry, fmt: &mut Formatter) { fmt.doc_comment( r#" Convenience methods for building instructions. @@ -1055,7 +1049,7 @@ fn gen_builder(instructions: &Vec<&Instruction>, formats: &FormatRegistry, fmt: ); fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {"); fmt.indent(|fmt| { - for inst in instructions { + for inst in instructions.values() { gen_inst_builder(inst, formats.get(inst.format), fmt); } for format in formats.iter() { @@ -1065,13 +1059,15 @@ fn gen_builder(instructions: &Vec<&Instruction>, formats: &FormatRegistry, fmt: fmt.line("}"); } -pub fn generate( - all_inst_groups: Vec<&InstructionGroup>, - format_registry: &FormatRegistry, +pub(crate) fn generate( + shared_defs: &SharedDefinitions, opcode_filename: &str, inst_builder_filename: &str, out_dir: &str, ) -> Result<(), error::Error> { + let format_registry = &shared_defs.format_registry; + let all_inst = &shared_defs.all_instructions; + // Opcodes. let mut fmt = Formatter::new(); gen_formats(format_registry, &mut fmt); @@ -1079,13 +1075,13 @@ pub fn generate( fmt.empty_line(); gen_instruction_data_impl(format_registry, &mut fmt); fmt.empty_line(); - let all_inst = gen_opcodes(format_registry, &all_inst_groups, &mut fmt); - gen_type_constraints(&all_inst, &mut fmt); + gen_opcodes(all_inst, format_registry, &mut fmt); + gen_type_constraints(all_inst, &mut fmt); fmt.update_file(opcode_filename, out_dir)?; // Instruction builder. 
    let mut fmt = Formatter::new();
-    gen_builder(&all_inst, format_registry, &mut fmt);
+    gen_builder(all_inst, format_registry, &mut fmt);
     fmt.update_file(inst_builder_filename, out_dir)?;
 
     Ok(())
diff --git a/cranelift-codegen/meta/src/gen_legalizer.rs b/cranelift-codegen/meta/src/gen_legalizer.rs
index 149e29f92..da1fb1f58 100644
--- a/cranelift-codegen/meta/src/gen_legalizer.rs
+++ b/cranelift-codegen/meta/src/gen_legalizer.rs
@@ -61,10 +61,10 @@ fn unwrap_inst(
         fmtln!(fmt, "{},", field.member);
     }
 
-    if iform.num_value_operands == 1 {
-        fmt.line("arg,");
-    } else if iform.has_value_list || iform.num_value_operands > 1 {
+    if iform.has_value_list || iform.num_value_operands > 1 {
         fmt.line("ref args,");
+    } else if iform.num_value_operands == 1 {
+        fmt.line("arg,");
     }
 
     fmt.line("..");
@@ -87,6 +87,13 @@ fn unwrap_inst(
         } else if op.is_value() {
             let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap();
             fmtln!(fmt, "func.dfg.resolve_aliases(args[{}]),", n);
+        } else if op.is_varargs() {
+            let n = inst.imm_opnums.iter().chain(inst.value_opnums.iter()).max().map(|n| n + 1).unwrap_or(0);
+            // We need to create a `Vec` here, as using a slice would result in a borrowck
+            // error later on.
+            fmtln!(fmt, "\
+                args.iter().skip({}).map(|&arg| func.dfg.resolve_aliases(arg)).collect::<Vec<_>>(),\
+            ", n);
         }
     }
 
@@ -104,6 +111,19 @@ fn unwrap_inst(
     });
     fmtln!(fmt, "};");
 
+    assert_eq!(inst.operands_in.len(), apply.args.len());
+    for (i, op) in inst.operands_in.iter().enumerate() {
+        if op.is_varargs() {
+            let name = var_pool
+                .get(apply.args[i].maybe_var().expect("vararg without name"))
+                .name;
+
+            // The name above is bound to a `Vec` representing the varargs. However, it is
+            // expected to be a `&[Value]` below, so we borrow it.
+            fmtln!(fmt, "let {} = &{};", name, name);
+        }
+    }
+
     for &op_num in &inst.value_opnums {
         let arg = &apply.args[op_num];
         if let Some(var_index) = arg.maybe_var() {
@@ -134,6 +154,19 @@ fn unwrap_inst(
                 .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap())
                 .to_comment_string(var_pool)
         ));
+
+        fmt.line("let r = pos.func.dfg.inst_results(inst);");
+        for (i, &var_index) in def.defined_vars.iter().enumerate() {
+            let var = var_pool.get(var_index);
+            fmtln!(fmt, "let {} = &r[{}];", var.name, i);
+            fmtln!(
+                fmt,
+                "let typeof_{} = pos.func.dfg.value_type(*{});",
+                var.name,
+                var.name
+            );
+        }
+
         replace_inst = true;
     } else {
         // Boring case: Detach the result values, capture them in locals.
@@ -389,6 +422,13 @@ fn gen_transform<'a>(
         fmt.line("let removed = pos.remove_inst();");
         fmt.line("debug_assert_eq!(removed, inst);");
     }
+
+    if transform.def_pool.get(transform.src).apply.inst.is_branch {
+        // A branch might have been legalized into multiple branches, so we need to recompute
+        // the cfg.
+        fmt.line("cfg.recompute_ebb(pos.func, pos.current_ebb().unwrap());");
+    }
+
     fmt.line("return true;");
 });
 fmt.line("}");
@@ -453,7 +493,9 @@ fn gen_transform_group<'a>(
 
     // Emit the custom transforms. The Rust compiler will complain about any overlap with
     // the normal transforms.
- for (inst_camel_name, func_name) in &group.custom_legalizes { + let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); + sorted_custom_legalizes.sort(); + for (inst_camel_name, func_name) in sorted_custom_legalizes { fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); fmt.indent(|fmt| { fmtln!(fmt, "{}(inst, pos.func, cfg, isa);", func_name); @@ -527,7 +569,7 @@ fn gen_isa( direct_groups.len() ); fmt.indent(|fmt| { - for group_index in direct_groups { + for &group_index in direct_groups { fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); } }); diff --git a/cranelift-codegen/meta/src/gen_registers.rs b/cranelift-codegen/meta/src/gen_registers.rs index 5e0fdac57..08edfa9e2 100644 --- a/cranelift-codegen/meta/src/gen_registers.rs +++ b/cranelift-codegen/meta/src/gen_registers.rs @@ -56,6 +56,7 @@ fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) { fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start); fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask()); fmtln!(fmt, "mask: [{}],", mask); + fmtln!(fmt, "pinned_reg: {:?},", reg_bank.pinned_reg); fmtln!(fmt, "info: &INFO,"); }); fmtln!(fmt, "};"); diff --git a/cranelift-codegen/meta/src/gen_settings.rs b/cranelift-codegen/meta/src/gen_settings.rs index 0238ef1ec..cdd07d78b 100644 --- a/cranelift-codegen/meta/src/gen_settings.rs +++ b/cranelift-codegen/meta/src/gen_settings.rs @@ -308,7 +308,9 @@ fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) { hash_entries.extend(group.settings.iter().map(|x| SettingOrPreset::Setting(x))); hash_entries.extend(group.presets.iter().map(|x| SettingOrPreset::Preset(x))); - let hash_table = generate_table(&hash_entries, |entry| simple_hash(entry.name())); + let hash_table = generate_table(hash_entries.iter(), hash_entries.len(), |entry| { + simple_hash(entry.name()) + }); fmtln!(fmt, "static HASH_TABLE: [u16; {}] = [", hash_table.len()); fmt.indent(|fmt| { for h in &hash_table { diff --git a/cranelift-codegen/meta/src/gen_types.rs b/cranelift-codegen/meta/src/gen_types.rs index 0a52eb371..d4b4c60d6 100644 --- a/cranelift-codegen/meta/src/gen_types.rs +++ b/cranelift-codegen/meta/src/gen_types.rs @@ -54,6 +54,11 @@ fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { emit_type(&ty, fmt)?; } + // Emit all reference types. + for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) { + emit_type(&ty, fmt)?; + } + // Emit vector definitions for common SIMD sizes. 
for vec_size in &[64_u64, 128, 256, 512] { emit_vectors(*vec_size, fmt)?; diff --git a/cranelift-codegen/meta/src/isa/arm32/mod.rs b/cranelift-codegen/meta/src/isa/arm32/mod.rs index ee238866f..1f3c05ad6 100644 --- a/cranelift-codegen/meta/src/isa/arm32/mod.rs +++ b/cranelift-codegen/meta/src/isa/arm32/mod.rs @@ -1,6 +1,7 @@ use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionGroupBuilder; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; @@ -48,13 +49,14 @@ fn define_regs() -> IsaRegs { regs.build() } -pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = define_regs(); let inst_group = InstructionGroupBuilder::new( "arm32", "arm32 specific instruction set", + &mut shared_defs.all_instructions, &shared_defs.format_registry, ) .build(); @@ -64,11 +66,25 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let mut t32 = CpuMode::new("T32"); // TODO refine these. - let narrow = shared_defs.transform_groups.by_name("narrow"); - a32.legalize_default(narrow); - t32.legalize_default(narrow); + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a32.legalize_default(narrow_flags); + t32.legalize_default(narrow_flags); let cpu_modes = vec![a32, t32]; - TargetIsa::new("arm32", inst_group, settings, regs, cpu_modes) + // TODO implement arm32 recipes. + let recipes = Recipes::new(); + + // TODO implement arm32 encodings and predicates. + let encodings_predicates = InstructionPredicateMap::new(); + + TargetIsa::new( + "arm32", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) } diff --git a/cranelift-codegen/meta/src/isa/arm64/mod.rs b/cranelift-codegen/meta/src/isa/arm64/mod.rs index 5a11e6984..15bfb736c 100644 --- a/cranelift-codegen/meta/src/isa/arm64/mod.rs +++ b/cranelift-codegen/meta/src/isa/arm64/mod.rs @@ -1,6 +1,7 @@ use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionGroupBuilder; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; @@ -44,13 +45,14 @@ fn define_registers() -> IsaRegs { regs.build() } -pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = define_registers(); let inst_group = InstructionGroupBuilder::new( "arm64", "arm64 specific instruction set", + &mut shared_defs.all_instructions, &shared_defs.format_registry, ) .build(); @@ -58,10 +60,24 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let mut a64 = CpuMode::new("A64"); // TODO refine these. 
-    let narrow = shared_defs.transform_groups.by_name("narrow");
-    a64.legalize_default(narrow);
+    let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags");
+    a64.legalize_default(narrow_flags);
 
     let cpu_modes = vec![a64];
 
-    TargetIsa::new("arm64", inst_group, settings, regs, cpu_modes)
+    // TODO implement arm64 recipes.
+    let recipes = Recipes::new();
+
+    // TODO implement arm64 encodings and predicates.
+    let encodings_predicates = InstructionPredicateMap::new();
+
+    TargetIsa::new(
+        "arm64",
+        inst_group,
+        settings,
+        regs,
+        recipes,
+        cpu_modes,
+        encodings_predicates,
+    )
 }
diff --git a/cranelift-codegen/meta/src/isa/mod.rs b/cranelift-codegen/meta/src/isa/mod.rs
index 0c2bb5eed..6c5ba1388 100644
--- a/cranelift-codegen/meta/src/isa/mod.rs
+++ b/cranelift-codegen/meta/src/isa/mod.rs
@@ -55,7 +55,7 @@ impl fmt::Display for Isa {
     }
 }
 
-pub fn define(isas: &Vec<Isa>, shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> {
+pub(crate) fn define(isas: &Vec<Isa>, shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> {
     isas.iter()
         .map(|isa| match isa {
             Isa::Riscv => riscv::define(shared_defs),
diff --git a/cranelift-codegen/meta/src/isa/riscv/encodings.rs b/cranelift-codegen/meta/src/isa/riscv/encodings.rs
new file mode 100644
index 000000000..02a3bc7b6
--- /dev/null
+++ b/cranelift-codegen/meta/src/isa/riscv/encodings.rs
@@ -0,0 +1,438 @@
+use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+    BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::SettingGroup;
+
+use crate::shared::types::Bool::B1;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use super::recipes::RecipeGroup;
+
+fn enc(inst: impl Into<InstSpec>, recipe: EncodingRecipeNumber, bits: u16) -> EncodingBuilder {
+    EncodingBuilder::new(inst.into(), recipe, bits)
+}
+
+pub struct PerCpuModeEncodings<'defs> {
+    pub inst_pred_reg: InstructionPredicateRegistry,
+    pub enc32: Vec<Encoding>,
+    pub enc64: Vec<Encoding>,
+    recipes: &'defs Recipes,
+}
+
+impl<'defs> PerCpuModeEncodings<'defs> {
+    fn new(recipes: &'defs Recipes) -> Self {
+        Self {
+            inst_pred_reg: InstructionPredicateRegistry::new(),
+            enc32: Vec::new(),
+            enc64: Vec::new(),
+            recipes,
+        }
+    }
+    fn add32(&mut self, encoding: EncodingBuilder) {
+        self.enc32
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+    fn add64(&mut self, encoding: EncodingBuilder) {
+        self.enc64
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+}
+
+// The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit instructions have 11 as
+// the two low bits, with bits 6:2 determining the base opcode.
+//
+// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
+// The functions below encode the encbits.
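For a concrete feel of this scheme, here is an illustrative, standalone check rather than part of the patch. It reproduces `branch_bits` and verifies the BNE case (BRANCH base opcode 0b11000, funct3 0b001); how the emitting `put_*` functions expand encbits into a full 32-bit word, re-appending the low 0b11 bits, is assumed rather than shown here.

    fn branch_bits(funct3: u16) -> u16 {
        assert!(funct3 <= 0b111);
        0b11000 | (funct3 << 5)
    }

    fn main() {
        // BNE: base opcode bits 6:2 = 0b11000 (BRANCH), funct3 = 0b001.
        let encbits = branch_bits(0b001);
        assert_eq!(encbits, 0b0011_1000); // 0x38
        // Bits 4:0 of encbits hold opcode[6:2]; bits 7:5 hold funct3.
        assert_eq!(encbits & 0b11111, 0b11000);
        assert_eq!((encbits >> 5) & 0b111, 0b001);
    }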
+ +fn load_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00000 | (funct3 << 5) +} + +fn store_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b01000 | (funct3 << 5) +} + +fn branch_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b11000 | (funct3 << 5) +} + +fn jalr_bits() -> u16 { + // This was previously accepting an argument funct3 of 3 bits and used the following formula: + //0b11001 | (funct3 << 5) + 0b11001 +} + +fn jal_bits() -> u16 { + 0b11011 +} + +fn opimm_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00100 | (funct3 << 5) | (funct7 << 8) +} + +fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00110 | (funct3 << 5) | (funct7 << 8) +} + +fn op_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b1111111); + 0b01100 | (funct3 << 5) | (funct7 << 8) +} + +fn op32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b1111111); + 0b01110 | (funct3 << 5) | (funct7 << 8) +} + +fn lui_bits() -> u16 { + 0b01101 +} + +pub(crate) fn define<'defs>( + shared_defs: &'defs SharedDefinitions, + isa_settings: &SettingGroup, + recipes: &'defs RecipeGroup, +) -> PerCpuModeEncodings<'defs> { + // Instructions shorthands. + let shared = &shared_defs.instructions; + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let br_icmp = shared.by_name("br_icmp"); + let brz = shared.by_name("brz"); + let brnz = shared.by_name("brnz"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let iconst = shared.by_name("iconst"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let imul = shared.by_name("imul"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let regmove = shared.by_name("regmove"); + let spill = shared.by_name("spill"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let return_ = shared.by_name("return"); + + // Recipes shorthands, prefixed with r_. 
+ let r_copytossa = recipes.by_name("copytossa"); + let r_fillnull = recipes.by_name("fillnull"); + let r_icall = recipes.by_name("Icall"); + let r_icopy = recipes.by_name("Icopy"); + let r_ii = recipes.by_name("Ii"); + let r_iicmp = recipes.by_name("Iicmp"); + let r_iret = recipes.by_name("Iret"); + let r_irmov = recipes.by_name("Irmov"); + let r_iz = recipes.by_name("Iz"); + let r_gp_sp = recipes.by_name("GPsp"); + let r_gp_fi = recipes.by_name("GPfi"); + let r_r = recipes.by_name("R"); + let r_ricmp = recipes.by_name("Ricmp"); + let r_rshamt = recipes.by_name("Rshamt"); + let r_sb = recipes.by_name("SB"); + let r_sb_zero = recipes.by_name("SBzero"); + let r_stacknull = recipes.by_name("stacknull"); + let r_u = recipes.by_name("U"); + let r_uj = recipes.by_name("UJ"); + let r_uj_call = recipes.by_name("UJcall"); + + // Predicates shorthands. + let use_m = isa_settings.predicate_by_name("use_m"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(&recipes.recipes); + + // Basic arithmetic binary instructions are encoded in an R-type instruction. + for &(inst, inst_imm, f3, f7) in &[ + (iadd, Some(iadd_imm), 0b000, 0b0000000), + (isub, None, 0b000, 0b0100000), + (bxor, Some(bxor_imm), 0b100, 0b0000000), + (bor, Some(bor_imm), 0b110, 0b0000000), + (band, Some(band_imm), 0b111, 0b0000000), + ] { + e.add32(enc(inst.bind(I32), r_r, op_bits(f3, f7))); + e.add64(enc(inst.bind(I64), r_r, op_bits(f3, f7))); + + // Immediate versions for add/xor/or/and. + if let Some(inst_imm) = inst_imm { + e.add32(enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); + e.add64(enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); + } + } + + // 32-bit ops in RV64. + e.add64(enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b0000000))); + e.add64(enc(isub.bind(I32), r_r, op32_bits(0b000, 0b0100000))); + // There are no andiw/oriw/xoriw variations. + e.add64(enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); + + // Use iadd_imm with %x0 to materialize constants. + e.add32(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); + + // Dynamic shifts have the same masking semantics as the clif base instructions. + for &(inst, inst_imm, f3, f7) in &[ + (ishl, ishl_imm, 0b1, 0b0), + (ushr, ushr_imm, 0b101, 0b0), + (sshr, sshr_imm, 0b101, 0b100000), + ] { + e.add32(enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); + e.add64(enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); + e.add64(enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); + // Allow i32 shift amounts in 64-bit shifts. + e.add64(enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); + e.add64(enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); + + // Immediate shifts. + e.add32(enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); + e.add64(enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); + e.add64(enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); + } + + // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit + // numbers in RV64. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], + ) + .inst_predicate(&shared_defs.format_registry, &var_pool) + .unwrap() + }; + + let icmp_i32 = icmp.bind(I32); + let icmp_i64 = icmp.bind(I64); + e.add32( + enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b0000000)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b0000000)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b0000000)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b0000000)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + + // Immediate variants. + let icmp_i32 = icmp_imm.bind(I32); + let icmp_i64 = icmp_imm.bind(I64); + e.add32( + enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + } + + // Integer constants with the low 12 bits clear are materialized by lui. + e.add32(enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(enc(iconst.bind(I64), r_u, lui_bits())); + + // "M" Standard Extension for Integer Multiplication and Division. + // Gated by the `use_m` flag. + e.add32(enc(imul.bind(I32), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m)); + e.add64(enc(imul.bind(I64), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m)); + e.add64(enc(imul.bind(I32), r_r, op32_bits(0b000, 0b00000001)).isa_predicate(use_m)); + + // Control flow. + + // Unconditional branches. + e.add32(enc(jump, r_uj, jal_bits())); + e.add64(enc(jump, r_uj, jal_bits())); + e.add32(enc(call, r_uj_call, jal_bits())); + e.add64(enc(call, r_uj_call, jal_bits())); + + // Conditional branches. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut br_icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let dest = var_pool.create("dest"); + let args = var_pool.create("args"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![ + Expr::Literal(cc), + Expr::Var(x), + Expr::Var(y), + Expr::Var(dest), + Expr::Var(args), + ], + ) + .inst_predicate(&shared_defs.format_registry, &var_pool) + .unwrap() + }; + + let br_icmp_i32 = br_icmp.bind(I32); + let br_icmp_i64 = br_icmp.bind(I64); + for &(cond, f3) in &[ + ("eq", 0b000), + ("ne", 0b001), + ("slt", 0b100), + ("sge", 0b101), + ("ult", 0b110), + ("uge", 0b111), + ] { + e.add32( + enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), + ); + e.add64( + enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), + ); + } + } + + for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { + e.add32(enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); + e.add64(enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); + e.add32(enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + e.add64(enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + } + + // Returns are a special case of jalr_bits using %x1 to hold the return address. + // The return address is provided by a special-purpose `link` return value that + // is added by legalize_signature(). + e.add32(enc(return_, r_iret, jalr_bits())); + e.add64(enc(return_, r_iret, jalr_bits())); + e.add32(enc(call_indirect.bind(I32), r_icall, jalr_bits())); + e.add64(enc(call_indirect.bind(I64), r_icall, jalr_bits())); + + // Spill and fill. + e.add32(enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); + e.add32(enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32] { + e.add64(enc(fill_nop.bind(ty), r_fillnull, 0)); + e.add32(enc(fill_nop.bind(ty), r_fillnull, 0)); + } + e.add64(enc(fill_nop.bind(B1), r_fillnull, 0)); + e.add32(enc(fill_nop.bind(B1), r_fillnull, 0)); + + // Register copies. + e.add32(enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); + e.add64(enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); + e.add64(enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); + + e.add32(enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); + e.add64(enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); + e.add64(enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); + + e.add32(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add64(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add32(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + e.add64(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. 
+ for &ty in &[I64, I32, I16, I8] { + e.add32(enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(enc(copy_nop.bind(ty), r_stacknull, 0)); + } + for &ty in &[F64, F32] { + e.add32(enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(enc(copy_nop.bind(ty), r_stacknull, 0)); + } + + // Copy-to-SSA + e.add32(enc( + copy_to_ssa.bind(I32), + r_copytossa, + opimm_bits(0b000, 0), + )); + e.add64(enc( + copy_to_ssa.bind(I64), + r_copytossa, + opimm_bits(0b000, 0), + )); + e.add64(enc( + copy_to_ssa.bind(I32), + r_copytossa, + opimm32_bits(0b000, 0), + )); + e.add32(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add64(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add32(enc( + copy_to_ssa.bind_ref(R32), + r_copytossa, + opimm_bits(0b000, 0), + )); + e.add64(enc( + copy_to_ssa.bind_ref(R64), + r_copytossa, + opimm_bits(0b000, 0), + )); + + e +} diff --git a/cranelift-codegen/meta/src/isa/riscv/mod.rs b/cranelift-codegen/meta/src/isa/riscv/mod.rs index 776224d74..14eb3267f 100644 --- a/cranelift-codegen/meta/src/isa/riscv/mod.rs +++ b/cranelift-codegen/meta/src/isa/riscv/mod.rs @@ -8,6 +8,9 @@ use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I32, I64}; use crate::shared::Definitions as SharedDefinitions; +mod encodings; +mod recipes; + fn define_settings(shared: &SettingGroup) -> SettingGroup { let mut setting = SettingGroupBuilder::new("riscv"); @@ -82,13 +85,14 @@ fn define_registers() -> IsaRegs { regs.build() } -pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = define_settings(&shared_defs.settings); let regs = define_registers(); let inst_group = InstructionGroupBuilder::new( "riscv", "riscv specific instruction set", + &mut shared_defs.all_instructions, &shared_defs.format_registry, ) .build(); @@ -98,21 +102,39 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let mut rv_64 = CpuMode::new("RV64"); let expand = shared_defs.transform_groups.by_name("expand"); - let narrow = shared_defs.transform_groups.by_name("narrow"); + let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags"); + rv_32.legalize_monomorphic(expand); - rv_32.legalize_default(narrow); + rv_32.legalize_default(narrow_no_flags); rv_32.legalize_type(I32, expand); rv_32.legalize_type(F32, expand); rv_32.legalize_type(F64, expand); rv_64.legalize_monomorphic(expand); - rv_64.legalize_default(narrow); + rv_64.legalize_default(narrow_no_flags); rv_64.legalize_type(I32, expand); rv_64.legalize_type(I64, expand); rv_64.legalize_type(F32, expand); rv_64.legalize_type(F64, expand); + let recipes = recipes::define(shared_defs, ®s); + + let encodings = encodings::define(shared_defs, &settings, &recipes); + rv_32.set_encodings(encodings.enc32); + rv_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = recipes.collect(); + let cpu_modes = vec![rv_32, rv_64]; - TargetIsa::new("riscv", inst_group, settings, regs, cpu_modes) + TargetIsa::new( + "riscv", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) } diff --git a/cranelift-codegen/meta/src/isa/riscv/recipes.rs b/cranelift-codegen/meta/src/isa/riscv/recipes.rs new file mode 100644 index 000000000..f32bbd568 --- /dev/null +++ b/cranelift-codegen/meta/src/isa/riscv/recipes.rs @@ -0,0 +1,293 @@ +use std::collections::HashMap; + +use crate::cdsl::formats::FormatRegistry; +use 
crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack};
+use crate::cdsl::regs::IsaRegs;
+use crate::shared::Definitions as SharedDefinitions;
+
+/// A helper to create recipes and use them when defining the RISCV encodings.
+pub struct RecipeGroup<'formats> {
+    /// Memoized format registry, to pass it to the builders.
+    formats: &'formats FormatRegistry,
+
+    /// The actual list of recipes explicitly created in this file.
+    pub recipes: Recipes,
+
+    /// Provides fast lookup from a name to an encoding recipe.
+    name_to_recipe: HashMap<String, EncodingRecipeNumber>,
+}
+
+impl<'formats> RecipeGroup<'formats> {
+    fn new(formats: &'formats FormatRegistry) -> Self {
+        Self {
+            formats,
+            recipes: Recipes::new(),
+            name_to_recipe: HashMap::new(),
+        }
+    }
+
+    fn push(&mut self, builder: EncodingRecipeBuilder) {
+        assert!(
+            self.name_to_recipe.get(&builder.name).is_none(),
+            format!("riscv recipe '{}' created twice", builder.name)
+        );
+        let name = builder.name.clone();
+        let number = self.recipes.push(builder.build(self.formats));
+        self.name_to_recipe.insert(name, number);
+    }
+
+    pub fn by_name(&self, name: &str) -> EncodingRecipeNumber {
+        let number = *self
+            .name_to_recipe
+            .get(name)
+            .expect(&format!("unknown riscv recipe name {}", name));
+        number
+    }
+
+    pub fn collect(self) -> Recipes {
+        self.recipes
+    }
+}
+
+pub(crate) fn define<'formats>(
+    shared_defs: &'formats SharedDefinitions,
+    regs: &IsaRegs,
+) -> RecipeGroup<'formats> {
+    let formats = &shared_defs.format_registry;
+
+    // Format shorthands.
+    let f_binary = formats.by_name("Binary");
+    let f_binary_imm = formats.by_name("BinaryImm");
+    let f_branch = formats.by_name("Branch");
+    let f_branch_icmp = formats.by_name("BranchIcmp");
+    let f_call = formats.by_name("Call");
+    let f_call_indirect = formats.by_name("CallIndirect");
+    let f_copy_to_ssa = formats.by_name("CopyToSsa");
+    let f_int_compare = formats.by_name("IntCompare");
+    let f_int_compare_imm = formats.by_name("IntCompareImm");
+    let f_jump = formats.by_name("Jump");
+    let f_multiary = formats.by_name("MultiAry");
+    let f_regmove = formats.by_name("RegMove");
+    let f_unary = formats.by_name("Unary");
+    let f_unary_imm = formats.by_name("UnaryImm");
+
+    // Register classes shorthands.
+    let gpr = regs.class_by_name("GPR");
+
+    // Definitions.
+    let mut recipes = RecipeGroup::new(&shared_defs.format_registry);
+
+    // R-type 32-bit instructions: These are mostly binary arithmetic instructions.
+    // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
+    recipes.push(
+        EncodingRecipeBuilder::new("R", f_binary, 4)
+            .operands_in(vec![gpr, gpr])
+            .operands_out(vec![gpr])
+            .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"),
+    );
+
+    // R-type with an immediate shift amount instead of rs2.
+    recipes.push(
+        EncodingRecipeBuilder::new("Rshamt", f_binary_imm, 4)
+            .operands_in(vec![gpr])
+            .operands_out(vec![gpr])
+            .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
+    );
+
+    // R-type encoding of an integer comparison.
+ recipes.push( + EncodingRecipeBuilder::new("Ricmp", f_int_compare, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + let format = formats.get(f_binary_imm); + recipes.push( + EncodingRecipeBuilder::new("Ii", f_binary_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 12, 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type instruction with a hardcoded %x0 rs1. + let format = formats.get(f_unary_imm); + recipes.push( + EncodingRecipeBuilder::new("Iz", f_unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 12, 0, + )) + .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding of an integer comparison. + let format = formats.get(f_int_compare_imm); + recipes.push( + EncodingRecipeBuilder::new("Iicmp", f_int_compare_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 12, 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The + // variable return values are not encoded. + recipes.push(EncodingRecipeBuilder::new("Iret", f_multiary, 4).emit( + r#" + // Return instructions are always a jalr to %x1. + // The return address is provided as a special-purpose link argument. + put_i( + bits, + 1, // rs1 = %x1 + 0, // no offset. + 0, // rd = %x0: no address written. + sink, + ); + "#, + )); + + // I-type encoding for `jalr` as a call_indirect. + recipes.push( + EncodingRecipeBuilder::new("Icall", f_call_indirect, 4) + .operands_in(vec![gpr]) + .emit( + r#" + // call_indirect instructions are jalr with rd=%x1. + put_i( + bits, + in_reg0, + 0, // no offset. + 1, // rd = %x1: link register. + sink, + ); + "#, + ), + ); + + // Copy of a GPR is implemented as addi x, 0. + recipes.push( + EncodingRecipeBuilder::new("Icopy", f_unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), + ); + + // Same for a GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("Irmov", f_regmove, 4) + .operands_in(vec![gpr]) + .emit("put_i(bits, src, 0, dst, sink);"), + ); + + // Same for copy-to-SSA -- GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("copytossa", f_copy_to_ssa, 4) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .emit("put_i(bits, src, 0, out_reg0, sink);"), + ); + + // U-type instructions have a 20-bit immediate that targets bits 12-31. + let format = formats.get(f_unary_imm); + recipes.push( + EncodingRecipeBuilder::new("U", f_unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 12, + )) + .emit("put_u(bits, imm.into(), out_reg0, sink);"), + ); + + // UJ-type unconditional branch instructions. 
+ recipes.push( + EncodingRecipeBuilder::new("UJ", f_jump, 4) + .branch_range((0, 21)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_uj(bits, disp, 0, sink); + "#, + ), + ); + + recipes.push(EncodingRecipeBuilder::new("UJcall", f_call, 4).emit( + r#" + sink.reloc_external(Reloc::RiscvCall, + &func.dfg.ext_funcs[func_ref].name, + 0); + // rd=%x1 is the standard link register. + put_uj(bits, 0, 1, sink); + "#, + )); + + // SB-type branch instructions. + recipes.push( + EncodingRecipeBuilder::new("SB", f_branch_icmp, 4) + .operands_in(vec![gpr, gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, in_reg1, sink); + "#, + ), + ); + + // SB-type branch instruction with rs2 fixed to zero. + recipes.push( + EncodingRecipeBuilder::new("SBzero", f_branch, 4) + .operands_in(vec![gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, 0, sink); + "#, + ), + ); + + // Spill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPsp", f_unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![Stack::new(gpr)]) + .emit("unimplemented!();"), + ); + + // Fill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPfi", f_unary, 4) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .emit("unimplemented!();"), + ); + + // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. + recipes.push( + EncodingRecipeBuilder::new("stacknull", f_unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // No-op fills, created by late-stage redundant-fill removal. 
+    recipes.push(
+        EncodingRecipeBuilder::new("fillnull", f_unary, 0)
+            .operands_in(vec![Stack::new(gpr)])
+            .operands_out(vec![gpr])
+            .clobbers_flags(false)
+            .emit(""),
+    );
+
+    recipes
+}
diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs
new file mode 100644
index 000000000..741f77d75
--- /dev/null
+++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs
@@ -0,0 +1,2036 @@
+#![allow(non_snake_case)]
+
+use std::collections::HashMap;
+
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+    InstSpec, Instruction, InstructionGroup, InstructionPredicate, InstructionPredicateNode,
+    InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use super::recipes::{RecipeGroup, Template};
+
+pub struct PerCpuModeEncodings {
+    pub enc32: Vec<Encoding>,
+    pub enc64: Vec<Encoding>,
+    pub recipes: Recipes,
+    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
+    pub inst_pred_reg: InstructionPredicateRegistry,
+}
+
+impl PerCpuModeEncodings {
+    fn new() -> Self {
+        Self {
+            enc32: Vec::new(),
+            enc64: Vec::new(),
+            recipes: Recipes::new(),
+            recipes_by_name: HashMap::new(),
+            inst_pred_reg: InstructionPredicateRegistry::new(),
+        }
+    }
+
+    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
+        if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
+            assert!(
+                self.recipes[*found_index] == recipe,
+                format!(
+                    "trying to insert different recipes with the same name ({})",
+                    recipe.name
+                )
+            );
+            *found_index
+        } else {
+            let recipe_name = recipe.name.clone();
+            let index = self.recipes.push(recipe);
+            self.recipes_by_name.insert(recipe_name, index);
+            index
+        }
+    }
+
+    fn make_encoding<T>(
+        &mut self,
+        inst: InstSpec,
+        template: Template,
+        builder_closure: T,
+    ) -> Encoding
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let (recipe, bits) = template.build();
+        let recipe_number = self.add_recipe(recipe);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
+    }
+
+    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc32.push(encoding);
+    }
+    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        self.enc32_func(inst, template, |x| x);
+    }
+    fn enc32_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
+    }
+    fn enc32_instp(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
+    }
+    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+        let recipe_number = self.add_recipe(recipe.clone());
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+        self.enc32.push(encoding);
+    }
+
+    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+    where
+        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+    {
+        let encoding = self.make_encoding(inst.into(), template, builder_closure);
+        self.enc64.push(encoding);
+    }
+    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        self.enc64_func(inst, template, |x| x);
+    }
+    fn enc64_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
+    }
+    fn enc64_instp(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
+    }
+    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+        let recipe_number = self.add_recipe(recipe.clone());
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+        self.enc64.push(encoding);
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(I32), template.nonrex());
+
+        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+        // reg-alloc would never use r8 and up.
+        self.enc64(inst.bind(I32), template.rex());
+        self.enc64(inst.bind(I32), template.nonrex());
+        self.enc64(inst.bind(I64), template.rex().w());
+    }
+
+    /// Add encodings for `inst.b32` to X86_32.
+    /// Add encodings for `inst.b32` to X86_64 with and without REX.
+    /// Add encodings for `inst.b64` to X86_64 with a REX.W prefix.
+    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(B32), template.nonrex());
+
+        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+        // reg-alloc would never use r8 and up.
+        self.enc64(inst.bind(B32), template.rex());
+        self.enc64(inst.bind(B32), template.nonrex());
+        self.enc64(inst.bind(B64), template.rex().w());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind(I32), template.nonrex());
+        self.enc64(inst.bind(I32), template.rex());
+        self.enc64(inst.bind(I64), template.rex().w());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+    fn enc_i32_i64_instp(
+        &mut self,
+        inst: &Instruction,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+
+        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+        // reg-alloc would never use r8 and up.
+        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
+            builder.inst_predicate(instp)
+        });
+    }
+
+    /// Add encodings for `inst.r32` to X86_32.
+    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
+    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+        let inst: InstSpec = inst.into();
+        self.enc32(inst.bind_ref(R32), template.nonrex());
+        self.enc64(inst.bind_ref(R64), template.rex().w());
+    }
+
+    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64(inst.clone(), template.rex());
+        self.enc64(inst, template);
+    }
+
+    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+    fn enc_x86_64_instp(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64_func(inst.clone(), template.rex(), |builder| {
+            builder.inst_predicate(instp.clone())
+        });
+        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
+    }
+    fn enc_x86_64_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        // See above comment about the ordering of rex vs non-rex encodings.
+        self.enc64_isap(inst.clone(), template.rex(), isap);
+        self.enc64_isap(inst, template, isap);
+    }
+
+    /// Add all three encodings for `inst`:
+    /// - X86_32
+    /// - X86_64 with and without the REX prefix.
+    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc_x86_64(inst, template);
+    }
+    fn enc_both_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: SettingPredicateNumber,
+    ) {
+        self.enc32_isap(inst.clone(), template.clone(), isap);
+        self.enc_x86_64_isap(inst, template, isap);
+    }
+    fn enc_both_instp(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        instp: InstructionPredicateNode,
+    ) {
+        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
+        self.enc_x86_64_instp(inst, template, instp);
+    }
+
+    /// Add two encodings for `inst`:
+    /// - X86_32
+    /// - X86_64 with the REX prefix.
+    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc64(inst, template.rex());
+    }
+
+    /// Add encodings for `inst.i32` to X86_32.
+    /// Add encodings for `inst.i32` to X86_64 with and without REX.
+    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
+    /// argument to determine whether or not to set the REX.W bit.
+    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
+        self.enc32(inst.clone().bind(I32).bind_any(), template.clone());
+
+        // REX-less encoding must come after REX encoding so we don't use it by
+        // default. Otherwise reg-alloc would never use r8 and up.
+        self.enc64(inst.clone().bind(I32).bind_any(), template.clone().rex());
+        self.enc64(inst.clone().bind(I32).bind_any(), template.clone());
+
+        if w_bit {
+            self.enc64(inst.clone().bind(I64).bind_any(), template.rex().w());
+        } else {
+            self.enc64(inst.clone().bind(I64).bind_any(), template.clone().rex());
+            self.enc64(inst.clone().bind(I64).bind_any(), template);
+        }
+    }
+
+    /// Add the same encoding/template pairing to both X86_32 and X86_64
+    fn enc_32_64(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+        self.enc32(inst.clone(), template.clone());
+        self.enc64(inst, template);
+    }
+
+    /// Add the same encoding/recipe pairing to both X86_32 and X86_64
+    fn enc_32_64_rec(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        recipe: &EncodingRecipe,
+        bits: u16,
+    ) {
+        self.enc32_rec(inst.clone(), recipe, bits);
+        self.enc64_rec(inst, recipe, bits);
+    }
+
+    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
+    /// binding) has already happened.
+    fn enc_32_64_maybe_isap(
+        &mut self,
+        inst: impl Clone + Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
+        self.enc64_maybe_isap(inst, template, isap);
+    }
+
+    fn enc32_maybe_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        match isap {
+            None => self.enc32(inst, template),
+            Some(isap) => self.enc32_isap(inst, template, isap),
+        }
+    }
+
+    fn enc64_maybe_isap(
+        &mut self,
+        inst: impl Into<InstSpec>,
+        template: Template,
+        isap: Option<SettingPredicateNumber>,
+    ) {
+        match isap {
+            None => self.enc64(inst, template),
+            Some(isap) => self.enc64_isap(inst, template, isap),
+        }
+    }
+}
+
+// Definitions.
+
+pub(crate) fn define(
+    shared_defs: &SharedDefinitions,
+    settings: &SettingGroup,
+    x86: &InstructionGroup,
+    r: &RecipeGroup,
+) -> PerCpuModeEncodings {
+    let shared = &shared_defs.instructions;
+    let formats = &shared_defs.format_registry;
+
+    // Shorthands for instructions.
+ let adjust_sp_down = shared.by_name("adjust_sp_down"); + let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); + let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let band_not = shared.by_name("band_not"); + let bconst = shared.by_name("bconst"); + let bint = shared.by_name("bint"); + let bitcast = shared.by_name("bitcast"); + let bnot = shared.by_name("bnot"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let brff = shared.by_name("brff"); + let brif = shared.by_name("brif"); + let brnz = shared.by_name("brnz"); + let brz = shared.by_name("brz"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let ceil = shared.by_name("ceil"); + let clz = shared.by_name("clz"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_special = shared.by_name("copy_special"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let ctz = shared.by_name("ctz"); + let debugtrap = shared.by_name("debugtrap"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let func_addr = shared.by_name("func_addr"); + let get_pinned_reg = shared.by_name("get_pinned_reg"); + let iadd = shared.by_name("iadd"); + let iadd_ifcout = shared.by_name("iadd_ifcout"); + let iadd_ifcin = shared.by_name("iadd_ifcin"); + let iadd_ifcarry = shared.by_name("iadd_ifcarry"); + let iadd_imm = shared.by_name("iadd_imm"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let iconst = shared.by_name("iconst"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let imul = shared.by_name("imul"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let ireduce = shared.by_name("ireduce"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let is_null = shared.by_name("is_null"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let isub = shared.by_name("isub"); + let isub_ifbout = shared.by_name("isub_ifbout"); + let isub_ifbin = shared.by_name("isub_ifbin"); + let isub_ifborrow = shared.by_name("isub_ifborrow"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let nearest = shared.by_name("nearest"); + let null = shared.by_name("null"); + let popcnt = shared.by_name("popcnt"); + let raw_bitcast = 
shared.by_name("raw_bitcast"); + let regfill = shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let return_ = shared.by_name("return"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let safepoint = shared.by_name("safepoint"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); + let selectif = shared.by_name("selectif"); + let sextend = shared.by_name("sextend"); + let set_pinned_reg = shared.by_name("set_pinned_reg"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let stack_addr = shared.by_name("stack_addr"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let symbol_value = shared.by_name("symbol_value"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let resumable_trap = shared.by_name("resumable_trap"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let trunc = shared.by_name("trunc"); + let uextend = shared.by_name("uextend"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let vconst = shared.by_name("vconst"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + let x86_insertps = x86.by_name("x86_insertps"); + let x86_movlhps = x86.by_name("x86_movlhps"); + let x86_movsd = x86.by_name("x86_movsd"); + let x86_pop = x86.by_name("x86_pop"); + let x86_pextr = x86.by_name("x86_pextr"); + let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pshufd = x86.by_name("x86_pshufd"); + let x86_pshufb = x86.by_name("x86_pshufb"); + let x86_push = x86.by_name("x86_push"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + // Shorthands for recipes. 
+ let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); + let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_cmov = r.template("cmov"); + let rec_copysp = r.template("copysp"); + let rec_div = r.template("div"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_fa_ib = r.template("fa_ib"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fillnull = r.recipe("fillnull"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fnaddr4 = r.template("fnaddr4"); + let rec_fnaddr8 = r.template("fnaddr8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + let rec_furm = r.template("furm"); + let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_get_pinned_reg = r.recipe("get_pinned_reg"); + let rec_got_fnaddr8 = r.template("got_fnaddr8"); + let rec_got_gvaddr8 = r.template("got_gvaddr8"); + let rec_gvaddr4 = r.template("gvaddr4"); + let rec_gvaddr8 = r.template("gvaddr8"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_is_zero = r.template("is_zero"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_mulx = r.template("mulx"); + let rec_null = r.recipe("null"); + let rec_null_fpr = r.recipe("null_fpr"); + let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); + let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); + let rec_popq = r.template("popq"); + let rec_pu_id = r.template("pu_id"); + let rec_pu_id_bool = 
r.template("pu_id_bool"); + let rec_pu_id_ref = r.template("pu_id_ref"); + let rec_pu_iq = r.template("pu_iq"); + let rec_pushq = r.template("pushq"); + let rec_ret = r.template("ret"); + let rec_r_ib = r.template("r_ib"); + let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); + let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); + let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); + let rec_r_id = r.template("r_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_rc = r.template("rc"); + let rec_rfumr = r.template("rfumr"); + let rec_rfurm = r.template("rfurm"); + let rec_rmov = r.template("rmov"); + let rec_rr = r.template("rr"); + let rec_rout = r.template("rout"); + let rec_rin = r.template("rin"); + let rec_rio = r.template("rio"); + let rec_rrx = r.template("rrx"); + let rec_safepoint = r.recipe("safepoint"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_set_pinned_reg = r.template("set_pinned_reg"); + let rec_spaddr4_id = r.template("spaddr4_id"); + let rec_spaddr8_id = r.template("spaddr8_id"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + let rec_u_id = r.template("u_id"); + let rec_u_id_z = r.template("u_id_z"); + let rec_umr = r.template("umr"); + let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); + let rec_ur = r.template("ur"); + let rec_urm = r.template("urm"); + let rec_urm_noflags = r.template("urm_noflags"); + let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + let rec_vconst = r.template("vconst"); + + // Predicates shorthands. 
+    let all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+    let is_pic = settings.predicate_by_name("is_pic");
+    let not_all_ones_funcaddrs_and_not_is_pic =
+        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+    let not_is_pic = settings.predicate_by_name("not_is_pic");
+    let use_popcnt = settings.predicate_by_name("use_popcnt");
+    let use_lzcnt = settings.predicate_by_name("use_lzcnt");
+    let use_bmi1 = settings.predicate_by_name("use_bmi1");
+    let use_sse41 = settings.predicate_by_name("use_sse41");
+    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
+    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
+
+    // Definitions.
+    let mut e = PerCpuModeEncodings::new();
+
+    // The pinned register is fixed to a value that is entirely user-controlled, so reading it
+    // generates nothing!
+    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
+    e.enc_x86_64(
+        set_pinned_reg.bind(I64),
+        rec_set_pinned_reg.opcodes(vec![0x89]).rex().w(),
+    );
+
+    e.enc_i32_i64(iadd, rec_rr.opcodes(vec![0x01]));
+    e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(vec![0x01]));
+    e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(vec![0x11]));
+    e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(vec![0x11]));
+
+    e.enc_i32_i64(isub, rec_rr.opcodes(vec![0x29]));
+    e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(vec![0x29]));
+    e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(vec![0x19]));
+    e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(vec![0x19]));
+
+    e.enc_i32_i64(band, rec_rr.opcodes(vec![0x21]));
+    e.enc_b32_b64(band, rec_rr.opcodes(vec![0x21]));
+    e.enc_i32_i64(bor, rec_rr.opcodes(vec![0x09]));
+    e.enc_b32_b64(bor, rec_rr.opcodes(vec![0x09]));
+    e.enc_i32_i64(bxor, rec_rr.opcodes(vec![0x31]));
+    e.enc_b32_b64(bxor, rec_rr.opcodes(vec![0x31]));
+
+    // x86 has a bitwise not instruction NOT.
+    e.enc_i32_i64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2));
+    e.enc_b32_b64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2));
+
+    // Also add `b1` encodings for the logic instructions.
+    // TODO: Should this be done with 8-bit instructions? It would improve partial register
+    // dependencies.
+    e.enc_both(band.bind(B1), rec_rr.opcodes(vec![0x21]));
+    e.enc_both(bor.bind(B1), rec_rr.opcodes(vec![0x09]));
+    e.enc_both(bxor.bind(B1), rec_rr.opcodes(vec![0x31]));
+
+    e.enc_i32_i64(imul, rec_rrx.opcodes(vec![0x0f, 0xaf]));
+    e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(vec![0xf7]).rrr(7));
+    e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(vec![0xf7]).rrr(6));
+
+    e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(vec![0xf7]).rrr(5));
+    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4));
+
+    e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(vec![0x89]));
+    e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89]));
+    e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89]));
+    e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89]));
+
+    // TODO For x86-64, only define REX forms for now, since we can't describe the
+    // special regunit immediate operands with the current constraint language.
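+    // Always emitting the REX form costs at most one extra prefix byte on moves
+    // between the low eight registers, but it is always encodable in 64-bit mode.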
+    for &ty in &[I8, I16, I32] {
+        e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]));
+        e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex());
+    }
+    for &ty in &[B8, B16, B32] {
+        e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]));
+        e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex());
+    }
+    e.enc64(regmove.bind(I64), rec_rmov.opcodes(vec![0x89]).rex().w());
+    e.enc64(regmove.bind(B64), rec_rmov.opcodes(vec![0x89]).rex().w());
+    e.enc_both(regmove.bind(B1), rec_rmov.opcodes(vec![0x89]));
+    e.enc_both(regmove.bind(I8), rec_rmov.opcodes(vec![0x89]));
+    e.enc32(regmove.bind_ref(R32), rec_rmov.opcodes(vec![0x89]));
+    e.enc64(regmove.bind_ref(R32), rec_rmov.opcodes(vec![0x89]).rex());
+    e.enc64(
+        regmove.bind_ref(R64),
+        rec_rmov.opcodes(vec![0x89]).rex().w(),
+    );
+
+    e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(vec![0x83]).rrr(0));
+    e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(vec![0x81]).rrr(0));
+
+    e.enc_i32_i64(band_imm, rec_r_ib.opcodes(vec![0x83]).rrr(4));
+    e.enc_i32_i64(band_imm, rec_r_id.opcodes(vec![0x81]).rrr(4));
+
+    e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(1));
+    e.enc_i32_i64(bor_imm, rec_r_id.opcodes(vec![0x81]).rrr(1));
+
+    e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(6));
+    e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(vec![0x81]).rrr(6));
+
+    // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
+    // even use the single-byte immediate for 0xffff_ffXX masks.
+
+    // Immediate constants.
+    e.enc32(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8]));
+
+    e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(vec![0xb8]));
+    e.enc64(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8]));
+
+    // The 32-bit immediate movl also zero-extends to 64 bits.
+    let f_unary_imm = formats.get(formats.by_name("UnaryImm"));
+    let is_unsigned_int32 = InstructionPredicate::new_is_unsigned_int(f_unary_imm, "imm", 32, 0);
+
+    e.enc64_func(
+        iconst.bind(I64),
+        rec_pu_id.opcodes(vec![0xb8]).rex(),
+        |encoding| encoding.inst_predicate(is_unsigned_int32.clone()),
+    );
+    e.enc64_func(
+        iconst.bind(I64),
+        rec_pu_id.opcodes(vec![0xb8]),
+        |encoding| encoding.inst_predicate(is_unsigned_int32),
+    );
+
+    // Sign-extended 32-bit immediate.
+    e.enc64(
+        iconst.bind(I64),
+        rec_u_id.rex().opcodes(vec![0xc7]).rrr(0).w(),
+    );
+
+    // Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
+    e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w());
+
+    // Bool constants (uses MOV)
+    for &ty in &[B1, B8, B16, B32] {
+        e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(vec![0xb8]));
+    }
+    e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(vec![0xb8]).rex());
+
+    let is_zero_int = InstructionPredicate::new_is_zero_int(f_unary_imm, "imm");
+    e.enc_both_instp(
+        iconst.bind(I8),
+        rec_u_id_z.opcodes(vec![0x30]),
+        is_zero_int.clone(),
+    );
+    // You might expect the i16 encodings to have a 0x66 prefix on the opcode to indicate that
+    // the instruction operates on 16-bit operands (e.g., "xor %ax, %ax"). Cranelift currently
+    // does not know that it can drop the 0x66 prefix and clear the upper half of a 32-bit
+    // register in these scenarios, so we explicitly select a wider but permissible opcode.
+    //
+    // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't
+    // an appropriate i16 encoding available.
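+    // Concretely, `iconst.i16 0` is encoded below with the 32-bit xor (opcode 0x31),
+    // which clears the full register and needs no 0x66 prefix.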
+    e.enc_both_instp(
+        iconst.bind(I16),
+        rec_u_id_z.opcodes(vec![0x31]),
+        is_zero_int.clone(),
+    );
+    e.enc_both_instp(
+        iconst.bind(I32),
+        rec_u_id_z.opcodes(vec![0x31]),
+        is_zero_int.clone(),
+    );
+    e.enc_x86_64_instp(
+        iconst.bind(I64),
+        rec_u_id_z.opcodes(vec![0x31]),
+        is_zero_int,
+    );
+
+    // Shifts and rotates.
+    // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
+    // and 16-bit shifts would need explicit masking.
+
+    for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
+        // Cannot use enc_i32_i64 for this pattern because the instructions also
+        // need to bind an `any` operand.
+        e.enc32(
+            inst.bind(I32).bind_any(),
+            rec_rc.opcodes(vec![0xd3]).rrr(rrr),
+        );
+        e.enc64(
+            inst.bind(I64).bind_any(),
+            rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex().w(),
+        );
+        e.enc64(
+            inst.bind(I32).bind_any(),
+            rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex(),
+        );
+        e.enc64(
+            inst.bind(I32).bind_any(),
+            rec_rc.opcodes(vec![0xd3]).rrr(rrr),
+        );
+    }
+
+    for &(inst, rrr) in &[
+        (rotl_imm, 0),
+        (rotr_imm, 1),
+        (ishl_imm, 4),
+        (ushr_imm, 5),
+        (sshr_imm, 7),
+    ] {
+        e.enc_i32_i64(inst, rec_r_ib.opcodes(vec![0xc1]).rrr(rrr));
+    }
+
+    // Population count.
+    e.enc32_isap(
+        popcnt.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]),
+        use_popcnt,
+    );
+    e.enc64_isap(
+        popcnt.bind(I64),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex().w(),
+        use_popcnt,
+    );
+    e.enc64_isap(
+        popcnt.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex(),
+        use_popcnt,
+    );
+    e.enc64_isap(
+        popcnt.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]),
+        use_popcnt,
+    );
+
+    // Count leading zero bits.
+    e.enc32_isap(
+        clz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]),
+        use_lzcnt,
+    );
+    e.enc64_isap(
+        clz.bind(I64),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex().w(),
+        use_lzcnt,
+    );
+    e.enc64_isap(
+        clz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex(),
+        use_lzcnt,
+    );
+    e.enc64_isap(
+        clz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]),
+        use_lzcnt,
+    );
+
+    // Count trailing zero bits.
+    e.enc32_isap(
+        ctz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]),
+        use_bmi1,
+    );
+    e.enc64_isap(
+        ctz.bind(I64),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex().w(),
+        use_bmi1,
+    );
+    e.enc64_isap(
+        ctz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex(),
+        use_bmi1,
+    );
+    e.enc64_isap(
+        ctz.bind(I32),
+        rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]),
+        use_bmi1,
+    );
+
+    // Loads and stores.
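+    // Each memory access comes in three recipe variants -- no displacement, 8-bit
+    // displacement, and 32-bit displacement -- mirroring the ModR/M addressing forms.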
+    let f_load_complex = formats.get(formats.by_name("LoadComplex"));
+    let is_load_complex_length_two = InstructionPredicate::new_length_equals(f_load_complex, 2);
+
+    for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] {
+        e.enc_i32_i64_instp(
+            load_complex,
+            recipe.opcodes(vec![0x8b]),
+            is_load_complex_length_two.clone(),
+        );
+        e.enc_x86_64_instp(
+            uload32_complex,
+            recipe.opcodes(vec![0x8b]),
+            is_load_complex_length_two.clone(),
+        );
+
+        e.enc64_instp(
+            sload32_complex,
+            recipe.opcodes(vec![0x63]).rex().w(),
+            is_load_complex_length_two.clone(),
+        );
+
+        e.enc_i32_i64_instp(
+            uload16_complex,
+            recipe.opcodes(vec![0x0f, 0xb7]),
+            is_load_complex_length_two.clone(),
+        );
+        e.enc_i32_i64_instp(
+            sload16_complex,
+            recipe.opcodes(vec![0x0f, 0xbf]),
+            is_load_complex_length_two.clone(),
+        );
+
+        e.enc_i32_i64_instp(
+            uload8_complex,
+            recipe.opcodes(vec![0x0f, 0xb6]),
+            is_load_complex_length_two.clone(),
+        );
+
+        e.enc_i32_i64_instp(
+            sload8_complex,
+            recipe.opcodes(vec![0x0f, 0xbe]),
+            is_load_complex_length_two.clone(),
+        );
+    }
+
+    let f_store_complex = formats.get(formats.by_name("StoreComplex"));
+    let is_store_complex_length_three = InstructionPredicate::new_length_equals(f_store_complex, 3);
+
+    for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] {
+        e.enc_i32_i64_instp(
+            store_complex,
+            recipe.opcodes(vec![0x89]),
+            is_store_complex_length_three.clone(),
+        );
+        e.enc_x86_64_instp(
+            istore32_complex,
+            recipe.opcodes(vec![0x89]),
+            is_store_complex_length_three.clone(),
+        );
+        e.enc_both_instp(
+            istore16_complex.bind(I32),
+            recipe.opcodes(vec![0x66, 0x89]),
+            is_store_complex_length_three.clone(),
+        );
+        e.enc_x86_64_instp(
+            istore16_complex.bind(I64),
+            recipe.opcodes(vec![0x66, 0x89]),
+            is_store_complex_length_three.clone(),
+        );
+    }
+
+    for recipe in &[
+        rec_stWithIndex_abcd,
+        rec_stWithIndexDisp8_abcd,
+        rec_stWithIndexDisp32_abcd,
+    ] {
+        e.enc_both_instp(
+            istore8_complex.bind(I32),
+            recipe.opcodes(vec![0x88]),
+            is_store_complex_length_three.clone(),
+        );
+        e.enc_x86_64_instp(
+            istore8_complex.bind(I64),
+            recipe.opcodes(vec![0x88]),
+            is_store_complex_length_three.clone(),
+        );
+    }
+
+    for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
+        e.enc_i32_i64_ld_st(store, true, recipe.opcodes(vec![0x89]));
+        e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(vec![0x89]));
+        e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(vec![0x66, 0x89]));
+    }
+
+    // Byte stores are more complicated because the set of registers they can address
+    // depends on the presence of a REX prefix. The st*_abcd recipes fall back to
+    // the corresponding st* recipes when a REX prefix is applied.
+
+    for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
+        e.enc_both(istore8.bind(I32).bind_any(), recipe.opcodes(vec![0x88]));
+        e.enc_x86_64(istore8.bind(I64).bind_any(), recipe.opcodes(vec![0x88]));
+    }
+
+    e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89]));
+    e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(vec![0x89]));
+    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(vec![0x89]));
+
+    // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
+    // constraining the permitted registers.
+    // See MIN_SPILL_SLOT_SIZE which makes this safe.
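+    // (This is safe because MIN_SPILL_SLOT_SIZE keeps every spill slot at least
+    // four bytes wide, so the wider store cannot touch a neighboring slot.)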
+ + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(vec![0x89])); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(vec![0x89])); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(vec![0x8b])); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(vec![0x8b])); + e.enc64(sload32.bind(I64), recipe.opcodes(vec![0x63]).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(vec![0x0f, 0xb7])); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(vec![0x0f, 0xbf])); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(vec![0x0f, 0xb6])); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(vec![0x0f, 0xbe])); + } + + e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b])); + e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b])); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(vec![0x8b])); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(vec![0x8b])); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); + for &ty in &[F64, F32] { + e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); + } + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(vec![0x8b])); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(vec![0x8b])); + } + + // Push and Pop. + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(vec![0x50])); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(vec![0x50])); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(vec![0x58])); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(vec![0x58])); + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(vec![0x89])); + + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(vec![0x89])); + e.enc_both_rex_only( + copy_to_ssa.bind(I16), + rec_umr_reg_to_ssa.opcodes(vec![0x89]), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F64), + rec_furm_reg_to_ssa.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F32), + rec_furm_reg_to_ssa.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. 
+ // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(vec![0x29])); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(vec![0x29]).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(vec![0x83])); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(vec![0x81])); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5), + ); + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5).rex().w(), + ); + + // Float loads and stores. + e.enc_both( + load.bind(F32).bind_any(), + rec_fld.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load.bind(F64).bind_any(), + rec_fld.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + store.bind(F32).bind_any(), + rec_fst.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store.bind(F64).bind_any(), + rec_fst.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F64).bind_any(), + rec_fstDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F64).bind_any(), + rec_fstDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F64), + 
rec_fstWithIndex.opcodes(vec![0xf2, 0x0f, 0x11]),
+    );
+    e.enc_both(
+        store_complex.bind(F64),
+        rec_fstWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x11]),
+    );
+    e.enc_both(
+        store_complex.bind(F64),
+        rec_fstWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x11]),
+    );
+
+    e.enc_both(
+        fill.bind(F32),
+        rec_ffillSib32.opcodes(vec![0xf3, 0x0f, 0x10]),
+    );
+    e.enc_both(
+        regfill.bind(F32),
+        rec_fregfill32.opcodes(vec![0xf3, 0x0f, 0x10]),
+    );
+    e.enc_both(
+        fill.bind(F64),
+        rec_ffillSib32.opcodes(vec![0xf2, 0x0f, 0x10]),
+    );
+    e.enc_both(
+        regfill.bind(F64),
+        rec_fregfill32.opcodes(vec![0xf2, 0x0f, 0x10]),
+    );
+
+    e.enc_both(
+        spill.bind(F32),
+        rec_fspillSib32.opcodes(vec![0xf3, 0x0f, 0x11]),
+    );
+    e.enc_both(
+        regspill.bind(F32),
+        rec_fregspill32.opcodes(vec![0xf3, 0x0f, 0x11]),
+    );
+    e.enc_both(
+        spill.bind(F64),
+        rec_fspillSib32.opcodes(vec![0xf2, 0x0f, 0x11]),
+    );
+    e.enc_both(
+        regspill.bind(F64),
+        rec_fregspill32.opcodes(vec![0xf2, 0x0f, 0x11]),
+    );
+
+    // Function addresses.
+
+    // Non-PIC, all-zeros funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_fnaddr4.opcodes(vec![0xb8]),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_fnaddr8.opcodes(vec![0xb8]).rex().w(),
+        not_all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // Non-PIC, all-ones funcaddresses.
+    e.enc32_isap(
+        func_addr.bind(I32),
+        rec_allones_fnaddr4.opcodes(vec![0xb8]),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_allones_fnaddr8.opcodes(vec![0xb8]).rex().w(),
+        all_ones_funcaddrs_and_not_is_pic,
+    );
+
+    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
+    let f_func_addr = formats.get(formats.by_name("FuncAddr"));
+    let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_func_addr, "func_ref");
+    e.enc64_instp(
+        func_addr.bind(I64),
+        rec_pcrel_fnaddr8.opcodes(vec![0x8d]).rex().w(),
+        is_colocated_func,
+    );
+
+    // 64-bit, non-colocated, PIC.
+    e.enc64_isap(
+        func_addr.bind(I64),
+        rec_got_fnaddr8.opcodes(vec![0x8b]).rex().w(),
+        is_pic,
+    );
+
+    // Global addresses.
+
+    // Non-PIC.
+    e.enc32_isap(
+        symbol_value.bind(I32),
+        rec_gvaddr4.opcodes(vec![0xb8]),
+        not_is_pic,
+    );
+    e.enc64_isap(
+        symbol_value.bind(I64),
+        rec_gvaddr8.opcodes(vec![0xb8]).rex().w(),
+        not_is_pic,
+    );
+
+    // PIC, colocated.
+    e.enc64_func(
+        symbol_value.bind(I64),
+        rec_pcrel_gvaddr8.opcodes(vec![0x8d]).rex().w(),
+        |encoding| {
+            encoding
+                .isa_predicate(is_pic)
+                .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
+        },
+    );
+
+    // PIC, non-colocated.
+    e.enc64_isap(
+        symbol_value.bind(I64),
+        rec_got_gvaddr8.opcodes(vec![0x8b]).rex().w(),
+        is_pic,
+    );
+
+    // Stack addresses.
+    //
+    // TODO: Add encoding rules for stack_load and stack_store, so that they
+    // don't get legalized to stack_addr + load/store.
+    e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(vec![0x8d]));
+    e.enc64(
+        stack_addr.bind(I64),
+        rec_spaddr8_id.opcodes(vec![0x8d]).rex().w(),
+    );
+
+    // Call/return
+
+    // 32-bit, both PIC and non-PIC.
+    e.enc32(call, rec_call_id.opcodes(vec![0xe8]));
+
+    // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field.
+    let f_call = formats.get(formats.by_name("Call"));
+    let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_call, "func_ref");
+    e.enc64_instp(call, rec_call_id.opcodes(vec![0xe8]), is_colocated_func);
+
+    // 64-bit, non-colocated, PIC.
There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. + e.enc64_isap(call, rec_call_plt_id.opcodes(vec![0xe8]), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(vec![0xc3])); + e.enc64(return_, rec_ret.opcodes(vec![0xc3])); + + // Branches. + e.enc32(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc64(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc32(jump, rec_jmpd.opcodes(vec![0xe9])); + e.enc64(jump, rec_jmpd.opcodes(vec![0xe9])); + + e.enc_both(brif, rec_brib.opcodes(vec![0x70])); + e.enc_both(brif, rec_brid.opcodes(vec![0x0f, 0x80])); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(vec![0x70])); + e.enc_both(brff, rec_brfd.opcodes(vec![0x0f, 0x80])); + + // Note that the tjccd opcode will be prefixed with 0x0f. + e.enc_i32_i64(brz, rec_tjccb.opcodes(vec![0x74])); + e.enc_i32_i64(brz, rec_tjccd.opcodes(vec![0x84])); + e.enc_i32_i64(brnz, rec_tjccb.opcodes(vec![0x75])); + e.enc_i32_i64(brnz, rec_tjccd.opcodes(vec![0x85])); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(vec![0x84])); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(vec![0x85])); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x74])); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x84])); + e.enc_both(brnz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x75])); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x85])); + + // Jump tables. + e.enc64( + jump_table_entry.bind(I64), + rec_jt_entry.opcodes(vec![0x63]).rex().w(), + ); + e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(vec![0x8b])); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(vec![0x8d]).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(vec![0x8d])); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc64(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc32(resumable_trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc64(resumable_trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); + + // Comparisons + e.enc_i32_i64(icmp, rec_icscc.opcodes(vec![0x39])); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(vec![0x81]).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(vec![0x39])); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(vec![0x81]).rrr(7)); + // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). 
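+    // (TEST with identical operands sets ZF/SF from `x & x == x` and clears CF/OF,
+    // matching CMP against zero while saving the immediate byte.)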
+
+    e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(vec![0x39]));
+    e.enc64(
+        ifcmp_sp.bind(I64),
+        rec_rcmp_sp.opcodes(vec![0x39]).rex().w(),
+    );
+
+    // Convert flags to bool.
+    // This encodes `b1` as an 8-bit low register with the value 0 or 1.
+    e.enc_both(trueif, rec_seti_abcd.opcodes(vec![0x0f, 0x90]));
+    e.enc_both(trueff, rec_setf_abcd.opcodes(vec![0x0f, 0x90]));
+
+    // Conditional move (a.k.a. integer select).
+    e.enc_i32_i64(selectif, rec_cmov.opcodes(vec![0x0f, 0x40]));
+
+    // Bit scan forward and reverse.
+    e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbc]));
+    e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbd]));
+
+    // Convert bool to int.
+    //
+    // This assumes that b1 is represented as an 8-bit low register with the value 0
+    // or 1.
+    //
+    // Encode movzbq as movzbl, because it's equivalent and shorter.
+    e.enc32(
+        bint.bind(I32).bind(B1),
+        rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]),
+    );
+
+    e.enc64(
+        bint.bind(I64).bind(B1),
+        rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(),
+    );
+    e.enc64(
+        bint.bind(I64).bind(B1),
+        rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]),
+    );
+    e.enc64(
+        bint.bind(I32).bind(B1),
+        rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(),
+    );
+    e.enc64(
+        bint.bind(I32).bind(B1),
+        rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]),
+    );
+
+    // Numerical conversions.
+
+    // Reducing an integer is a no-op.
+    e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
+    e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
+    e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
+
+    e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
+    e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
+    e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
+    e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0);
+    e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0);
+    e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0);
+
+    // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
+    // instructions for %al/%ax/%eax to %ax/%eax/%rax.
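+    // (Those instructions hard-wire the accumulator as both source and destination,
+    // so they would need recipes with fixed register constraints.)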
+ + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(vec![0x63]).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(vec![0x89]).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(vec![0x89])); + + // Floating point + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. 
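+    // XORing a register with itself yields +0.0 without a constant-pool load.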
+ let f_unary_ieee32 = formats.get(formats.by_name("UnaryIeee32")); + let is_zero_32_bit_float = InstructionPredicate::new_is_zero_32bit_float(f_unary_ieee32, "imm"); + e.enc32_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float.clone(), + ); + + let f_unary_ieee64 = formats.get(formats.by_name("UnaryIeee64")); + let is_zero_64_bit_float = InstructionPredicate::new_is_zero_64bit_float(f_unary_ieee64, "imm"); + e.enc32_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float.clone(), + ); + + e.enc_x86_64_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float, + ); + e.enc_x86_64_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float, + ); + + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(vec![0x0f, 0x28])); + e.enc_both(copy.bind(F64), rec_furm.opcodes(vec![0x0f, 0x28])); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // cvtsi2ss + e.enc_i32_i64( + fcvt_from_sint.bind(F32), + rec_frurm.opcodes(vec![0xf3, 0x0f, 0x2a]), + ); + + // cvtsi2sd + e.enc_i32_i64( + fcvt_from_sint.bind(F64), + rec_frurm.opcodes(vec![0xf2, 0x0f, 0x2a]), + ); + + // cvtss2sd + e.enc_both( + fpromote.bind(F64).bind(F32), + rec_furm.opcodes(vec![0xf3, 0x0f, 0x5a]), + ); + + // cvtsd2ss + e.enc_both( + fdemote.bind(F32).bind(F64), + rec_furm.opcodes(vec![0xf2, 0x0f, 0x5a]), + ); + + // cvttss2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]).rex().w(), + ); + + // cvttsd2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]).rex().w(), + ); + + // Exact square roots. + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(vec![0xf3, 0x0f, 0x51])); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(vec![0xf2, 0x0f, 0x51])); + + // Rounding. The recipe looks at the opcode to pick an immediate. + for inst in &[nearest, floor, ceil, trunc] { + e.enc_both_isap( + inst.bind(F32), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0a]), + use_sse41, + ); + e.enc_both_isap( + inst.bind(F64), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0b]), + use_sse41, + ); + } + + // Binary arithmetic ops. 
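+    // The opcode byte varies per operation below, while the 0xf3/0xf2 prefix selects
+    // scalar single vs. double precision.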
+    for &(inst, opc) in &[
+        (fadd, 0x58),
+        (fsub, 0x5c),
+        (fmul, 0x59),
+        (fdiv, 0x5e),
+        (x86_fmin, 0x5d),
+        (x86_fmax, 0x5f),
+    ] {
+        e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0xf3, 0x0f, opc]));
+        e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0xf2, 0x0f, opc]));
+    }
+
+    // Binary bitwise ops.
+    for &(inst, opc) in &[(band, 0x54), (bor, 0x56), (bxor, 0x57)] {
+        e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0x0f, opc]));
+        e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0x0f, opc]));
+    }
+
+    // The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
+    e.enc_both(band_not.bind(F32), rec_fax.opcodes(vec![0x0f, 0x55]));
+    e.enc_both(band_not.bind(F64), rec_fax.opcodes(vec![0x0f, 0x55]));
+
+    // Comparisons.
+    //
+    // This only covers the condition codes in `supported_floatccs`, the rest are
+    // handled by legalization patterns.
+    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(vec![0x0f, 0x2e]));
+    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(vec![0x66, 0x0f, 0x2e]));
+    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e]));
+    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e]));
+
+    // SIMD vector size: eventually multiple vector sizes may be supported but for now only
+    // SSE-sized vectors are available.
+    let sse_vector_size: u64 = 128;
+
+    // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
+    // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
+    // value across the register.
+
+    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
+
+    // PSHUFB, 8-bit shuffle using two XMM registers.
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
+        let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 0x00]);
+        e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
+        e.enc64_isap(instruction, template, use_ssse3_simd);
+    }
+
+    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
+    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
+        let instruction = x86_pshufd.bind_vector_from_lane(ty, sse_vector_size);
+        let template = rec_r_ib_unsigned_fpr
+            .nonrex()
+            .opcodes(vec![0x66, 0x0f, 0x70]);
+        e.enc32(instruction.clone(), template.clone());
+        e.enc64(instruction, template);
+    }
+
+    // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
+    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
+    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
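+    // (For floats the loop below emits nothing at all, via the null `rec_null_fpr` recipe,
+    // because the value already lives in an XMM register; for integers a MOVD/MOVQ performs the
+    // copy along with the zero-extension quoted above.)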
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
+        if ty.is_float() {
+            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+        } else {
+            let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
+            if ty.lane_bits() < 64 {
+                // no 32-bit encodings for 64-bit widths
+                e.enc32(instruction.clone(), template.clone());
+            }
+            e.enc_x86_64(instruction, template);
+        }
+    }
+
+    // SIMD insertlane
+    let mut x86_pinsr_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
+        HashMap::new();
+    x86_pinsr_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41_simd))); // PINSRB
+    x86_pinsr_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], None)); // PINSRW from SSE2
+    x86_pinsr_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRD
+    x86_pinsr_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41_simd))); // PINSRQ, only x86_64
+
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        if let Some((opcode, isap)) = x86_pinsr_mapping.get(&ty.lane_bits()) {
+            let instruction = x86_pinsr.bind_vector_from_lane(ty, sse_vector_size);
+            let template = rec_r_ib_unsigned_r.opcodes(opcode.clone());
+            if ty.lane_bits() < 64 {
+                e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
+            } else {
+                // It turns out the 64-bit widths have REX/W encodings and are only available on
+                // x86_64.
+                e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
+            }
+        }
+    }
+
+    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
+    {
+        let instruction = x86_insertps.bind_vector_from_lane(F32, sse_vector_size);
+        let template = rec_fa_ib.nonrex().opcodes(vec![0x66, 0x0f, 0x3a, 0x21]);
+        e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
+    }
+
+    // For legalizing insertlane with floats, MOVSD from SSE2.
+    {
+        let instruction = x86_movsd.bind_vector_from_lane(F64, sse_vector_size);
+        let template = rec_fa.nonrex().opcodes(vec![0xf2, 0x0f, 0x10]);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
+    }
+
+    // For legalizing insertlane with floats, MOVLHPS from SSE.
+    {
+        let instruction = x86_movlhps.bind_vector_from_lane(F64, sse_vector_size);
+        let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x16]);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+    }
+
+    // SIMD extractlane
+    let mut x86_pextr_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
+        HashMap::new();
+    x86_pextr_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41_simd))); // PEXTRB
+    x86_pextr_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], None)); // PEXTRW from SSE2; SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes
+    x86_pextr_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRD
+    x86_pextr_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41_simd))); // PEXTRQ, only x86_64
+
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
+            let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
+            let template = rec_r_ib_unsigned_gpr.opcodes(opcode.clone());
+            if ty.lane_bits() < 64 {
+                e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
+            } else {
+                // It turns out the 64-bit widths have REX/W encodings and are only available on
+                // x86_64.
+                e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
+            }
+        }
+    }
+
+    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
+    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+        for to_type in
+            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
+        {
+            let instruction = raw_bitcast
+                .bind_vector_from_lane(to_type, sse_vector_size)
+                .bind_vector_from_lane(from_type, sse_vector_size);
+            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+        }
+    }
+
+    // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
+    // XMM register.
+    for float_type in &[F32, F64] {
+        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+            e.enc_32_64_rec(
+                raw_bitcast
+                    .bind_vector_from_lane(lane_type, sse_vector_size)
+                    .bind(*float_type),
+                rec_null_fpr,
+                0,
+            );
+            e.enc_32_64_rec(
+                raw_bitcast
+                    .bind(*float_type)
+                    .bind_vector_from_lane(lane_type, sse_vector_size),
+                rec_null_fpr,
+                0,
+            );
+        }
+    }
+
+    // SIMD vconst using MOVUPS
+    // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have
+    // to guarantee that the constants are aligned when emitted and there is currently no mechanism
+    // for that; alternatively, constants could be loaded into XMM registers using a sequence like:
+    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
+    // in memory) but some performance measurements are needed.
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+        let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+    }
+
+    // SIMD bor using ORPS
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let instruction = bor.bind_vector_from_lane(ty, sse_vector_size);
+        let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x56]);
+        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
+    }
+
+    // SIMD register movement: store, load, spill, fill, regmove. All of these use encodings of
+    // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
+    // alignment or type-specific encodings, see https://github.com/CraneStation/cranelift/issues/1039).
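+    // (MOVUPS, the 0F 10/0F 11 encodings used below, tolerates unaligned addresses, which is
+    // why it is the safe default until alignment can be guaranteed.)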
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // Store + let bound_store = store.bind_vector_from_lane(ty, sse_vector_size).bind_any(); + e.enc_32_64(bound_store.clone(), rec_fst.opcodes(vec![0x0f, 0x11])); + e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(vec![0x0f, 0x11])); + e.enc_32_64(bound_store, rec_fstDisp32.opcodes(vec![0x0f, 0x11])); + + // Load + let bound_load = load.bind_vector_from_lane(ty, sse_vector_size).bind_any(); + e.enc_32_64(bound_load.clone(), rec_fld.opcodes(vec![0x0f, 0x10])); + e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(vec![0x0f, 0x10])); + e.enc_32_64(bound_load, rec_fldDisp32.opcodes(vec![0x0f, 0x10])); + + // Spill + let bound_spill = spill.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(vec![0x0f, 0x11])); + let bound_regspill = regspill.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(vec![0x0f, 0x11])); + + // Fill + let bound_fill = fill.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(vec![0x0f, 0x10])); + let bound_regfill = regfill.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(vec![0x0f, 0x10])); + let bound_fill_nop = fill_nop.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); + + // Regmove + let bound_regmove = regmove.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_regmove, rec_frmov.opcodes(vec![0x0f, 0x28])); + + // Copy + let bound_copy = copy.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64(bound_copy, rec_furm.opcodes(vec![0x0f, 0x28])); // MOVAPS from SSE + let bound_copy_nop = copy_nop.bind_vector_from_lane(ty, sse_vector_size); + e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); + } + + // SIMD integer addition + for (ty, opcodes) in &[ + (I8, &[0x66, 0x0f, 0xfc]), // PADDB from SSE2 + (I16, &[0x66, 0x0f, 0xfd]), // PADDW from SSE2 + (I32, &[0x66, 0x0f, 0xfe]), // PADDD from SSE2 + (I64, &[0x66, 0x0f, 0xd4]), // PADDQ from SSE2 + ] { + let iadd = iadd.bind_vector_from_lane(ty.clone(), sse_vector_size); + e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec())); + } + + // Reference type instructions + + // Null references implemented as iconst 0. + e.enc32(null.bind_ref(R32), rec_pu_id_ref.opcodes(vec![0xb8])); + + e.enc64(null.bind_ref(R64), rec_pu_id_ref.rex().opcodes(vec![0xb8])); + e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(vec![0xb8])); + + // is_null, implemented by testing whether the value is 0. + e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(vec![0x85])); + + // safepoint instruction calls sink, no actual encoding. 
+ e.enc32_rec(safepoint, rec_safepoint, 0); + e.enc64_rec(safepoint, rec_safepoint, 0); + + e +} diff --git a/cranelift-codegen/meta/src/isa/x86/instructions.rs b/cranelift-codegen/meta/src/isa/x86/instructions.rs index 1fcf37a68..b9f2496a8 100644 --- a/cranelift-codegen/meta/src/isa/x86/instructions.rs +++ b/cranelift-codegen/meta/src/isa/x86/instructions.rs @@ -2,16 +2,25 @@ use crate::cdsl::formats::FormatRegistry; use crate::cdsl::instructions::{ - InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, }; use crate::cdsl::operands::{create_operand as operand, create_operand_doc as operand_doc}; use crate::cdsl::types::ValueType; use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::immediates::Immediates; use crate::shared::types; -pub fn define(format_registry: &FormatRegistry) -> InstructionGroup { - let mut ig = - InstructionGroupBuilder::new("x86", "x86 specific instruction set", format_registry); +pub(crate) fn define( + mut all_instructions: &mut AllInstructions, + format_registry: &FormatRegistry, + immediates: &Immediates, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new( + "x86", + "x86 specific instruction set", + &mut all_instructions, + format_registry, + ); let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); @@ -144,7 +153,7 @@ pub fn define(format_registry: &FormatRegistry) -> InstructionGroup { Floating point minimum with x86 semantics. This is equivalent to the C ternary operator `x < y ? x : y` which - differs from :inst:`fmin` when either operand is NaN or when comparing + differs from `fmin` when either operand is NaN or when comparing +0.0 to -0.0. When the two operands don't compare as LT, `y` is returned unchanged, @@ -162,7 +171,7 @@ pub fn define(format_registry: &FormatRegistry) -> InstructionGroup { Floating point maximum with x86 semantics. This is equivalent to the C ternary operator `x > y ? x : y` which - differs from :inst:`fmax` when either operand is NaN or when comparing + differs from `fmax` when either operand is NaN or when comparing +0.0 to -0.0. When the two operands don't compare as GT, `y` is returned unchanged, @@ -242,5 +251,141 @@ pub fn define(format_registry: &FormatRegistry) -> InstructionGroup { .operands_out(vec![y, rflags]), ); + let uimm8 = &immediates.uimm8; + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let a = &operand_doc("a", TxN, "A vector value (i.e. held in an XMM register)"); + let b = &operand_doc("b", TxN, "A vector value (i.e. held in an XMM register)"); + let i = &operand_doc("i", uimm8, "An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); + + ig.push( + Inst::new( + "x86_pshufd", + r#" + Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended + register and re-orders the data according to the passed immediate byte. 
+ "#, + ) + .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pshufb", + r#" + Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle + mask from either memory or another extended register + "#, + ) + .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + let Idx = &operand_doc("Idx", uimm8, "Lane index"); + let x = &operand("x", TxN); + let a = &operand("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "x86_pextr", + r#" + Extract lane ``Idx`` from ``x``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); + + let IBxN = &TypeVar::new( + "IBxN", + "A SIMD vector type containing only booleans and integers", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &operand("x", IBxN); + let y = &operand_doc("y", &IBxN.lane_of(), "New lane value"); + let a = &operand("a", IBxN); + + ig.push( + Inst::new( + "x86_pinsr", + r#" + Insert ``y`` into ``x`` at lane ``Idx``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + ) + .operands_in(vec![x, Idx, y]) + .operands_out(vec![a]), + ); + + let FxN = &TypeVar::new( + "FxN", + "A SIMD vector type containing floats", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &operand("x", FxN); + let y = &operand_doc("y", &FxN.lane_of(), "New lane value"); + let a = &operand("a", FxN); + + ig.push( + Inst::new( + "x86_insertps", + r#" + Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is + extracted from and which it is inserted to. This is similar to x86_pinsr but inserts + floats, which are already stored in an XMM register. 
+ "#, + ) + .operands_in(vec![x, Idx, y]) + .operands_out(vec![a]), + ); + + let x = &operand("x", FxN); + let y = &operand("y", FxN); + let a = &operand("a", FxN); + + ig.push( + Inst::new( + "x86_movsd", + r#" + Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` + "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_movlhps", + r#" + Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` + "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + ig.build() } diff --git a/cranelift-codegen/meta/src/isa/x86/legalize.rs b/cranelift-codegen/meta/src/isa/x86/legalize.rs index 2815665da..e37759e89 100644 --- a/cranelift-codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift-codegen/meta/src/isa/x86/legalize.rs @@ -1,11 +1,12 @@ use crate::cdsl::ast::{var, ExprBuilder, Literal}; use crate::cdsl::instructions::InstructionGroup; +use crate::cdsl::types::ValueType; use crate::cdsl::xform::TransformGroupBuilder; - +use crate::shared::types::Float::F64; use crate::shared::types::Int::{I32, I64}; use crate::shared::Definitions as SharedDefinitions; -pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { +pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { let mut group = TransformGroupBuilder::new( "x86_expand", r#" @@ -22,6 +23,8 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou let bor = insts.by_name("bor"); let clz = insts.by_name("clz"); let ctz = insts.by_name("ctz"); + let extractlane = insts.by_name("extractlane"); + let f64const = insts.by_name("f64const"); let fcmp = insts.by_name("fcmp"); let fcvt_from_uint = insts.by_name("fcvt_from_uint"); let fcvt_to_sint = insts.by_name("fcvt_to_sint"); @@ -33,11 +36,16 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou let iadd = insts.by_name("iadd"); let iconst = insts.by_name("iconst"); let imul = insts.by_name("imul"); + let insertlane = insts.by_name("insertlane"); let isub = insts.by_name("isub"); let popcnt = insts.by_name("popcnt"); + let raw_bitcast = insts.by_name("raw_bitcast"); + let scalar_to_vector = insts.by_name("scalar_to_vector"); let sdiv = insts.by_name("sdiv"); let selectif = insts.by_name("selectif"); let smulhi = insts.by_name("smulhi"); + let splat = insts.by_name("splat"); + let shuffle = insts.by_name("shuffle"); let srem = insts.by_name("srem"); let udiv = insts.by_name("udiv"); let umulhi = insts.by_name("umulhi"); @@ -46,13 +54,12 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou let x86_bsf = x86_instructions.by_name("x86_bsf"); let x86_bsr = x86_instructions.by_name("x86_bsr"); + let x86_pshufb = x86_instructions.by_name("x86_pshufb"); + let x86_pshufd = x86_instructions.by_name("x86_pshufd"); let x86_umulx = x86_instructions.by_name("x86_umulx"); let x86_smulx = x86_instructions.by_name("x86_smulx"); - // List of immediates. - let floatcc = shared.operand_kinds.by_name("floatcc"); - let imm64 = shared.operand_kinds.by_name("imm64"); - let intcc = shared.operand_kinds.by_name("intcc"); + let imm = &shared.imm; // Division and remainder. // @@ -88,12 +95,12 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization // patterns. 
- let floatcc_eq = Literal::enumerator_for(floatcc, "eq"); - let floatcc_ord = Literal::enumerator_for(floatcc, "ord"); - let floatcc_ueq = Literal::enumerator_for(floatcc, "ueq"); - let floatcc_ne = Literal::enumerator_for(floatcc, "ne"); - let floatcc_uno = Literal::enumerator_for(floatcc, "uno"); - let floatcc_one = Literal::enumerator_for(floatcc, "one"); + let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); + let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); + let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); + let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); + let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); + let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); // Equality needs an explicit `ord` test which checks the parity bit. group.legalize( @@ -113,14 +120,14 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou ], ); - let floatcc_lt = &Literal::enumerator_for(floatcc, "lt"); - let floatcc_gt = &Literal::enumerator_for(floatcc, "gt"); - let floatcc_le = &Literal::enumerator_for(floatcc, "le"); - let floatcc_ge = &Literal::enumerator_for(floatcc, "ge"); - let floatcc_ugt = &Literal::enumerator_for(floatcc, "ugt"); - let floatcc_ult = &Literal::enumerator_for(floatcc, "ult"); - let floatcc_uge = &Literal::enumerator_for(floatcc, "uge"); - let floatcc_ule = &Literal::enumerator_for(floatcc, "ule"); + let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); + let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); + let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); + let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); + let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); + let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); + let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); + let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); // Inequalities that need to be reversed. 
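+    // (Illustrative: `fcmp lt, x, y` has no directly supported condition code, so it is
+    // rewritten as `fcmp gt, y, x`, swapping the operands and reversing the condition.)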
for &(cc, rev_cc) in &[ @@ -154,9 +161,9 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou let r2flags = var("r2flags"); let index2 = var("index2"); - let intcc_eq = Literal::enumerator_for(intcc, "eq"); - let imm64_minus_one = Literal::constant(imm64, -1); - let imm64_63 = Literal::constant(imm64, 63); + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let imm64_minus_one = Literal::constant(&imm.imm64, -1); + let imm64_63 = Literal::constant(&imm.imm64, 63); group.legalize( def!(a = clz.I64(x)), vec![ @@ -168,7 +175,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou ], ); - let imm64_31 = Literal::constant(imm64, 31); + let imm64_31 = Literal::constant(&imm.imm64, 31); group.legalize( def!(a = clz.I32(x)), vec![ @@ -180,7 +187,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou ], ); - let imm64_64 = Literal::constant(imm64, 64); + let imm64_64 = Literal::constant(&imm.imm64, 64); group.legalize( def!(a = ctz.I64(x)), vec![ @@ -190,7 +197,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou ], ); - let imm64_32 = Literal::constant(imm64, 32); + let imm64_32 = Literal::constant(&imm.imm64, 32); group.legalize( def!(a = ctz.I32(x)), vec![ @@ -221,13 +228,13 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou let qc0F = var("qc0F"); let qc01 = var("qc01"); - let imm64_1 = Literal::constant(imm64, 1); - let imm64_4 = Literal::constant(imm64, 4); + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_4 = Literal::constant(&imm.imm64, 4); group.legalize( def!(qv16 = popcnt.I64(qv1)), vec![ def!(qv3 = ushr_imm(qv1, imm64_1)), - def!(qc77 = iconst(Literal::constant(imm64, 0x7777777777777777))), + def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777777777777777))), def!(qv4 = band(qv3, qc77)), def!(qv5 = isub(qv1, qv4)), def!(qv6 = ushr_imm(qv4, imm64_1)), @@ -238,11 +245,11 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou def!(qv11 = isub(qv8, qv10)), def!(qv12 = ushr_imm(qv11, imm64_4)), def!(qv13 = iadd(qv11, qv12)), - def!(qc0F = iconst(Literal::constant(imm64, 0x0F0F0F0F0F0F0F0F))), + def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F0F0F0F0F0F0F))), def!(qv14 = band(qv13, qc0F)), - def!(qc01 = iconst(Literal::constant(imm64, 0x0101010101010101))), + def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101010101010101))), def!(qv15 = imul(qv14, qc01)), - def!(qv16 = ushr_imm(qv15, Literal::constant(imm64, 56))), + def!(qv16 = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), ], ); @@ -270,7 +277,7 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou def!(lv16 = popcnt.I32(lv1)), vec![ def!(lv3 = ushr_imm(lv1, imm64_1)), - def!(lc77 = iconst(Literal::constant(imm64, 0x77777777))), + def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x77777777))), def!(lv4 = band(lv3, lc77)), def!(lv5 = isub(lv1, lv4)), def!(lv6 = ushr_imm(lv4, imm64_1)), @@ -281,13 +288,102 @@ pub fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGrou def!(lv11 = isub(lv8, lv10)), def!(lv12 = ushr_imm(lv11, imm64_4)), def!(lv13 = iadd(lv11, lv12)), - def!(lc0F = iconst(Literal::constant(imm64, 0x0F0F0F0F))), + def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F0F0F))), def!(lv14 = band(lv13, lc0F)), - def!(lc01 = iconst(Literal::constant(imm64, 0x01010101))), + def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x01010101))), def!(lv15 = 
imul(lv14, lc01)), - def!(lv16 = ushr_imm(lv15, Literal::constant(imm64, 24))), + def!(lv16 = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), ], ); group.build_and_add_to(&mut shared.transform_groups); + + let mut narrow = TransformGroupBuilder::new( + "x86_narrow", + r#" + Legalize instructions by narrowing. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("narrow_flags").id); + + // SIMD + let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); + let uimm8_one = Literal::constant(&imm.uimm8, 0x01); + let ieee64_zero = Literal::constant(&imm.ieee64, 0x00); + let b = var("b"); + let c = var("c"); + let d = var("d"); + + // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available + let sse_vector_size: u64 = 128; + + // SIMD splat: 8-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { + let splat_any8x16 = splat.bind_vector_from_lane(ty, sse_vector_size); + let bitcast_f64_to_any8x16 = raw_bitcast + .bind_vector_from_lane(ty, sse_vector_size) + .bind(F64); + narrow.legalize( + def!(y = splat_any8x16(x)), + vec![ + def!(a = scalar_to_vector(x)), // move into the lowest 8 bits of an XMM register + def!(b = f64const(ieee64_zero)), // zero out a different XMM register; the shuffle mask for moving the lowest byte to all other byte lanes is 0x0 + def!(c = bitcast_f64_to_any8x16(b)), // no instruction emitted; informs the SSA that the 0 in b can be used as a vector of this type + def!(y = x86_pshufb(a, c)), // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b) + ], + ); + } + + // SIMD splat: 16-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let splat_x16x8 = splat.bind_vector_from_lane(ty, sse_vector_size); + let raw_bitcast_any16x8_to_i32x4 = raw_bitcast + .bind_vector_from_lane(I32, sse_vector_size) + .bind_vector_from_lane(ty, sse_vector_size); + let raw_bitcast_i32x4_to_any16x8 = raw_bitcast + .bind_vector_from_lane(ty, sse_vector_size) + .bind_vector_from_lane(I32, sse_vector_size); + narrow.legalize( + def!(y = splat_x16x8(x)), + vec![ + def!(a = scalar_to_vector(x)), // move into the lowest 16 bits of an XMM register + def!(b = insertlane(a, uimm8_one, x)), // insert the value again but in the next lowest 16 bits + def!(c = raw_bitcast_any16x8_to_i32x4(b)), // no instruction emitted; pretend this is an I32x4 so we can use PSHUFD + def!(d = x86_pshufd(c, uimm8_zero)), // broadcast the bytes in the XMM register with PSHUFD + def!(y = raw_bitcast_i32x4_to_any16x8(d)), // no instruction emitted; pretend this is an X16x8 again + ], + ); + } + + // SIMD splat: 32-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { + let splat_any32x4 = splat.bind_vector_from_lane(ty, sse_vector_size); + narrow.legalize( + def!(y = splat_any32x4(x)), + vec![ + def!(a = scalar_to_vector(x)), // translate to an x86 MOV to get the value in an XMM register + def!(y = x86_pshufd(a, uimm8_zero)), // broadcast the bytes in the XMM register with PSHUF + ], + ); + } + + // SIMD splat: 64-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { + let splat_any64x2 = splat.bind_vector_from_lane(ty, sse_vector_size); + narrow.legalize( + def!(y = splat_any64x2(x)), + vec![ + def!(a = scalar_to_vector(x)), // move into the lowest 64 bits of an XMM register + def!(y = insertlane(a, uimm8_one, x)), // move into the highest 64 bits of the same XMM register + ], + ); + } + + 
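+    // Illustrative expansion of the 64-bit case above (hypothetical value names):
+    //   v1 = splat.i64x2 v0
+    // becomes:
+    //   v2 = scalar_to_vector.i64x2 v0   ; v0 fills the low 64 bits of an XMM register
+    //   v1 = insertlane v2, 1, v0        ; v0 is copied into the high 64 bits as well
+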
narrow.custom_legalize(shuffle, "convert_shuffle"); + narrow.custom_legalize(extractlane, "convert_extractlane"); + narrow.custom_legalize(insertlane, "convert_insertlane"); + + narrow.build_and_add_to(&mut shared.transform_groups); } diff --git a/cranelift-codegen/meta/src/isa/x86/mod.rs b/cranelift-codegen/meta/src/isa/x86/mod.rs index dba573706..6ced50939 100644 --- a/cranelift-codegen/meta/src/isa/x86/mod.rs +++ b/cranelift-codegen/meta/src/isa/x86/mod.rs @@ -6,16 +6,22 @@ use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::Definitions as SharedDefinitions; +mod encodings; mod instructions; mod legalize; +mod recipes; mod registers; mod settings; -pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let settings = settings::define(&shared_defs.settings); let regs = registers::define(); - let inst_group = instructions::define(&shared_defs.format_registry); + let inst_group = instructions::define( + &mut shared_defs.all_instructions, + &shared_defs.format_registry, + &shared_defs.imm, + ); legalize::define(shared_defs, &inst_group); // CPU modes for 32-bit and 64-bit operations. @@ -23,12 +29,13 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { let mut x86_32 = CpuMode::new("I32"); let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); - let narrow = shared_defs.transform_groups.by_name("narrow"); + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); let widen = shared_defs.transform_groups.by_name("widen"); + let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); x86_32.legalize_monomorphic(expand_flags); - x86_32.legalize_default(narrow); + x86_32.legalize_default(narrow_flags); x86_32.legalize_type(B1, expand_flags); x86_32.legalize_type(I8, widen); x86_32.legalize_type(I16, widen); @@ -37,7 +44,7 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_32.legalize_type(F64, x86_expand); x86_64.legalize_monomorphic(expand_flags); - x86_64.legalize_default(narrow); + x86_64.legalize_default(x86_narrow); x86_64.legalize_type(B1, expand_flags); x86_64.legalize_type(I8, widen); x86_64.legalize_type(I16, widen); @@ -46,7 +53,24 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_64.legalize_type(F32, x86_expand); x86_64.legalize_type(F64, x86_expand); + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + let cpu_modes = vec![x86_64, x86_32]; - TargetIsa::new("x86", inst_group, settings, regs, cpu_modes) + TargetIsa::new( + "x86", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) } diff --git a/cranelift-codegen/meta/src/isa/x86/recipes.rs b/cranelift-codegen/meta/src/isa/x86/recipes.rs new file mode 100644 index 000000000..bee51883a --- /dev/null +++ b/cranelift-codegen/meta/src/isa/x86/recipes.rs @@ -0,0 +1,3083 @@ +use std::rc::Rc; + +use crate::cdsl::ast::Literal; +use crate::cdsl::formats::{FormatRegistry, InstructionFormat}; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::recipes::{ + EncodingRecipe, EncodingRecipeBuilder, 
OperandConstraint, Register, Stack,
+};
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingGroup;
+use crate::shared::Definitions as SharedDefinitions;
+
+/// Helper data structure to create recipes and template recipes.
+/// It contains all the recipes and recipe templates that might be used in the encodings module of
+/// this same directory.
+pub struct RecipeGroup<'builder> {
+    /// Memoized format pointer, to pass it to builders later.
+    formats: &'builder FormatRegistry,
+
+    /// Memoized registers description, to pass it to builders later.
+    regs: &'builder IsaRegs,
+
+    /// All the recipes explicitly created in this file. This is different from the final set of
+    /// recipes, which is definitive only once encodings have generated new recipes on the fly.
+    recipes: Vec<EncodingRecipe>,
+
+    /// All the recipe templates created in this file.
+    templates: Vec<Rc<Template<'builder>>>,
+}
+
+impl<'builder> RecipeGroup<'builder> {
+    fn new(formats: &'builder FormatRegistry, regs: &'builder IsaRegs) -> Self {
+        Self {
+            formats,
+            regs,
+            recipes: Vec::new(),
+            templates: Vec::new(),
+        }
+    }
+    fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
+        self.recipes.push(recipe.build(self.formats));
+    }
+    fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
+        let template = Rc::new(Template::new(recipe, self.formats, self.regs));
+        self.templates.push(template.clone());
+        template
+    }
+    fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
+        let template = Rc::new(template);
+        self.templates.push(template.clone());
+        template
+    }
+    pub fn recipe(&self, name: &str) -> &EncodingRecipe {
+        self.recipes
+            .iter()
+            .find(|recipe| &recipe.name == name)
+            .expect(&format!("unknown recipe name: {}. Try template?", name))
+    }
+    pub fn template(&self, name: &str) -> &Template {
+        self.templates
+            .iter()
+            .find(|recipe| recipe.name() == name)
+            .expect(&format!("unknown tail recipe name: {}. Try recipe?", name))
+    }
+}
+
+// Opcode representation.
+//
+// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
+// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
+// opcode format is indicated by the recipe name prefix.
+//
+// The match case below does not include the REX prefix which goes after the mandatory prefix.
+// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
+// represented by separate recipes.
+//
+// The encoding bits are:
+//
+// 0-7:   The opcode byte.
+// 8-9:   pp, mandatory prefix:
+//        00 none (Op*)
+//        01 66 (Mp*)
+//        10 F3 (Mp*)
+//        11 F2 (Mp*)
+// 10-11: mm, opcode map:
+//        00 (Op1/Mp1)
+//        01 0F (Op2/Mp2)
+//        10 0F 38 (Op3/Mp3)
+//        11 0F 3A (Op3/Mp3)
+// 12-14: rrr, opcode bits for the ModR/M byte for certain opcodes.
+// 15:    REX.W bit (or VEX.W/E)
+//
+// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
+// the pp+mm format is ready for supporting VEX prefixes.
+//
+// TODO Cranelift doesn't actually require a recipe to have a single encoding size anymore, so
+// this could be simplified.
+
+/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
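+///
+/// For example (illustrative): `decode_opcodes(&[0x66, 0x0f, 0x3a, 0x0a], 0, 0)` splits the
+/// prefix bytes `66 0F 3A` (the "Mp3" map, mmpp = 0b1101) from the final opcode byte `0x0a` and
+/// returns ("Mp3", 0x0d0a): the opcode byte in bits 0-7, mmpp in bits 8-11, rrr and W zero.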
+fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) {
+    assert!(op_bytes.len() >= 1, "at least one opcode byte");
+
+    let prefix_bytes = &op_bytes[..op_bytes.len() - 1];
+    let (name, mmpp) = match prefix_bytes {
+        [] => ("Op1", 0b0000),
+        [0x66] => ("Mp1", 0b0001),
+        [0xf3] => ("Mp1", 0b0010),
+        [0xf2] => ("Mp1", 0b0011),
+        [0x0f] => ("Op2", 0b0100),
+        [0x66, 0x0f] => ("Mp2", 0b0101),
+        [0xf3, 0x0f] => ("Mp2", 0b0110),
+        [0xf2, 0x0f] => ("Mp2", 0b0111),
+        [0x0f, 0x38] => ("Op3", 0b1000),
+        [0x66, 0x0f, 0x38] => ("Mp3", 0b1001),
+        [0xf3, 0x0f, 0x38] => ("Mp3", 0b1010),
+        [0xf2, 0x0f, 0x38] => ("Mp3", 0b1011),
+        [0x0f, 0x3a] => ("Op3", 0b1100),
+        [0x66, 0x0f, 0x3a] => ("Mp3", 0b1101),
+        [0xf3, 0x0f, 0x3a] => ("Mp3", 0b1110),
+        [0xf2, 0x0f, 0x3a] => ("Mp3", 0b1111),
+        _ => {
+            panic!("unexpected opcode sequence: {:?}", op_bytes);
+        }
+    };
+
+    let opcode_byte = op_bytes[op_bytes.len() - 1] as u16;
+    (name, opcode_byte | (mmpp << 8) | (rrr << 12) | w << 15)
+}
+
+/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the
+/// corresponding `put_*` function from the `binemit.rs` module.
+fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> {
+    code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase())))
+}
+
+/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class.
+fn replace_nonrex_constraints(
+    regs: &IsaRegs,
+    constraints: Vec<OperandConstraint>,
+) -> Vec<OperandConstraint> {
+    constraints
+        .into_iter()
+        .map(|constraint| match constraint {
+            OperandConstraint::RegClass(rc_index) => {
+                let new_rc_index = if rc_index == regs.class_by_name("GPR") {
+                    regs.class_by_name("GPR8")
+                } else if rc_index == regs.class_by_name("FPR") {
+                    regs.class_by_name("FPR8")
+                } else {
+                    rc_index
+                };
+                OperandConstraint::RegClass(new_rc_index)
+            }
+            _ => constraint,
+        })
+        .collect()
+}
+
+/// Previously called a TailRecipe in the Python meta language, this allows creating multiple
+/// variants of a single base EncodingRecipe (REX prefix, specialized w/rrr bits, different
+/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually
+/// creating Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and
+/// could be reconsidered later.
+#[derive(Clone)]
+pub struct Template<'builder> {
+    /// Mapping of format indexes to format data, used in the build() method.
+    formats: &'builder FormatRegistry,
+
+    /// Description of registers, used in the build() method.
+    regs: &'builder IsaRegs,
+
+    /// The recipe template, which is to be specialized (by copy).
+    recipe: EncodingRecipeBuilder,
+
+    /// Does this recipe require a REX prefix?
+    requires_prefix: bool,
+
+    /// Other recipe to use when REX-prefixed.
+    when_prefixed: Option<Rc<Template<'builder>>>,
+
+    // Specialized parameters.
+    /// Should we include the REX prefix?
+    rex: bool,
+    /// Value of the W bit (0 or 1).
+    w_bit: u16,
+    /// Value of the RRR bits (between 0 and 0b111).
+    rrr_bits: u16,
+    /// Opcode bytes.
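+    /// (For example, `vec![0x66, 0x0f, 0x3a, 0x0a]` is the SSE4.1 ROUNDSS sequence bound to the
+    /// `furmi_rnd` recipe in encodings.rs.)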
+    op_bytes: Vec<u8>,
+}
+
+impl<'builder> Template<'builder> {
+    fn new(
+        recipe: EncodingRecipeBuilder,
+        formats: &'builder FormatRegistry,
+        regs: &'builder IsaRegs,
+    ) -> Self {
+        Self {
+            formats,
+            regs,
+            recipe,
+            requires_prefix: false,
+            when_prefixed: None,
+            rex: false,
+            w_bit: 0,
+            rrr_bits: 0,
+            op_bytes: Vec::new(),
+        }
+    }
+
+    fn name(&self) -> &str {
+        &self.recipe.name
+    }
+    fn requires_prefix(self, value: bool) -> Self {
+        Self {
+            requires_prefix: value,
+            ..self
+        }
+    }
+    fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self {
+        assert!(self.when_prefixed.is_none());
+        Self {
+            when_prefixed: Some(template),
+            ..self
+        }
+    }
+
+    // Copy setters.
+    pub fn opcodes(&self, op_bytes: Vec<u8>) -> Self {
+        assert!(!op_bytes.is_empty());
+        let mut copy = self.clone();
+        copy.op_bytes = op_bytes;
+        copy
+    }
+    pub fn w(&self) -> Self {
+        let mut copy = self.clone();
+        copy.w_bit = 1;
+        copy
+    }
+    pub fn rrr(&self, value: u16) -> Self {
+        assert!(value <= 0b111);
+        let mut copy = self.clone();
+        copy.rrr_bits = value;
+        copy
+    }
+    pub fn nonrex(&self) -> Self {
+        assert!(!self.requires_prefix, "Tail recipe requires REX prefix.");
+        let mut copy = self.clone();
+        copy.rex = false;
+        copy
+    }
+    pub fn rex(&self) -> Self {
+        if let Some(prefixed) = &self.when_prefixed {
+            let mut ret = prefixed.rex();
+            // Forward specialized parameters.
+            ret.op_bytes = self.op_bytes.clone();
+            ret.w_bit = self.w_bit;
+            ret.rrr_bits = self.rrr_bits;
+            return ret;
+        }
+        let mut copy = self.clone();
+        copy.rex = true;
+        copy
+    }
+
+    pub fn build(mut self) -> (EncodingRecipe, u16) {
+        let (name, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit);
+
+        let (name, rex_prefix_size) = if self.rex {
+            ("Rex".to_string() + name, 1)
+        } else {
+            (name.into(), 0)
+        };
+
+        let size_addendum = self.op_bytes.len() as u64 + rex_prefix_size;
+        self.recipe.base_size += size_addendum;
+
+        // Branch ranges are relative to the end of the instruction.
+        self.recipe
+            .branch_range
+            .as_mut()
+            .map(|range| range.inst_size += size_addendum);
+
+        self.recipe.emit = replace_put_op(self.recipe.emit, &name);
+        self.recipe.name = name + &self.recipe.name;
+
+        if !self.rex {
+            let operands_in = self.recipe.operands_in.unwrap_or(Vec::new());
+            self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in));
+            let operands_out = self.recipe.operands_out.unwrap_or(Vec::new());
+            self.recipe.operands_out = Some(replace_nonrex_constraints(self.regs, operands_out));
+        }
+
+        (self.recipe.build(self.formats), bits)
+    }
+}
+
+/// Returns a predicate checking that the "cond" field of the instruction contains one of the
+/// directly supported floating point condition codes.
+fn supported_floatccs_predicate(
+    supported_cc: &[Literal],
+    format: &InstructionFormat,
+) -> InstructionPredicate {
+    supported_cc
+        .iter()
+        .fold(InstructionPredicate::new(), |pred, literal| {
+            pred.or(InstructionPredicate::new_is_field_equal(
+                format,
+                "cond",
+                literal.to_rust_code(),
+            ))
+        })
+}
+
+/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte.
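+/// (The SIB byte encodes the index scale in two bits, so only factors of 1, 2, 4, and 8 can be
+/// expressed directly.)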
+fn valid_scale(format: &InstructionFormat) -> InstructionPredicate {
+    ["1", "2", "4", "8"]
+        .iter()
+        .fold(InstructionPredicate::new(), |pred, &literal| {
+            pred.or(InstructionPredicate::new_is_field_equal(
+                format,
+                "imm",
+                literal.into(),
+            ))
+        })
+}
+
+pub(crate) fn define<'shared>(
+    shared_defs: &'shared SharedDefinitions,
+    settings: &'shared SettingGroup,
+    regs: &'shared IsaRegs,
+) -> RecipeGroup<'shared> {
+    // The set of floating point condition codes that are directly supported.
+    // Other condition codes need to be reversed or expressed as two tests.
+    let floatcc = &shared_defs.imm.floatcc;
+    let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"]
+        .iter()
+        .map(|name| Literal::enumerator_for(floatcc, name))
+        .collect();
+
+    let formats = &shared_defs.format_registry;
+
+    // Register classes shorthands.
+    let abcd = regs.class_by_name("ABCD");
+    let gpr = regs.class_by_name("GPR");
+    let fpr = regs.class_by_name("FPR");
+    let flag = regs.class_by_name("FLAG");
+
+    // Operand constraints shorthands.
+    let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags"));
+    let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax"));
+    let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
+    let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
+    let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
+
+    // Stack operand with a 32-bit signed displacement from either RBP or RSP.
+    let stack_gpr32 = Stack::new(gpr);
+    let stack_fpr32 = Stack::new(fpr);
+
+    // Format shorthands, prefixed with f_.
+    let f_binary = formats.by_name("Binary");
+    let f_binary_imm = formats.by_name("BinaryImm");
+    let f_branch = formats.by_name("Branch");
+    let f_branch_float = formats.by_name("BranchFloat");
+    let f_branch_int = formats.by_name("BranchInt");
+    let f_branch_table_entry = formats.by_name("BranchTableEntry");
+    let f_branch_table_base = formats.by_name("BranchTableBase");
+    let f_call = formats.by_name("Call");
+    let f_call_indirect = formats.by_name("CallIndirect");
+    let f_copy_special = formats.by_name("CopySpecial");
+    let f_copy_to_ssa = formats.by_name("CopyToSsa");
+    let f_extract_lane = formats.by_name("ExtractLane"); // TODO this would preferably retrieve a BinaryImm8 format but because formats are compared structurally and ExtractLane has the same structure this is impossible--if we rename ExtractLane, it may even impact parsing
+    let f_float_compare = formats.by_name("FloatCompare");
+    let f_float_cond = formats.by_name("FloatCond");
+    let f_float_cond_trap = formats.by_name("FloatCondTrap");
+    let f_func_addr = formats.by_name("FuncAddr");
+    let f_indirect_jump = formats.by_name("IndirectJump");
+    let f_insert_lane = formats.by_name("InsertLane");
+    let f_int_compare = formats.by_name("IntCompare");
+    let f_int_compare_imm = formats.by_name("IntCompareImm");
+    let f_int_cond = formats.by_name("IntCond");
+    let f_int_cond_trap = formats.by_name("IntCondTrap");
+    let f_int_select = formats.by_name("IntSelect");
+    let f_jump = formats.by_name("Jump");
+    let f_load = formats.by_name("Load");
+    let f_load_complex = formats.by_name("LoadComplex");
+    let f_multiary = formats.by_name("MultiAry");
+    let f_nullary = formats.by_name("NullAry");
+    let f_reg_fill = formats.by_name("RegFill");
+    let f_reg_move = formats.by_name("RegMove");
+    let f_reg_spill = formats.by_name("RegSpill");
+    let f_stack_load = formats.by_name("StackLoad");
+    let f_store = formats.by_name("Store");
+    let f_store_complex 
= formats.by_name("StoreComplex"); + let f_ternary = formats.by_name("Ternary"); + let f_trap = formats.by_name("Trap"); + let f_unary = formats.by_name("Unary"); + let f_unary_bool = formats.by_name("UnaryBool"); + let f_unary_const = formats.by_name("UnaryConst"); + let f_unary_global_value = formats.by_name("UnaryGlobalValue"); + let f_unary_ieee32 = formats.by_name("UnaryIeee32"); + let f_unary_ieee64 = formats.by_name("UnaryIeee64"); + let f_unary_imm = formats.by_name("UnaryImm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. + let mut recipes = RecipeGroup::new(formats, regs); + + // A null unary instruction that takes a GPR register. Can be used for identity copies and + // no-op conversions. + recipes.add_recipe( + EncodingRecipeBuilder::new("null", f_unary, 0) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("null_fpr", f_unary, 0) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("stacknull", f_unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![stack_gpr32]) + .emit(""), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("get_pinned_reg", f_nullary, 0) + .operands_out(vec![reg_r15]) + .emit(""), + ); + // umr with a fixed register output that's r15. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("set_pinned_reg", f_unary, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let r15 = RU::r15.into(); + {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); + modrm_rr(r15, in_reg0, sink); + "#, + ), + ); + + // No-op fills, created by late-stage redundant-fill removal. + recipes.add_recipe( + EncodingRecipeBuilder::new("fillnull", f_unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("ffillnull", f_unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit(""), + ); + + recipes + .add_recipe(EncodingRecipeBuilder::new("debugtrap", f_nullary, 1).emit("sink.put1(0xcc);")); + + // XX opcode, no ModR/M. + recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", f_trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. + recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", f_int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", f_float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_float_cond_trap), + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. 
+ sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rr", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rrx", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fax", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with a byte immediate. + { + let format = formats.get(f_insert_lane); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa_ib", f_insert_lane, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + format, "lane", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /n for a unary operation with extension bits. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ur", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr_reg_to_ssa", f_copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, src), sink); + modrm_rr(out_reg0, src, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. + let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", f_unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // Same as furm, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm_reg_to_ssa", f_copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, out_reg0), sink); + modrm_rr(src, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frurm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, FPR -> GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", f_unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", f_reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", f_reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. 
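+    // (Variable shifts on x86, e.g. SHL r/m, CL, hard-wire the shift count in CL, hence the
+    // FixedReg(reg_rcx) constraint on the second input below.)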
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", f_binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("div", f_ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template_recipe( + EncodingRecipeBuilder::new("mulx", f_binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + ); + + // XX /n ib with 8-bit immediate sign-extended. + { + let format = formats.get(f_binary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib", f_binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_id", f_binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd) + { + let format = formats.get(f_extract_lane); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_fpr", f_extract_lane, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + format, "lane", 8, 0, + )) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate? + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) + { + let format = formats.get(f_extract_lane); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_gpr", f_extract_lane, 2) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + format, "lane", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. 
for insertlane) + { + let format = formats.get(f_insert_lane); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib_unsigned_r", f_insert_lane, 2) + .operands_in(vec![fpr, gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + format, "lane", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm:i64 = lane.into(); + sink.put1(imm as u8); + "#, + ), + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id", f_unary_imm, 5) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id unary with 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id", f_unary_imm, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + + // XX+rd id unary with bool immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_bool", f_unary_bool, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: u32 = if imm { 1 } else { 0 }; + sink.put4(imm); + "#, + ), + ); + + // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_ref", f_nullary, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq unary with 64-bit immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_iq", f_unary_imm, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + "#, + ), + ); + + // XX+rd id unary with zero immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id_z", f_unary_imm, 1) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /n Unary with floating point 32-bit immediate equal to zero. + { + let format = formats.get(f_unary_ieee32); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f32imm_z", f_unary_ieee32, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + // XX /n Unary with floating point 64-bit immediate equal to zero. 
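+ // (Both f32imm_z above and f64imm_z below use the xor-with-self zeroing idiom: the
+ // destination is encoded as both ModR/M operands, so with an XORPS/XORPD opcode in `bits`
+ // the result is e.g. `xorps xmm0, xmm0` = 0F 57 C0, and no immediate bytes are needed to
+ // materialize +0.0.)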
+ { + let format = formats.get(f_unary_ieee64); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", f_unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", f_unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", f_nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", f_copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", f_unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", f_unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", f_unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id with Abs4 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put8(!0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd id with Abs4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr4", f_unary_global_value, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.global_values[global_value].symbol_name(), + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr8", f_unary_global_value, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.global_values[global_value].symbol_name(), + 0); + sink.put8(0); + "#, + ), + ); + + // XX+rd iq with PCRel4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // Stack addresses. + // + // TODO Alternative forms for 8-bit immediates, when applicable. 
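+ //
+ // For reference, an RSP-relative address always needs a SIB byte: e.g. `lea rax, [rsp+disp32]`
+ // is 48 8D 84 24 xx xx xx xx, where ModR/M 0x84 selects mod=10 (disp32) with rm=100 (SIB
+ // follows), and SIB 0x24 encodes base=rsp with no index. That is the shape the recipes
+ // below emit.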
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr4_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr8_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + // Store recipes. + + { + // Simple stores. + let format = formats.get(f_store); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with no offset. + let st = recipes.add_template_recipe( + EncodingRecipeBuilder::new("st", f_store, 1) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("st_abcd", f_store, 1) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + formats, + regs, + ) + .when_prefixed(st), + ); + + // XX /r register-indirect store of FPR with no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fst", f_store, 1) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with 8-bit offset.
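+ // For example (registers chosen arbitrarily): `mov [rsi+16], edi` encodes as 89 7E 10:
+ // opcode 0x89, ModR/M 0x7E (mod=01 for disp8, reg=edi, rm=rsi), then the single offset
+ // byte written by `sink.put1` below.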
+ let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", f_store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", f_store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp8", f_store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", f_store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", f_store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp32", f_store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex stores. + let format = formats.get(f_store_complex); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", f_store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", f_store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. 
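+ // The FPR variants reuse exactly the same ModR/M/SIB addressing as the GPR stores above;
+ // what makes them float stores is the opcode carried in `bits` (e.g. a MOVSS store is
+ // F3 0F 11 /r, with the mandatory prefix emitted by {{PUT_OP}}).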
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", f_store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", f_store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
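+ // (Reminder on the ABCD restriction: without a REX prefix, only AL/CL/DL/BL are
+ // byte-addressable; register numbers 4-7 would encode AH/CH/DH/BH rather than
+ // SPL/BPL/SIL/DIL, hence the `abcd` operand class on these recipes.)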
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", f_store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", f_unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", f_unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", f_reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", f_reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. + + { + // Simple loads. + let format = formats.get(f_load); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r load with no offset. 
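+ // The branches in the load recipes below handle the two irregular base registers: rm=100
+ // (rsp/r12) always requires a SIB byte, and mod=00 with rm=101 (rbp/r13) would mean
+ // RIP-relative (or a bare disp32 in 32-bit mode), so those bases take an explicit zero
+ // disp8 instead; any other base is a plain `modrm_rm` encoding.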
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ld", f_load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fld", f_load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp8", f_load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldDisp8", f_load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0); + + // XX /r load with 32-bit offset. 
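+ // e.g. `mov eax, [rdi+0x12345678]` is 8B 87 78 56 34 12: mod=10 selects a 32-bit
+ // little-endian displacement following the ModR/M byte.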
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp32", f_load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldDisp32", f_load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex loads. + let format = formats.get(f_load_complex); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndex", f_load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndex", f_load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r load with index and 8-bit offset. 
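+ // e.g. `mov eax, [rdi+rsi+8]` is 8B 44 37 08: ModR/M 0x44 (mod=01, rm=100 announcing a
+ // SIB byte), SIB 0x37 (scale=1, index=rsi, base=rdi), then the disp8 0x08.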
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp8", f_load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp8", f_load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0); + + // XX /r load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp32", f_load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp32", f_load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary fill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fillSib32", f_unary, 6) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Like fillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ffillSib32", f_unary, 6) + .operands_in(vec![stack_fpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Regfill with RSP-relative 32-bit displacement. 
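+ // e.g. reloading %edx from a spill slot is `mov edx, [rsp+disp32]` = 8B 94 24 xx xx xx xx,
+ // the same SIB-with-no-index shape as the fill recipes above.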
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("regfill32", f_reg_fill, 6) + .operands_in(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Like regfill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregfill32", f_reg_fill, 6) + .operands_in(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Call/return. + + recipes.add_template_recipe(EncodingRecipeBuilder::new("call_id", f_call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86CallPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + )); + + recipes.add_template_recipe(EncodingRecipeBuilder::new("call_plt_id", f_call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + sink.reloc_external(Reloc::X86CallPLTRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + )); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_r", f_call_indirect, 1) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ret", f_multiary, 0).emit("{{PUT_OP}}(bits, BASE_REX, sink);"), + ); + + // Branches. 
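+ //
+ // In the recipes below, `branch_range((a, b))` appears to pair the branch origin in bytes
+ // with the displacement width in bits: jmpb's 8-bit displacement reaches roughly -128..+127
+ // bytes from the end of the instruction, while jmpd's 32-bit displacement covers +/-2 GiB.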
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpb", f_jump, 1) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpd", f_jump, 4) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brib", f_branch_int, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brid", f_branch_int, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfb", f_branch_float, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_branch_float), + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfd", f_branch_float, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_branch_float), + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("indirect_jmp", f_indirect_jump, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_entry", f_branch_table_entry, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .inst_predicate(valid_scale(formats.get(f_branch_table_entry))) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + } + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("vconst", f_unary_const, 5) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_base", f_branch_table_base, 5) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + + // No reloc is needed here as the jump table is emitted directly after + // the function body. + jt_disp4(table, func, sink); + "#, + ), + ); + + // Test flags and set a register. 
+ // + // These setCC instructions only set the low 8 bits, and they can only write ABCD registers + // without a REX prefix. + // + // Other instruction encodings accepting `b1` inputs have the same constraints and only look at + // the low 8 bits of the input register. + + let seti = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("seti", f_int_cond, 1) + .operands_in(vec![reg_rflags]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + "#, + ), + formats, + regs, + ) + .requires_prefix(true), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("seti_abcd", f_int_cond, 1) + .operands_in(vec![reg_rflags]) + .operands_out(vec![abcd]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(seti), + ); + + let setf = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("setf", f_float_cond, 1) + .operands_in(vec![reg_rflags]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + "#, + ), + formats, + regs, + ) + .requires_prefix(true), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("setf_abcd", f_float_cond, 1) + .operands_in(vec![reg_rflags]) + .operands_out(vec![abcd]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(setf), + ); + + // Conditional move (a.k.a. integer select) + // (maybe-REX.W) 0F 4x modrm(r,r) + // The modrm(r,r) byte follows the opcode. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("cmov", f_int_select, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rflags), + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![2]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink); + modrm_rr(in_reg1, in_reg2, sink); + "#, + ), + ); + + // Bit scan forward and reverse + recipes.add_template_recipe( + EncodingRecipeBuilder::new("bsf_and_bsr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // Arithmetic with flag I/O. + + // XX /r, MR form. Add two GPR registers and set carry flag. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rout", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r, MR form. Add two GPR registers and get carry flag.
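+ // (For instance, an add that consumes an incoming carry would be ADC, opcode 11 /r in MR
+ // form; the carry arrives through the fixed rflags input and the recipe emits only the
+ // ModR/M pair, while the actual opcode byte is supplied by the encoding's `bits`.)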
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rin", f_ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![0]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r, MR form. Add two GPR registers with carry flag. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rio", f_ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fcmp", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_binary_imm); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0); + + // XX /n, MI form with imm8. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_ib", f_binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_small_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0); + + // XX /n, MI form with imm32. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_id", f_binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_big_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // Test-and-branch. + // + // This recipe represents the macro fusion of a test and a conditional branch. + // This serves two purposes: + // + // 1. Guarantee that the test and branch get scheduled next to each other so + // macro fusion is guaranteed to be possible. + // 2. Hide the status flags from Cranelift which doesn't currently model flags. + // + // The encoding bits affect both the test and the branch instruction: + // + // Bits 0-7 are the Jcc opcode. + // Bits 8-15 control the test instruction which always has opcode byte 0x85. 
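+ //
+ // For example, with a hypothetical encoding of bits = 0x??75: `sink.put1(bits as u8)`
+ // emits 0x75 (JNE) as the Jcc opcode, while `(bits & 0xff00) | 0x85` keeps the high byte
+ // for {{PUT_OP}} and forces the low opcode byte to 0x85 (TEST r/m, r).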
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("tjccb", f_branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("tjccd", f_branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + ); + + // 8-bit test-and-branch. + + let t8jccb = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb", f_branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + formats, + regs, + ) + .requires_prefix(true), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb_abcd", f_branch, 1 + 2) + .operands_in(vec![abcd]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(t8jccb), + ); + + let t8jccd = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccd", f_branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + formats, + regs, + ) + .requires_prefix(true), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccd_abcd", f_branch, 1 + 6) + .operands_in(vec![abcd]) + .branch_range((7, 32)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(t8jccd), + ); + + // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode. + // The register allocator can't handle a branch instruction with constrained + // operands like the t8jccd_abcd above. This variant can accept the b1 operand in + // any register, but it is larger because it uses a 32-bit test instruction with + // a 0xff immediate. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("t8jccd_long", f_branch, 5 + 6) + .operands_in(vec![gpr]) + .branch_range((11, 32)) + .emit( + r#" + // test32 r, 0xff. + {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.put4(0xff); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + ); + + // Comparison that produces a `b1` result in a GPR. + // + // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
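+ // For example, `icmp eq x, y` becomes `cmp` (39 /r, MR form) followed by `sete out`
+ // (0F 94 /r), leaving 0 or 1 in the low byte of the output register.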
+ // + // TODO This is not a great solution because: + // + // - The cmp+setcc combination is not recognized by CPU's macro fusion. + // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC` + // instructions may need a REX independently. + // - Modeling CPU flags in the type system would be better. + // + // Since the `setCC` instructions only write an 8-bit register, we use that as + // our `b1` representation: A `b1` value is represented as a GPR where the low 8 + // bits are known to be 0 or 1. The high bits are undefined. + // + // This bandaid macro doesn't support a REX prefix for the final `setCC` + // instruction, so it is limited to the `ABCD` register class for booleans. + // The omission of a `when_prefixed` alternative is deliberate here. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("icscc", f_int_compare, 1 + 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = match cond { + Equal => 0x94, + NotEqual => 0x95, + SignedLessThan => 0x9c, + SignedGreaterThanOrEqual => 0x9d, + SignedGreaterThan => 0x9f, + SignedLessThanOrEqual => 0x9e, + UnsignedLessThan => 0x92, + UnsignedGreaterThanOrEqual => 0x93, + UnsignedGreaterThan => 0x97, + UnsignedLessThanOrEqual => 0x96, + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + { + let format = formats.get(f_int_compare_imm); + + let is_small_imm = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("icscc_ib", f_int_compare_imm, 2 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .inst_predicate(is_small_imm) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = match cond { + Equal => 0x94, + NotEqual => 0x95, + SignedLessThan => 0x9c, + SignedGreaterThanOrEqual => 0x9d, + SignedGreaterThan => 0x9f, + SignedLessThanOrEqual => 0x9e, + UnsignedLessThan => 0x92, + UnsignedGreaterThanOrEqual => 0x93, + UnsignedGreaterThan => 0x97, + UnsignedLessThanOrEqual => 0x96, + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + let is_big_imm = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("icscc_id", f_int_compare_imm, 5 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .inst_predicate(is_big_imm) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = match cond { + Equal => 0x94, + NotEqual => 0x95, + SignedLessThan => 0x9c, + SignedGreaterThanOrEqual => 0x9d, + SignedGreaterThan => 0x9f, + SignedLessThanOrEqual => 0x9e, + UnsignedLessThan => 0x92, + UnsignedGreaterThanOrEqual => 0x93, + UnsignedGreaterThan => 0x97, + UnsignedLessThanOrEqual => 0x96, + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + } + + // Make a FloatCompare instruction predicate with the supported condition codes. 
+ // + // Same thing for floating point. + // + // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this: + // + // ZPC OSA + // UN 111 000 + // GT 000 000 + // LT 001 000 + // EQ 100 000 + // + // Not all floating point condition codes are supported. + // The omission of a `when_prefixed` alternative is deliberate here. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fcscc", f_float_compare, 1 + 3) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![abcd]) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_float_compare), + )) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + // `setCC` instruction, no REX. + use crate::ir::condcodes::FloatCC::*; + let setcc = match cond { + Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) + Unordered => 0x9a, // UN => setp (P=1) + OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), + UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) + GreaterThan => 0x97, // GT => seta (C=0&Z=0) + GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) + UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) + UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) + Equal | // EQ + NotEqual | // UN|LT|GT + LessThan | // LT + LessThanOrEqual | // LT|EQ + UnorderedOrGreaterThan | // UN|GT + UnorderedOrGreaterThanOrEqual // UN|GT|EQ + => panic!("{} not supported by fcscc", cond), + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_zero", f_unary, 2 + 2) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Test instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Check ZF = 1 flag to see if register holds 0.
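+ // (0F 94 is SETE: it writes 1 to the low byte of out_reg0 when ZF is set and 0
+ // otherwise, matching the two bytes emitted next.)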
+ sink.put1(0x0f); + sink.put1(0x94); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_recipe(EncodingRecipeBuilder::new("safepoint", f_multiary, 0).emit( + r#" + sink.add_stackmap(args, func, isa); + "#, + )); + + recipes +} diff --git a/cranelift-codegen/meta/src/isa/x86/registers.rs b/cranelift-codegen/meta/src/isa/x86/registers.rs index 3039bafba..4157084c1 100644 --- a/cranelift-codegen/meta/src/isa/x86/registers.rs +++ b/cranelift-codegen/meta/src/isa/x86/registers.rs @@ -6,7 +6,8 @@ pub fn define() -> IsaRegs { let builder = RegBankBuilder::new("IntRegs", "r") .units(16) .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) - .track_pressure(true); + .track_pressure(true) + .pinned_reg(15); let int_regs = regs.add_bank(builder); let builder = RegBankBuilder::new("FloatRegs", "xmm") diff --git a/cranelift-codegen/meta/src/isa/x86/settings.rs b/cranelift-codegen/meta/src/isa/x86/settings.rs index 52486d7de..10cb51620 100644 --- a/cranelift-codegen/meta/src/isa/x86/settings.rs +++ b/cranelift-codegen/meta/src/isa/x86/settings.rs @@ -30,11 +30,28 @@ pub fn define(shared: &SettingGroup) -> SettingGroup { false, ); + let shared_enable_simd = shared.get_bool("enable_simd"); + + settings.add_predicate("use_ssse3", predicate!(has_ssse3)); settings.add_predicate("use_sse41", predicate!(has_sse41)); settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42)); + + settings.add_predicate( + "use_ssse3_simd", + predicate!(shared_enable_simd && has_ssse3), + ); + settings.add_predicate( + "use_sse41_simd", + predicate!(shared_enable_simd && has_sse41), + ); + settings.add_predicate( + "use_sse42_simd", + predicate!(shared_enable_simd && has_sse41 && has_sse42), + ); + settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42)); settings.add_predicate("use_bmi1", predicate!(has_bmi1)); - settings.add_predicate("use_lznct", predicate!(has_lzcnt)); + settings.add_predicate("use_lzcnt", predicate!(has_lzcnt)); // Some shared boolean values are used in x86 instruction predicates, so we need to group them // in the same TargetIsa, for compabitibity with code generated by meta-python. diff --git a/cranelift-codegen/meta/src/lib.rs b/cranelift-codegen/meta/src/lib.rs index 9ac2a5430..ef450e787 100644 --- a/cranelift-codegen/meta/src/lib.rs +++ b/cranelift-codegen/meta/src/lib.rs @@ -5,6 +5,8 @@ mod srcgen; pub mod error; pub mod isa; +mod gen_binemit; +mod gen_encodings; mod gen_inst; mod gen_legalizer; mod gen_registers; @@ -12,6 +14,7 @@ mod gen_settings; mod gen_types; mod constant_hash; +mod default_map; mod shared; mod unique_table; @@ -35,16 +38,7 @@ pub fn generate(isas: &Vec, out_dir: &str) -> Result<(), error::Error> // Per ISA definitions. 
     let isas = isa::define(isas, &mut shared_defs);

-    let mut all_inst_groups = vec![&shared_defs.instructions];
-    all_inst_groups.extend(isas.iter().map(|isa| &isa.instructions));
-
-    gen_inst::generate(
-        all_inst_groups,
-        &shared_defs.format_registry,
-        "opcodes.rs",
-        "inst_builder.rs",
-        &out_dir,
-    )?;
+    gen_inst::generate(&shared_defs, "opcodes.rs", "inst_builder.rs", &out_dir)?;

     gen_legalizer::generate(
         &isas,
@@ -56,12 +50,28 @@ pub fn generate(isas: &Vec<isa::Isa>, out_dir: &str) -> Result<(), error::Error>

     for isa in isas {
         gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
+
         gen_settings::generate(
             &isa.settings,
             gen_settings::ParentGroup::Shared,
             &format!("settings-{}.rs", isa.name),
             &out_dir,
         )?;
+
+        gen_encodings::generate(
+            &shared_defs,
+            &isa,
+            &format!("encoding-{}.rs", isa.name),
+            &out_dir,
+        )?;
+
+        gen_binemit::generate(
+            &shared_defs.format_registry,
+            &isa.name,
+            &isa.recipes,
+            &format!("binemit-{}.rs", isa.name),
+            &out_dir,
+        )?;
     }

     Ok(())
diff --git a/cranelift-codegen/meta/src/shared/entities.rs b/cranelift-codegen/meta/src/shared/entities.rs
index 358ef8f23..910722789 100644
--- a/cranelift-codegen/meta/src/shared/entities.rs
+++ b/cranelift-codegen/meta/src/shared/entities.rs
@@ -1,65 +1,76 @@
 use crate::cdsl::operands::{OperandKind, OperandKindBuilder as Builder, OperandKindFields};

+pub struct EntityRefs {
+    /// A reference to an extended basic block in the same function.
+    /// This is primarily used in control flow instructions.
+    pub ebb: OperandKind,
+
+    /// A reference to a stack slot declared in the function preamble.
+    pub stack_slot: OperandKind,
+
+    /// A reference to a global value.
+    pub global_value: OperandKind,
+
+    /// A reference to a function signature declared in the function preamble.
+    /// This is used to provide the call signature in a call_indirect instruction.
+    pub sig_ref: OperandKind,
+
+    /// A reference to an external function declared in the function preamble.
+    /// This is used to provide the callee and signature in a call instruction.
+    pub func_ref: OperandKind,
+
+    /// A reference to a jump table declared in the function preamble.
+    pub jump_table: OperandKind,
+
+    /// A reference to a heap declared in the function preamble.
+    pub heap: OperandKind,
+
+    /// A reference to a table declared in the function preamble.
+    pub table: OperandKind,
+
+    /// A variable-sized list of value operands. Use for Ebb and function call arguments.
+    pub varargs: OperandKind,
+}
+
+impl EntityRefs {
+    pub fn new() -> Self {
+        Self {
+            ebb: create("ebb", "An extended basic block in the same function.")
+                .default_member("destination")
+                .build(),
+
+            stack_slot: create("stack_slot", "A stack slot").build(),
+
+            global_value: create("global_value", "A global value.").build(),
+
+            sig_ref: create("sig_ref", "A function signature.").build(),
+
+            func_ref: create("func_ref", "An external function.").build(),
+
+            jump_table: create("jump_table", "A jump table.")
+                .default_member("table")
+                .build(),
+
+            heap: create("heap", "A heap.").build(),
+
+            table: create("table", "A table.").build(),
+
+            varargs: Builder::new("variable_args", OperandKindFields::VariableArgs)
+                .doc(
+                    r#"
+                    A variable size list of `value` operands.
+
+                    Use this to represent arguments passed to a function call, arguments
+                    passed to an extended basic block, or a variable number of results
+                    returned from an instruction.
+                    "#,
+                )
+                .build(),
+        }
+    }
+}
+
 /// Small helper to initialize an OperandBuilder with the right kind, for a given name and doc.
 fn create(name: &'static str, doc: &'static str) -> Builder {
     Builder::new(name, OperandKindFields::EntityRef).doc(doc)
 }
-
-pub fn define() -> Vec<OperandKind> {
-    let mut kinds = Vec::new();
-
-    // A reference to an extended basic block in the same function.
-    // This is primarliy used in control flow instructions.
-    let ebb = create("ebb", "An extended basic block in the same function.")
-        .default_member("destination")
-        .build();
-    kinds.push(ebb);
-
-    // A reference to a stack slot declared in the function preamble.
-    let stack_slot = create("stack_slot", "A stack slot").build();
-    kinds.push(stack_slot);
-
-    // A reference to a global value.
-    let global_value = create("global_value", "A global value.").build();
-    kinds.push(global_value);
-
-    // A reference to a function signature declared in the function preamble.
-    // This is used to provide the call signature in a call_indirect instruction.
-    let sig_ref = create("sig_ref", "A function signature.").build();
-    kinds.push(sig_ref);
-
-    // A reference to an external function declared in the function preamble.
-    // This is used to provide the callee and signature in a call instruction.
-    let func_ref = create("func_ref", "An external function.").build();
-    kinds.push(func_ref);
-
-    // A reference to a jump table declared in the function preamble.
-    let jump_table = create("jump_table", "A jump table.")
-        .default_member("table")
-        .build();
-    kinds.push(jump_table);
-
-    // A reference to a heap declared in the function preamble.
-    let heap = create("heap", "A heap.").build();
-    kinds.push(heap);
-
-    // A reference to a table declared in the function preamble.
-    let table = create("table", "A table.").build();
-    kinds.push(table);
-
-    // A variable-sized list of value operands. Use for Ebb and function call arguments.
-    let varargs = Builder::new("variable_args", OperandKindFields::VariableArgs)
-        .doc(
-            r#"
-            A variable size list of `value` operands.
-
-            Use this to represent arguments passed to a function call, arguments
-            passed to an extended basic block, or a variable number of results
-            returned from an instruction.
-            "#,
-        )
-        .build();
-    kinds.push(varargs);
-
-    return kinds;
-}
diff --git a/cranelift-codegen/meta/src/shared/formats.rs b/cranelift-codegen/meta/src/shared/formats.rs
index 0af3c264a..73aaa4263 100644
--- a/cranelift-codegen/meta/src/shared/formats.rs
+++ b/cranelift-codegen/meta/src/shared/formats.rs
@@ -1,42 +1,19 @@
 use crate::cdsl::formats::{FormatRegistry, InstructionFormatBuilder as Builder};
-use crate::shared::OperandKinds;
-
-pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegistry {
-    // Shorthands for immediates.
-    let uimm8 = immediates.by_name("uimm8");
-    let uimm32 = immediates.by_name("uimm32");
-    let imm64 = immediates.by_name("imm64");
-    let ieee32 = immediates.by_name("ieee32");
-    let ieee64 = immediates.by_name("ieee64");
-    let boolean = immediates.by_name("boolean");
-    let intcc = immediates.by_name("intcc");
-    let floatcc = immediates.by_name("floatcc");
-    let memflags = immediates.by_name("memflags");
-    let offset32 = immediates.by_name("offset32");
-    let trapcode = immediates.by_name("trapcode");
-    let regunit = immediates.by_name("regunit");
-
-    // Shorthands for entities.
- let global_value = entities.by_name("global_value"); - let ebb = entities.by_name("ebb"); - let jump_table = entities.by_name("jump_table"); - let func_ref = entities.by_name("func_ref"); - let sig_ref = entities.by_name("sig_ref"); - let stack_slot = entities.by_name("stack_slot"); - let heap = entities.by_name("heap"); - let table = entities.by_name("table"); +use crate::shared::{entities::EntityRefs, immediates::Immediates}; +pub(crate) fn define(imm: &Immediates, entities: &EntityRefs) -> FormatRegistry { let mut registry = FormatRegistry::new(); registry.insert(Builder::new("Unary").value()); - registry.insert(Builder::new("UnaryImm").imm(imm64)); - registry.insert(Builder::new("UnaryIeee32").imm(ieee32)); - registry.insert(Builder::new("UnaryIeee64").imm(ieee64)); - registry.insert(Builder::new("UnaryBool").imm(boolean)); - registry.insert(Builder::new("UnaryGlobalValue").imm(global_value)); + registry.insert(Builder::new("UnaryImm").imm(&imm.imm64)); + registry.insert(Builder::new("UnaryIeee32").imm(&imm.ieee32)); + registry.insert(Builder::new("UnaryIeee64").imm(&imm.ieee64)); + registry.insert(Builder::new("UnaryBool").imm(&imm.boolean)); + registry.insert(Builder::new("UnaryConst").imm(&imm.pool_constant)); + registry.insert(Builder::new("UnaryGlobalValue").imm(&entities.global_value)); registry.insert(Builder::new("Binary").value().value()); - registry.insert(Builder::new("BinaryImm").value().imm(imm64)); + registry.insert(Builder::new("BinaryImm").value().imm(&imm.imm64)); // The select instructions are controlled by the second VALUE operand. // The first VALUE operand is the controlling flag which has a derived type. @@ -58,126 +35,191 @@ pub fn define(immediates: &OperandKinds, entities: &OperandKinds) -> FormatRegis registry.insert( Builder::new("InsertLane") .value() - .imm(("lane", uimm8)) + .imm_with_name("lane", &imm.uimm8) .value(), ); - registry.insert(Builder::new("ExtractLane").value().imm(("lane", uimm8))); + registry.insert( + Builder::new("ExtractLane") + .value() + .imm_with_name("lane", &imm.uimm8), + ); + registry.insert( + Builder::new("Shuffle") + .value() + .value() + .imm_with_name("mask", &imm.uimm128), + ); - registry.insert(Builder::new("IntCompare").imm(intcc).value().value()); - registry.insert(Builder::new("IntCompareImm").imm(intcc).value().imm(imm64)); - registry.insert(Builder::new("IntCond").imm(intcc).value()); + registry.insert(Builder::new("IntCompare").imm(&imm.intcc).value().value()); + registry.insert( + Builder::new("IntCompareImm") + .imm(&imm.intcc) + .value() + .imm(&imm.imm64), + ); + registry.insert(Builder::new("IntCond").imm(&imm.intcc).value()); - registry.insert(Builder::new("FloatCompare").imm(floatcc).value().value()); - registry.insert(Builder::new("FloatCond").imm(floatcc).value());; + registry.insert( + Builder::new("FloatCompare") + .imm(&imm.floatcc) + .value() + .value(), + ); + registry.insert(Builder::new("FloatCond").imm(&imm.floatcc).value()); - registry.insert(Builder::new("IntSelect").imm(intcc).value().value().value()); + registry.insert( + Builder::new("IntSelect") + .imm(&imm.intcc) + .value() + .value() + .value(), + ); - registry.insert(Builder::new("Jump").imm(ebb).varargs()); - registry.insert(Builder::new("Branch").value().imm(ebb).varargs()); + registry.insert(Builder::new("Jump").imm(&entities.ebb).varargs()); + registry.insert(Builder::new("Branch").value().imm(&entities.ebb).varargs()); registry.insert( Builder::new("BranchInt") - .imm(intcc) + .imm(&imm.intcc) .value() - .imm(ebb) + .imm(&entities.ebb) 
.varargs(), ); registry.insert( Builder::new("BranchFloat") - .imm(floatcc) + .imm(&imm.floatcc) .value() - .imm(ebb) + .imm(&entities.ebb) .varargs(), ); registry.insert( Builder::new("BranchIcmp") - .imm(intcc) + .imm(&imm.intcc) .value() .value() - .imm(ebb) + .imm(&entities.ebb) .varargs(), ); - registry.insert(Builder::new("BranchTable").value().imm(ebb).imm(jump_table)); + registry.insert( + Builder::new("BranchTable") + .value() + .imm(&entities.ebb) + .imm(&entities.jump_table), + ); registry.insert( Builder::new("BranchTableEntry") .value() .value() - .imm(uimm8) - .imm(jump_table), + .imm(&imm.uimm8) + .imm(&entities.jump_table), + ); + registry.insert(Builder::new("BranchTableBase").imm(&entities.jump_table)); + registry.insert( + Builder::new("IndirectJump") + .value() + .imm(&entities.jump_table), ); - registry.insert(Builder::new("BranchTableBase").imm(jump_table)); - registry.insert(Builder::new("IndirectJump").value().imm(jump_table)); - registry.insert(Builder::new("Call").imm(func_ref).varargs()); - registry.insert(Builder::new("CallIndirect").imm(sig_ref).value().varargs()); - registry.insert(Builder::new("FuncAddr").imm(func_ref)); + registry.insert(Builder::new("Call").imm(&entities.func_ref).varargs()); + registry.insert( + Builder::new("CallIndirect") + .imm(&entities.sig_ref) + .value() + .varargs(), + ); + registry.insert(Builder::new("FuncAddr").imm(&entities.func_ref)); - registry.insert(Builder::new("Load").imm(memflags).value().imm(offset32)); + registry.insert( + Builder::new("Load") + .imm(&imm.memflags) + .value() + .imm(&imm.offset32), + ); registry.insert( Builder::new("LoadComplex") - .imm(memflags) + .imm(&imm.memflags) .varargs() - .imm(offset32), + .imm(&imm.offset32), ); registry.insert( Builder::new("Store") - .imm(memflags) + .imm(&imm.memflags) .value() .value() - .imm(offset32), + .imm(&imm.offset32), ); registry.insert( Builder::new("StoreComplex") - .imm(memflags) + .imm(&imm.memflags) .value() .varargs() - .imm(offset32), + .imm(&imm.offset32), + ); + registry.insert( + Builder::new("StackLoad") + .imm(&entities.stack_slot) + .imm(&imm.offset32), ); - registry.insert(Builder::new("StackLoad").imm(stack_slot).imm(offset32)); registry.insert( Builder::new("StackStore") .value() - .imm(stack_slot) - .imm(offset32), + .imm(&entities.stack_slot) + .imm(&imm.offset32), ); // Accessing a WebAssembly heap. - registry.insert(Builder::new("HeapAddr").imm(heap).value().imm(uimm32)); + registry.insert( + Builder::new("HeapAddr") + .imm(&entities.heap) + .value() + .imm(&imm.uimm32), + ); // Accessing a WebAssembly table. 
- registry.insert(Builder::new("TableAddr").imm(table).value().imm(offset32)); + registry.insert( + Builder::new("TableAddr") + .imm(&entities.table) + .value() + .imm(&imm.offset32), + ); registry.insert( Builder::new("RegMove") .value() - .imm(("src", regunit)) - .imm(("dst", regunit)), + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit), ); registry.insert( Builder::new("CopySpecial") - .imm(("src", regunit)) - .imm(("dst", regunit)), + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit), ); + registry.insert(Builder::new("CopyToSsa").imm_with_name("src", &imm.regunit)); registry.insert( Builder::new("RegSpill") .value() - .imm(("src", regunit)) - .imm(("dst", stack_slot)), + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &entities.stack_slot), ); registry.insert( Builder::new("RegFill") .value() - .imm(("src", stack_slot)) - .imm(("dst", regunit)), + .imm_with_name("src", &entities.stack_slot) + .imm_with_name("dst", &imm.regunit), ); - registry.insert(Builder::new("Trap").imm(trapcode)); - registry.insert(Builder::new("CondTrap").value().imm(trapcode)); - registry.insert(Builder::new("IntCondTrap").imm(intcc).value().imm(trapcode)); + registry.insert(Builder::new("Trap").imm(&imm.trapcode)); + registry.insert(Builder::new("CondTrap").value().imm(&imm.trapcode)); + registry.insert( + Builder::new("IntCondTrap") + .imm(&imm.intcc) + .value() + .imm(&imm.trapcode), + ); registry.insert( Builder::new("FloatCondTrap") - .imm(floatcc) + .imm(&imm.floatcc) .value() - .imm(trapcode), + .imm(&imm.trapcode), ); registry diff --git a/cranelift-codegen/meta/src/shared/immediates.rs b/cranelift-codegen/meta/src/shared/immediates.rs index bee762a9e..0b5e84c52 100644 --- a/cranelift-codegen/meta/src/shared/immediates.rs +++ b/cranelift-codegen/meta/src/shared/immediates.rs @@ -2,144 +2,186 @@ use crate::cdsl::operands::{OperandKind, OperandKindBuilder as Builder}; use std::collections::HashMap; -pub fn define() -> Vec { - let mut kinds = Vec::new(); - - // A 64-bit immediate integer operand. - // - // This type of immediate integer can interact with SSA values with any - // IntType type. - let imm64 = Builder::new_imm("imm64") - .doc("A 64-bit immediate integer.") - .build(); - kinds.push(imm64); - - // An unsigned 8-bit immediate integer operand. - // - // This small operand is used to indicate lane indexes in SIMD vectors and - // immediate bit counts on shift instructions. - let uimm8 = Builder::new_imm("uimm8") - .doc("An 8-bit immediate unsigned integer.") - .build(); - kinds.push(uimm8); - - // An unsigned 32-bit immediate integer operand. - let uimm32 = Builder::new_imm("uimm32") - .doc("A 32-bit immediate unsigned integer.") - .build(); - kinds.push(uimm32); - - // A 32-bit immediate signed offset. - // - // This is used to represent an immediate address offset in load/store - // instructions. - let offset32 = Builder::new_imm("offset32") - .doc("A 32-bit immediate signed offset.") - .default_member("offset") - .build(); - kinds.push(offset32); - - // A 32-bit immediate floating point operand. - // - // IEEE 754-2008 binary32 interchange format. - let ieee32 = Builder::new_imm("ieee32") - .doc("A 32-bit immediate floating point number.") - .build(); - kinds.push(ieee32); - - // A 64-bit immediate floating point operand. - // - // IEEE 754-2008 binary64 interchange format. - let ieee64 = Builder::new_imm("ieee64") - .doc("A 64-bit immediate floating point number.") - .build(); - kinds.push(ieee64); - - // An immediate boolean operand. 
- // - // This type of immediate boolean can interact with SSA values with any - // BoolType type. - let boolean = Builder::new_imm("boolean") - .doc("An immediate boolean.") - .rust_type("bool") - .build(); - kinds.push(boolean); - - // A condition code for comparing integer values. - // This enumerated operand kind is used for the `icmp` instruction and corresponds to the - // condcodes::IntCC` Rust type. - let mut intcc_values = HashMap::new(); - intcc_values.insert("eq", "Equal"); - intcc_values.insert("ne", "NotEqual"); - intcc_values.insert("sge", "SignedGreaterThanOrEqual"); - intcc_values.insert("sgt", "SignedGreaterThan"); - intcc_values.insert("sle", "SignedLessThanOrEqual"); - intcc_values.insert("slt", "SignedLessThan"); - intcc_values.insert("uge", "UnsignedGreaterThanOrEqual"); - intcc_values.insert("ugt", "UnsignedGreaterThan"); - intcc_values.insert("ule", "UnsignedLessThanOrEqual"); - intcc_values.insert("ult", "UnsignedLessThan"); - let intcc = Builder::new_enum("intcc", intcc_values) - .doc("An integer comparison condition code.") - .default_member("cond") - .rust_type("ir::condcodes::IntCC") - .build(); - kinds.push(intcc); - - // A condition code for comparing floating point values. This enumerated operand kind is used - // for the `fcmp` instruction and corresponds to the `condcodes::FloatCC` Rust type. - let mut floatcc_values = HashMap::new(); - floatcc_values.insert("ord", "Ordered"); - floatcc_values.insert("uno", "Unordered"); - floatcc_values.insert("eq", "Equal"); - floatcc_values.insert("ne", "NotEqual"); - floatcc_values.insert("one", "OrderedNotEqual"); - floatcc_values.insert("ueq", "UnorderedOrEqual"); - floatcc_values.insert("lt", "LessThan"); - floatcc_values.insert("le", "LessThanOrEqual"); - floatcc_values.insert("gt", "GreaterThan"); - floatcc_values.insert("ge", "GreaterThanOrEqual"); - floatcc_values.insert("ult", "UnorderedOrLessThan"); - floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual"); - floatcc_values.insert("ugt", "UnorderedOrGreaterThan"); - floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual"); - let floatcc = Builder::new_enum("floatcc", floatcc_values) - .doc("A floating point comparison condition code") - .default_member("cond") - .rust_type("ir::condcodes::FloatCC") - .build(); - kinds.push(floatcc); - - // Flags for memory operations like :clif:inst:`load` and :clif:inst:`store`. - let memflags = Builder::new_imm("memflags") - .doc("Memory operation flags") - .default_member("flags") - .rust_type("ir::MemFlags") - .build(); - kinds.push(memflags); - - // A register unit in the current target ISA. - let regunit = Builder::new_imm("regunit") - .doc("A register unit in the target ISA") - .rust_type("isa::RegUnit") - .build(); - kinds.push(regunit); - - // A trap code indicating the reason for trapping. - // - // The Rust enum type also has a `User(u16)` variant for user-provided trap - // codes. - let mut trapcode_values = HashMap::new(); - trapcode_values.insert("stk_ovf", "StackOverflow"); - trapcode_values.insert("heap_oob", "HeapOutOfBounds"); - trapcode_values.insert("int_ovf", "IntegerOverflow"); - trapcode_values.insert("int_divz", "IntegerDivisionByZero"); - let trapcode = Builder::new_enum("trapcode", trapcode_values) - .doc("A trap reason code.") - .default_member("code") - .rust_type("ir::TrapCode") - .build(); - kinds.push(trapcode); - - return kinds; +pub(crate) struct Immediates { + /// A 64-bit immediate integer operand. 
+    ///
+    /// This type of immediate integer can interact with SSA values with any IntType type.
+    pub imm64: OperandKind,
+
+    /// An unsigned 8-bit immediate integer operand.
+    ///
+    /// This small operand is used to indicate lane indexes in SIMD vectors and immediate bit
+    /// counts on shift instructions.
+    pub uimm8: OperandKind,
+
+    /// An unsigned 32-bit immediate integer operand.
+    pub uimm32: OperandKind,
+
+    /// An unsigned 128-bit immediate integer operand.
+    ///
+    /// This operand is used to pass entire 128-bit vectors as immediates to instructions like
+    /// `shuffle`.
+    pub uimm128: OperandKind,
+
+    /// A constant stored in the constant pool.
+    ///
+    /// This operand is used to pass constants to instructions like vconst while storing the
+    /// actual bytes in the constant pool.
+    pub pool_constant: OperandKind,
+
+    /// A 32-bit immediate signed offset.
+    ///
+    /// This is used to represent an immediate address offset in load/store instructions.
+    pub offset32: OperandKind,
+
+    /// A 32-bit immediate floating point operand.
+    ///
+    /// IEEE 754-2008 binary32 interchange format.
+    pub ieee32: OperandKind,
+
+    /// A 64-bit immediate floating point operand.
+    ///
+    /// IEEE 754-2008 binary64 interchange format.
+    pub ieee64: OperandKind,
+
+    /// An immediate boolean operand.
+    ///
+    /// This type of immediate boolean can interact with SSA values with any BoolType type.
+    pub boolean: OperandKind,
+
+    /// A condition code for comparing integer values.
+    ///
+    /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the
+    /// `condcodes::IntCC` Rust type.
+    pub intcc: OperandKind,
+
+    /// A condition code for comparing floating point values.
+    ///
+    /// This enumerated operand kind is used for the `fcmp` instruction and corresponds to the
+    /// `condcodes::FloatCC` Rust type.
+    pub floatcc: OperandKind,
+
+    /// Flags for memory operations like `load` and `store`.
+    pub memflags: OperandKind,
+
+    /// A register unit in the current target ISA.
+    pub regunit: OperandKind,
+
+    /// A trap code indicating the reason for trapping.
+    ///
+    /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes.
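+    ///
+    /// For example, the textual code `heap_oob` corresponds to
+    /// `ir::TrapCode::HeapOutOfBounds` in the mapping built below.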
+ pub trapcode: OperandKind, +} + +impl Immediates { + pub fn new() -> Self { + Self { + imm64: Builder::new_imm("imm64") + .doc("A 64-bit immediate integer.") + .build(), + + uimm8: Builder::new_imm("uimm8") + .doc("An 8-bit immediate unsigned integer.") + .build(), + + uimm32: Builder::new_imm("uimm32") + .doc("A 32-bit immediate unsigned integer.") + .build(), + + uimm128: Builder::new_imm("uimm128") + .doc("A 128-bit immediate unsigned integer.") + .rust_type("ir::Immediate") + .build(), + + pool_constant: Builder::new_imm("poolConstant") + .doc("A constant stored in the constant pool.") + .default_member("constant_handle") + .rust_type("ir::Constant") + .build(), + + offset32: Builder::new_imm("offset32") + .doc("A 32-bit immediate signed offset.") + .default_member("offset") + .build(), + + ieee32: Builder::new_imm("ieee32") + .doc("A 32-bit immediate floating point number.") + .build(), + + ieee64: Builder::new_imm("ieee64") + .doc("A 64-bit immediate floating point number.") + .build(), + + boolean: Builder::new_imm("boolean") + .doc("An immediate boolean.") + .rust_type("bool") + .build(), + + intcc: { + let mut intcc_values = HashMap::new(); + intcc_values.insert("eq", "Equal"); + intcc_values.insert("ne", "NotEqual"); + intcc_values.insert("sge", "SignedGreaterThanOrEqual"); + intcc_values.insert("sgt", "SignedGreaterThan"); + intcc_values.insert("sle", "SignedLessThanOrEqual"); + intcc_values.insert("slt", "SignedLessThan"); + intcc_values.insert("uge", "UnsignedGreaterThanOrEqual"); + intcc_values.insert("ugt", "UnsignedGreaterThan"); + intcc_values.insert("ule", "UnsignedLessThanOrEqual"); + intcc_values.insert("ult", "UnsignedLessThan"); + Builder::new_enum("intcc", intcc_values) + .doc("An integer comparison condition code.") + .default_member("cond") + .rust_type("ir::condcodes::IntCC") + .build() + }, + + floatcc: { + let mut floatcc_values = HashMap::new(); + floatcc_values.insert("ord", "Ordered"); + floatcc_values.insert("uno", "Unordered"); + floatcc_values.insert("eq", "Equal"); + floatcc_values.insert("ne", "NotEqual"); + floatcc_values.insert("one", "OrderedNotEqual"); + floatcc_values.insert("ueq", "UnorderedOrEqual"); + floatcc_values.insert("lt", "LessThan"); + floatcc_values.insert("le", "LessThanOrEqual"); + floatcc_values.insert("gt", "GreaterThan"); + floatcc_values.insert("ge", "GreaterThanOrEqual"); + floatcc_values.insert("ult", "UnorderedOrLessThan"); + floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual"); + floatcc_values.insert("ugt", "UnorderedOrGreaterThan"); + floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual"); + Builder::new_enum("floatcc", floatcc_values) + .doc("A floating point comparison condition code") + .default_member("cond") + .rust_type("ir::condcodes::FloatCC") + .build() + }, + + memflags: Builder::new_imm("memflags") + .doc("Memory operation flags") + .default_member("flags") + .rust_type("ir::MemFlags") + .build(), + + regunit: Builder::new_imm("regunit") + .doc("A register unit in the target ISA") + .rust_type("isa::RegUnit") + .build(), + + trapcode: { + let mut trapcode_values = HashMap::new(); + trapcode_values.insert("stk_ovf", "StackOverflow"); + trapcode_values.insert("heap_oob", "HeapOutOfBounds"); + trapcode_values.insert("int_ovf", "IntegerOverflow"); + trapcode_values.insert("int_divz", "IntegerDivisionByZero"); + Builder::new_enum("trapcode", trapcode_values) + .doc("A trap reason code.") + .default_member("code") + .rust_type("ir::TrapCode") + .build() + }, + } + } } diff --git 
a/cranelift-codegen/meta/src/shared/instructions.rs b/cranelift-codegen/meta/src/shared/instructions.rs index d9a45f0a3..b52a8dafe 100644 --- a/cranelift-codegen/meta/src/shared/instructions.rs +++ b/cranelift-codegen/meta/src/shared/instructions.rs @@ -2,46 +2,29 @@ use crate::cdsl::formats::FormatRegistry; use crate::cdsl::instructions::{ - InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, }; use crate::cdsl::operands::{create_operand as operand, create_operand_doc as operand_doc}; use crate::cdsl::type_inference::Constraint::WiderOrEq; use crate::cdsl::types::{LaneType, ValueType}; use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; -use crate::shared::{types, OperandKinds}; +use crate::shared::types; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; -pub fn define( +pub(crate) fn define( + all_instructions: &mut AllInstructions, format_registry: &FormatRegistry, - immediates: &OperandKinds, - entities: &OperandKinds, + imm: &Immediates, + entities: &EntityRefs, ) -> InstructionGroup { - let mut ig = - InstructionGroupBuilder::new("base", "Shared base instruction set", format_registry); + let mut ig = InstructionGroupBuilder::new( + "base", + "Shared base instruction set", + all_instructions, + format_registry, + ); // Operand kind shorthands. - let intcc = immediates.by_name("intcc"); - let floatcc = immediates.by_name("floatcc"); - let trapcode = immediates.by_name("trapcode"); - let uimm8 = immediates.by_name("uimm8"); - let uimm32 = immediates.by_name("uimm32"); - let imm64 = immediates.by_name("imm64"); - let offset32 = immediates.by_name("offset32"); - let memflags = immediates.by_name("memflags"); - let ieee32 = immediates.by_name("ieee32"); - let ieee64 = immediates.by_name("ieee64"); - let boolean = immediates.by_name("boolean"); - let regunit = immediates.by_name("regunit"); - - let ebb = entities.by_name("ebb"); - let jump_table = entities.by_name("jump_table"); - let variable_args = entities.by_name("variable_args"); - let func_ref = entities.by_name("func_ref"); - let sig_ref = entities.by_name("sig_ref"); - let stack_slot = entities.by_name("stack_slot"); - let global_value = entities.by_name("global_value"); - let heap = entities.by_name("heap"); - let table = entities.by_name("table"); - let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); @@ -80,6 +63,12 @@ pub fn define( TypeSetBuilder::new().ints(32..64).build(), ); + let Ref = &TypeVar::new( + "Ref", + "A scalar reference type", + TypeSetBuilder::new().refs(Interval::All).build(), + ); + let Testable = &TypeVar::new( "Testable", "A scalar boolean or integer type", @@ -103,16 +92,19 @@ pub fn define( let Any = &TypeVar::new( "Any", - "Any integer, float, or boolean scalar or vector type", + "Any integer, float, boolean, or reference scalar or vector type", TypeSetBuilder::new() .ints(Interval::All) .floats(Interval::All) .bools(Interval::All) + .refs(Interval::All) .simd_lanes(Interval::All) .includes_scalars(true) .build(), ); + let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string()); + let Mem = &TypeVar::new( "Mem", "Any type that can be stored in memory", @@ -123,23 +115,15 @@ pub fn define( .build(), ); - let MemTo = &TypeVar::new( - "MemTo", - "Any type that can be stored in memory", - TypeSetBuilder::new() - .ints(Interval::All) - .floats(Interval::All) - 
.simd_lanes(Interval::All) - .build(), - ); + let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string()); let addr = &operand("addr", iAddr); let c = &operand_doc("c", Testable, "Controlling value to test"); - let Cond = &operand("Cond", intcc); + let Cond = &operand("Cond", &imm.intcc); let x = &operand("x", iB); let y = &operand("y", iB); - let EBB = &operand_doc("EBB", ebb, "Destination extended basic block"); - let args = &operand_doc("args", variable_args, "EBB arguments"); + let EBB = &operand_doc("EBB", &entities.ebb, "Destination extended basic block"); + let args = &operand_doc("args", &entities.varargs, "EBB arguments"); ig.push( Inst::new( @@ -163,7 +147,7 @@ pub fn define( r#" Fall through to the next EBB. - This is the same as :inst:`jump`, except the destination EBB must be + This is the same as `jump`, except the destination EBB must be the next one in the layout. Jumps are turned into fall-through instructions by the branch @@ -182,7 +166,7 @@ pub fn define( r#" Branch when zero. - If ``c`` is a :type:`b1` value, take the branch when ``c`` is false. If + If ``c`` is a `b1` value, take the branch when ``c`` is false. If ``c`` is an integer value, take the branch when ``c = 0``. "#, ) @@ -196,7 +180,7 @@ pub fn define( r#" Branch when non-zero. - If ``c`` is a :type:`b1` value, take the branch when ``c`` is true. If + If ``c`` is a `b1` value, take the branch when ``c`` is true. If ``c`` is an integer value, take the branch when ``c != 0``. "#, ) @@ -210,15 +194,19 @@ pub fn define( r#" Compare scalar integers and branch. - Compare ``x`` and ``y`` in the same way as the :inst:`icmp` instruction - and take the branch if the condition is true:: + Compare ``x`` and ``y`` in the same way as the `icmp` instruction + and take the branch if the condition is true: + ```text br_icmp ugt v1, v2, ebb4(v5, v6) + ``` - is semantically equivalent to:: + is semantically equivalent to: + ```text v10 = icmp ugt, v1, v2 brnz v10, ebb4(v5, v6) + ``` Some RISC architectures like MIPS and RISC-V provide instructions that implement all or some of the condition codes. The instruction can also @@ -242,7 +230,7 @@ pub fn define( .is_branch(true), ); - let Cond = &operand("Cond", floatcc); + let Cond = &operand("Cond", &imm.floatcc); let f = &operand("f", fflags); ig.push( @@ -256,16 +244,10 @@ pub fn define( .is_branch(true), ); + // The index into the br_table can be any type; legalizer will convert it to the right type. let x = &operand_doc("x", iB, "index into jump table"); - - let Entry = &TypeVar::new( - "Entry", - "A scalar integer type", - TypeSetBuilder::new().ints(Interval::All).build(), - ); - - let entry = &operand_doc("entry", Entry, "entry of jump table"); - let JT = &operand("JT", jump_table); + let entry = &operand_doc("entry", iAddr, "entry of jump table"); + let JT = &operand("JT", &entities.jump_table); ig.push( Inst::new( @@ -292,7 +274,10 @@ pub fn define( .is_branch(true), ); - let Size = &operand_doc("Size", uimm8, "Size in bytes"); + // These are the instructions which br_table legalizes to: they perform address computations, + // using pointer-sized integers, so their type variables are more constrained. 
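+    // For example, `x` below is typed `iAddr` rather than a generic integer,
+    // since the jump-table address arithmetic is done in pointer-sized
+    // integers.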
+ let x = &operand_doc("x", iAddr, "index into jump table"); + let Size = &operand_doc("Size", &imm.uimm8, "Size in bytes"); ig.push( Inst::new( @@ -357,7 +342,7 @@ pub fn define( .can_store(true), ); - let code = &operand("code", trapcode); + let code = &operand("code", &imm.trapcode); ig.push( Inst::new( @@ -384,6 +369,19 @@ pub fn define( .can_trap(true), ); + ig.push( + Inst::new( + "resumable_trap", + r#" + A resumable trap. + + This instruction allows non-conditional traps to be used as non-terminal instructions. + "#, + ) + .operands_in(vec![code]) + .can_trap(true), + ); + ig.push( Inst::new( "trapnz", @@ -397,7 +395,7 @@ pub fn define( .can_trap(true), ); - let Cond = &operand("Cond", intcc); + let Cond = &operand("Cond", &imm.intcc); let f = &operand("f", iflags); ig.push( @@ -411,7 +409,7 @@ pub fn define( .can_trap(true), ); - let Cond = &operand("Cond", floatcc); + let Cond = &operand("Cond", &imm.floatcc); let f = &operand("f", fflags); ig.push( @@ -425,7 +423,7 @@ pub fn define( .can_trap(true), ); - let rvals = &operand_doc("rvals", variable_args, "return values"); + let rvals = &operand_doc("rvals", &entities.varargs, "return values"); ig.push( Inst::new( @@ -461,10 +459,10 @@ pub fn define( let FN = &operand_doc( "FN", - func_ref, - "function to call, declared by :inst:`function`", + &entities.func_ref, + "function to call, declared by `function`", ); - let args = &operand_doc("args", variable_args, "call arguments"); + let args = &operand_doc("args", &entities.varargs, "call arguments"); ig.push( Inst::new( @@ -481,7 +479,7 @@ pub fn define( .is_call(true), ); - let SIG = &operand_doc("SIG", sig_ref, "function signature"); + let SIG = &operand_doc("SIG", &entities.sig_ref, "function signature"); let callee = &operand_doc("callee", iAddr, "address of function to call"); ig.push( @@ -495,7 +493,7 @@ pub fn define( Note that this is different from WebAssembly's ``call_indirect``; the callee is a native address, rather than a table index. For WebAssembly, - :inst:`table_addr` and :inst:`load` are used to obtain a native address + `table_addr` and `load` are used to obtain a native address from a table. "#, ) @@ -512,8 +510,8 @@ pub fn define( Compute the absolute address of a function declared in the preamble. The returned address can be used as a ``callee`` argument to - :inst:`call_indirect`. This is also a method for calling functions that - are too far away to be addressable by a direct :inst:`call` + `call_indirect`. This is also a method for calling functions that + are too far away to be addressable by a direct `call` instruction. 
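+
+        A sketch in textual IR, assuming `fn0` and a matching `sig0` are
+        declared in the preamble:
+
+        ```text
+        v1 = func_addr.i64 fn0
+        v2 = call_indirect sig0, v1(v0)
+        ```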
"#, ) @@ -521,13 +519,13 @@ pub fn define( .operands_out(vec![addr]), ); - let SS = &operand("SS", stack_slot); - let Offset = &operand_doc("Offset", offset32, "Byte offset from base address"); + let SS = &operand("SS", &entities.stack_slot); + let Offset = &operand_doc("Offset", &imm.offset32, "Byte offset from base address"); let x = &operand_doc("x", Mem, "Value to be stored"); let a = &operand_doc("a", Mem, "Value loaded"); let p = &operand("p", iAddr); - let MemFlags = &operand("MemFlags", memflags); - let args = &operand_doc("args", variable_args, "Address arguments"); + let MemFlags = &operand("MemFlags", &imm.memflags); + let args = &operand_doc("args", &entities.varargs, "Address arguments"); ig.push( Inst::new( @@ -859,7 +857,7 @@ pub fn define( let x = &operand_doc("x", Mem, "Value to be stored"); let a = &operand_doc("a", Mem, "Value loaded"); - let Offset = &operand_doc("Offset", offset32, "In-bounds offset into stack slot"); + let Offset = &operand_doc("Offset", &imm.offset32, "In-bounds offset into stack slot"); ig.push( Inst::new( @@ -872,7 +870,7 @@ pub fn define( The offset is an immediate constant, not an SSA value. The memory access cannot go out of bounds, i.e. - :math:`sizeof(a) + Offset <= sizeof(SS)`. + `sizeof(a) + Offset <= sizeof(SS)`. "#, ) .operands_in(vec![SS, Offset]) @@ -891,7 +889,7 @@ pub fn define( The offset is an immediate constant, not an SSA value. The memory access cannot go out of bounds, i.e. - :math:`sizeof(a) + Offset <= sizeof(SS)`. + `sizeof(a) + Offset <= sizeof(SS)`. "#, ) .operands_in(vec![x, SS, Offset]) @@ -906,14 +904,14 @@ pub fn define( Compute the absolute address of a byte in a stack slot. The offset must refer to a byte inside the stack slot: - :math:`0 <= Offset < sizeof(SS)`. + `0 <= Offset < sizeof(SS)`. "#, ) .operands_in(vec![SS, Offset]) .operands_out(vec![addr]), ); - let GV = &operand("GV", global_value); + let GV = &operand("GV", &entities.global_value); ig.push( Inst::new( @@ -943,9 +941,9 @@ pub fn define( TypeSetBuilder::new().ints(32..64).build(), ); - let H = &operand("H", heap); + let H = &operand("H", &entities.heap); let p = &operand("p", HeapOffset); - let Size = &operand_doc("Size", uimm32, "Size in bytes"); + let Size = &operand_doc("Size", &imm.uimm32, "Size in bytes"); ig.push( Inst::new( @@ -966,14 +964,42 @@ pub fn define( .operands_out(vec![addr]), ); + // Note this instruction is marked as having other side-effects, so GVN won't try to hoist it, + // which would result in it being subject to spilling. While not hoisting would generally hurt + // performance, since a computed value used many times may need to be regenerated before each + // use, it is not the case here: this instruction doesn't generate any code. That's because, + // by definition the pinned register is never used by the register allocator, but is written to + // and read explicitly and exclusively by set_pinned_reg and get_pinned_reg. + ig.push( + Inst::new( + "get_pinned_reg", + r#" + Gets the content of the pinned register, when it's enabled. + "#, + ) + .operands_out(vec![addr]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "set_pinned_reg", + r#" + Sets the content of the pinned register, when it's enabled. 
+ "#, + ) + .operands_in(vec![addr]) + .other_side_effects(true), + ); + let TableOffset = &TypeVar::new( "TableOffset", "An unsigned table offset", TypeSetBuilder::new().ints(32..64).build(), ); - let T = &operand("T", table); + let T = &operand("T", &entities.table); let p = &operand("p", TableOffset); - let Offset = &operand_doc("Offset", offset32, "Byte offset from element address"); + let Offset = &operand_doc("Offset", &imm.offset32, "Byte offset from element address"); ig.push( Inst::new( @@ -996,7 +1022,7 @@ pub fn define( .operands_out(vec![addr]), ); - let N = &operand("N", imm64); + let N = &operand("N", &imm.imm64); let a = &operand_doc("a", Int, "A constant integer scalar or vector value"); ig.push( @@ -1013,7 +1039,7 @@ pub fn define( .operands_out(vec![a]), ); - let N = &operand("N", ieee32); + let N = &operand("N", &imm.ieee32); let a = &operand_doc("a", f32_, "A constant f32 scalar value"); ig.push( @@ -1022,14 +1048,14 @@ pub fn define( r#" Floating point constant. - Create a :type:`f32` SSA value with an immediate constant value. + Create a `f32` SSA value with an immediate constant value. "#, ) .operands_in(vec![N]) .operands_out(vec![a]), ); - let N = &operand("N", ieee64); + let N = &operand("N", &imm.ieee64); let a = &operand_doc("a", f64_, "A constant f64 scalar value"); ig.push( @@ -1038,14 +1064,14 @@ pub fn define( r#" Floating point constant. - Create a :type:`f64` SSA value with an immediate constant value. + Create a `f64` SSA value with an immediate constant value. "#, ) .operands_in(vec![N]) .operands_out(vec![a]), ); - let N = &operand("N", boolean); + let N = &operand("N", &imm.boolean); let a = &operand_doc("a", Bool, "A constant boolean scalar or vector value"); ig.push( @@ -1062,6 +1088,75 @@ pub fn define( .operands_out(vec![a]), ); + let N = &operand_doc( + "N", + &imm.pool_constant, + "The 16 immediate bytes of a 128-bit vector", + ); + let a = &operand_doc("a", TxN, "A constant vector value"); + + ig.push( + Inst::new( + "vconst", + r#" + SIMD vector constant. + + Construct a vector with the given immediate bytes. + "#, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let mask = &operand_doc( + "mask", + &imm.uimm128, + "The 16 immediate bytes used for selecting the elements to shuffle", + ); + let Tx16 = &TypeVar::new( + "Tx16", + "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \ + lane counts and widths", + TypeSetBuilder::new() + .ints(8..8) + .bools(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let a = &operand_doc("a", Tx16, "A vector value"); + let b = &operand_doc("b", Tx16, "A vector value"); + + ig.push( + Inst::new( + "shuffle", + r#" + SIMD vector shuffle. + + Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the + immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of + 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the + 0-31 range place a 0 in the resulting vector lane. + "#, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let a = &operand_doc("a", Ref, "A constant reference null value"); + + ig.push( + Inst::new( + "null", + r#" + Null constant value for reference types. + + Create a scalar reference SSA value with a constant null value. + "#, + ) + .operands_out(vec![a]), + ); + ig.push(Inst::new( "nop", r#" @@ -1082,7 +1177,7 @@ pub fn define( r#" Conditional select. - This instruction selects whole values. 
Use :inst:`vselect` for + This instruction selects whole values. Use `vselect` for lane-wise selection. "#, ) @@ -1090,7 +1185,7 @@ pub fn define( .operands_out(vec![a]), ); - let cc = &operand_doc("cc", intcc, "Controlling condition code"); + let cc = &operand_doc("cc", &imm.intcc, "Controlling condition code"); let flags = &operand_doc("flags", iflags, "The machine's flag register"); ig.push( @@ -1130,7 +1225,7 @@ pub fn define( r#" Spill a register value to a stack slot. - This instruction behaves exactly like :inst:`copy`, but the result + This instruction behaves exactly like `copy`, but the result value is assigned to a spill slot. "#, ) @@ -1145,7 +1240,7 @@ pub fn define( r#" Load a register value from a stack slot. - This instruction behaves exactly like :inst:`copy`, but creates a new + This instruction behaves exactly like `copy`, but creates a new SSA value for the spilled input value. "#, ) @@ -1154,8 +1249,24 @@ pub fn define( .can_load(true), ); - let src = &operand("src", regunit); - let dst = &operand("dst", regunit); + ig.push( + Inst::new( + "fill_nop", + r#" + This is identical to `fill`, except it has no encoding, since it is a no-op. + + This instruction is created only during late-stage redundant-reload removal, after all + registers and stack slots have been assigned. It is used to replace `fill`s that have + been identified as redundant. + "#, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + let src = &operand("src", &imm.regunit); + let dst = &operand("dst", &imm.regunit); ig.push( Inst::new( @@ -1193,6 +1304,23 @@ pub fn define( .other_side_effects(true), ); + ig.push( + Inst::new( + "copy_to_ssa", + r#" + Copies the contents of ''src'' register to ''a'' SSA name. + + This instruction copies the contents of one register, regardless of its SSA name, to + another register, creating a new SSA name. In that sense it is a one-sided version + of ''copy_special''. This instruction is internal and should not be created by + Cranelift users. + "#, + ) + .operands_in(vec![src]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + ig.push( Inst::new( "copy_nop", @@ -1222,7 +1350,7 @@ pub fn define( .other_side_effects(true), ); - let Offset = &operand_doc("Offset", imm64, "Offset from current stack pointer"); + let Offset = &operand_doc("Offset", &imm.imm64, "Offset from current stack pointer"); ig.push( Inst::new( @@ -1239,7 +1367,7 @@ pub fn define( .other_side_effects(true), ); - let Offset = &operand_doc("Offset", imm64, "Offset from current stack pointer"); + let Offset = &operand_doc("Offset", &imm.imm64, "Offset from current stack pointer"); ig.push( Inst::new( @@ -1265,7 +1393,7 @@ pub fn define( r#" Compare ``addr`` with the stack pointer and set the CPU flags. - This is like :inst:`ifcmp` where ``addr`` is the LHS operand and the stack + This is like `ifcmp` where ``addr`` is the LHS operand and the stack pointer is the RHS. "#, ) @@ -1284,7 +1412,7 @@ pub fn define( allocator to temporarily rearrange register assignments in order to satisfy instruction constraints. - See also :inst:`regmove`. + See also `regmove`. "#, ) .operands_in(vec![x, src, SS]) @@ -1302,13 +1430,31 @@ pub fn define( allocator to temporarily rearrange register assignments in order to satisfy instruction constraints. - See also :inst:`regmove`. + See also `regmove`. 
"#, ) .operands_in(vec![x, SS, dst]) .other_side_effects(true), ); + let N = &operand_doc( + "args", + &entities.varargs, + "Variable number of args for Stackmap", + ); + + ig.push( + Inst::new( + "safepoint", + r#" + This instruction will provide live reference values at a point in + the function. It can only be used by the compiler. + "#, + ) + .operands_in(vec![N]) + .other_side_effects(true), + ); + let x = &operand_doc("x", TxN, "Vector to split"); let lo = &operand_doc("lo", &TxN.half_vector(), "Low-numbered lanes of `x`"); let hi = &operand_doc("hi", &TxN.half_vector(), "High-numbered lanes of `x`"); @@ -1400,7 +1546,7 @@ pub fn define( let x = &operand_doc("x", TxN, "SIMD vector to modify"); let y = &operand_doc("y", &TxN.lane_of(), "New lane value"); - let Idx = &operand_doc("Idx", uimm8, "Lane index"); + let Idx = &operand_doc("Idx", &imm.uimm8, "Lane index"); ig.push( Inst::new( @@ -1426,7 +1572,9 @@ pub fn define( Extract lane ``Idx`` from ``x``. The lane index, ``Idx``, is an immediate value, not an SSA value. It - must indicate a valid lane index for the type of ``x``. + must indicate a valid lane index for the type of ``x``. Note that the upper bits of ``a`` + may or may not be zeroed depending on the ISA but the type system should prevent using + ``a`` as anything other than the extracted value. "#, ) .operands_in(vec![x, Idx]) @@ -1434,7 +1582,7 @@ pub fn define( ); let a = &operand("a", &Int.as_bool()); - let Cond = &operand("Cond", intcc); + let Cond = &operand("Cond", &imm.intcc); let x = &operand("x", Int); let y = &operand("y", Int); @@ -1468,7 +1616,7 @@ pub fn define( let a = &operand("a", b1); let x = &operand("x", iB); - let Y = &operand("Y", imm64); + let Y = &operand("Y", &imm.imm64); ig.push( Inst::new( @@ -1476,10 +1624,10 @@ pub fn define( r#" Compare scalar integer to a constant. - This is the same as the :inst:`icmp` instruction, except one operand is + This is the same as the `icmp` instruction, except one operand is an immediate constant. - This instruction can only compare scalars. Use :inst:`icmp` for + This instruction can only compare scalars. Use `icmp` for lane-wise vector comparisons. "#, ) @@ -1511,7 +1659,7 @@ pub fn define( r#" Compare scalar integer to a constant and return flags. - Like :inst:`icmp_imm`, but returns integer CPU flags instead of testing + Like `icmp_imm`, but returns integer CPU flags instead of testing a specific condition code. "#, ) @@ -1527,7 +1675,7 @@ pub fn define( Inst::new( "iadd", r#" - Wrapping integer addition: :math:`a := x + y \pmod{2^B}`. + Wrapping integer addition: `a := x + y \pmod{2^B}`. This instruction does not depend on the signed/unsigned interpretation of the operands. @@ -1541,7 +1689,7 @@ pub fn define( Inst::new( "isub", r#" - Wrapping integer subtraction: :math:`a := x - y \pmod{2^B}`. + Wrapping integer subtraction: `a := x - y \pmod{2^B}`. This instruction does not depend on the signed/unsigned interpretation of the operands. @@ -1555,7 +1703,7 @@ pub fn define( Inst::new( "imul", r#" - Wrapping integer multiplication: :math:`a := x y \pmod{2^B}`. + Wrapping integer multiplication: `a := x y \pmod{2^B}`. This instruction does not depend on the signed/unsigned interpretation of the @@ -1602,7 +1750,7 @@ pub fn define( Inst::new( "udiv", r#" - Unsigned integer division: :math:`a := \lfloor {x \over y} \rfloor`. + Unsigned integer division: `a := \lfloor {x \over y} \rfloor`. This operation traps if the divisor is zero. 
"#, @@ -1616,12 +1764,12 @@ pub fn define( Inst::new( "sdiv", r#" - Signed integer division rounded toward zero: :math:`a := sign(xy) + Signed integer division rounded toward zero: `a := sign(xy) \lfloor {|x| \over |y|}\rfloor`. This operation traps if the divisor is zero, or if the result is not - representable in :math:`B` bits two's complement. This only happens - when :math:`x = -2^{B-1}, y = -1`. + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, y = -1`. "#, ) .operands_in(vec![x, y]) @@ -1659,7 +1807,7 @@ pub fn define( let a = &operand("a", iB); let x = &operand("x", iB); - let Y = &operand("Y", imm64); + let Y = &operand("Y", &imm.imm64); ig.push( Inst::new( @@ -1667,7 +1815,7 @@ pub fn define( r#" Add immediate integer. - Same as :inst:`iadd`, but one operand is an immediate constant. + Same as `iadd`, but one operand is an immediate constant. Polymorphic over all scalar integer types, but does not support vector types. @@ -1711,8 +1859,8 @@ pub fn define( Signed integer division by an immediate constant. This operation traps if the divisor is zero, or if the result is not - representable in :math:`B` bits two's complement. This only happens - when :math:`x = -2^{B-1}, Y = -1`. + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, Y = -1`. "#, ) .operands_in(vec![x, Y]) @@ -1749,9 +1897,9 @@ pub fn define( Inst::new( "irsub_imm", r#" - Immediate reverse wrapping subtraction: :math:`a := Y - x \pmod{2^B}`. + Immediate reverse wrapping subtraction: `a := Y - x \pmod{2^B}`. - Also works as integer negation when :math:`Y = 0`. Use :inst:`iadd_imm` + Also works as integer negation when `Y = 0`. Use `iadd_imm` with a negative immediate operand for the reverse immediate subtraction. @@ -1766,22 +1914,28 @@ pub fn define( let a = &operand("a", iB); let x = &operand("x", iB); let y = &operand("y", iB); + let c_in = &operand_doc("c_in", b1, "Input carry flag"); let c_out = &operand_doc("c_out", b1, "Output carry flag"); let b_in = &operand_doc("b_in", b1, "Input borrow flag"); let b_out = &operand_doc("b_out", b1, "Output borrow flag"); + let c_if_in = &operand("c_in", iflags); + let c_if_out = &operand("c_out", iflags); + let b_if_in = &operand("b_in", iflags); + let b_if_out = &operand("b_out", iflags); + ig.push( Inst::new( "iadd_cin", r#" Add integers with carry in. - Same as :inst:`iadd` with an additional carry input. Computes: - - .. math:: + Same as `iadd` with an additional carry input. Computes: + ```text a = x + y + c_{in} \pmod 2^B + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1791,18 +1945,38 @@ pub fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "iadd_ifcin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry flag input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a]), + ); + ig.push( Inst::new( "iadd_cout", r#" Add integers with carry out. - Same as :inst:`iadd` with an additional carry output. - - .. math:: + Same as `iadd` with an additional carry output. + ```text a &= x + y \pmod 2^B \\ c_{out} &= x+y >= 2^B + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1812,18 +1986,39 @@ pub fn define( .operands_out(vec![a, c_out]), ); + ig.push( + Inst::new( + "iadd_ifcout", + r#" + Add integers with carry out. 
+ + Same as `iadd` with an additional carry flag output. + + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_if_out]), + ); + ig.push( Inst::new( "iadd_carry", r#" Add integers with carry in and out. - Same as :inst:`iadd` with an additional carry input and output. - - .. math:: + Same as `iadd` with an additional carry input and output. + ```text a &= x + y + c_{in} \pmod 2^B \\ c_{out} &= x + y + c_{in} >= 2^B + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1833,17 +2028,38 @@ pub fn define( .operands_out(vec![a, c_out]), ); + ig.push( + Inst::new( + "iadd_ifcarry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry flag input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a, c_if_out]), + ); + ig.push( Inst::new( "isub_bin", r#" Subtract integers with borrow in. - Same as :inst:`isub` with an additional borrow flag input. Computes: - - .. math:: + Same as `isub` with an additional borrow flag input. Computes: + ```text a = x - (y + b_{in}) \pmod 2^B + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1853,18 +2069,38 @@ pub fn define( .operands_out(vec![a]), ); + ig.push( + Inst::new( + "isub_ifbin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a]), + ); + ig.push( Inst::new( "isub_bout", r#" Subtract integers with borrow out. - Same as :inst:`isub` with an additional borrow flag output. - - .. math:: + Same as `isub` with an additional borrow flag output. + ```text a &= x - y \pmod 2^B \\ b_{out} &= x < y + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1874,18 +2110,39 @@ pub fn define( .operands_out(vec![a, b_out]), ); + ig.push( + Inst::new( + "isub_ifbout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_if_out]), + ); + ig.push( Inst::new( "isub_borrow", r#" Subtract integers with borrow in and out. - Same as :inst:`isub` with an additional borrow flag input and output. - - .. math:: + Same as `isub` with an additional borrow flag input and output. + ```text a &= x - (y + b_{in}) \pmod 2^B \\ b_{out} &= x < y + b_{in} + ``` Polymorphic over all scalar integer types, but does not support vector types. @@ -1895,6 +2152,27 @@ pub fn define( .operands_out(vec![a, b_out]), ); + ig.push( + Inst::new( + "isub_ifborrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. 
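+
+        A worked example with B = 8, x = 10, y = 20 and b_in = true:
+
+        ```text
+        a     = 10 - (20 + 1) mod 2^8 = 245
+        b_out = 10 < (20 + 1)         = true
+        ```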
+ "#, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a, b_if_out]), + ); + let bits = &TypeVar::new( "bits", "Any integer, float, or boolean scalar or vector type", @@ -1994,7 +2272,7 @@ pub fn define( ); let x = &operand("x", iB); - let Y = &operand("Y", imm64); + let Y = &operand("Y", &imm.imm64); let a = &operand("a", iB); ig.push( @@ -2003,7 +2281,7 @@ pub fn define( r#" Bitwise and with immediate. - Same as :inst:`band`, but one operand is an immediate constant. + Same as `band`, but one operand is an immediate constant. Polymorphic over all scalar integer types, but does not support vector types. @@ -2019,7 +2297,7 @@ pub fn define( r#" Bitwise or with immediate. - Same as :inst:`bor`, but one operand is an immediate constant. + Same as `bor`, but one operand is an immediate constant. Polymorphic over all scalar integer types, but does not support vector types. @@ -2035,7 +2313,7 @@ pub fn define( r#" Bitwise xor with immediate. - Same as :inst:`bxor`, but one operand is an immediate constant. + Same as `bxor`, but one operand is an immediate constant. Polymorphic over all scalar integer types, but does not support vector types. @@ -2047,7 +2325,7 @@ pub fn define( let x = &operand_doc("x", Int, "Scalar or vector value to shift"); let y = &operand_doc("y", iB, "Number of bits to shift"); - let Y = &operand("Y", imm64); + let Y = &operand("Y", &imm.imm64); let a = &operand("a", Int); ig.push( @@ -2109,9 +2387,10 @@ pub fn define( When shifting a B-bits integer type, this instruction computes: - .. math:: - s &:= y \pmod B, \\ + ```text + s &:= y \pmod B, a &:= x \cdot 2^s \pmod{2^B}. + ``` "#, ) .operands_in(vec![x, y]) @@ -2130,9 +2409,10 @@ pub fn define( When shifting a B-bits integer type, this instruction computes: - .. math:: - s &:= y \pmod B, \\ + ```text + s &:= y \pmod B, a &:= \lfloor x \cdot 2^{-s} \rfloor. + ``` "#, ) .operands_in(vec![x, y]) @@ -2275,7 +2555,7 @@ pub fn define( .simd_lanes(Interval::All) .build(), ); - let Cond = &operand("Cond", floatcc); + let Cond = &operand("Cond", &imm.floatcc); let x = &operand("x", Float); let y = &operand("y", Float); let a = &operand("a", &Float.as_bool()); @@ -2291,12 +2571,12 @@ pub fn define( == ========================================== UN Unordered when one or both numbers is NaN. - EQ When :math:`x = y`. (And :math:`0.0 = -0.0`). - LT When :math:`x < y`. - GT When :math:`x > y`. + EQ When `x = y`. (And `0.0 = -0.0`). + LT When `x < y`. + GT When `x > y`. == ========================================== - The 14 :type:`floatcc` condition codes each correspond to a subset of + The 14 `floatcc` condition codes each correspond to a subset of the four relations, except for the empty set which would always be false, and the full set which would always be true. @@ -2324,7 +2604,7 @@ pub fn define( The standard C comparison operators, `<, <=, >, >=`, are all ordered, so they are false if either operand is NaN. The C equality operator, `==`, is ordered, and since inequality is defined as the logical - inverse it is *unordered*. They map to the :type:`floatcc` condition + inverse it is *unordered*. They map to the `floatcc` condition codes as follows: ==== ====== ============ @@ -2357,7 +2637,7 @@ pub fn define( r#" Floating point comparison returning flags. - Compares two numbers like :inst:`fcmp`, but returns floating point CPU + Compares two numbers like `fcmp`, but returns floating point CPU flags instead of testing a specific condition. "#, ) @@ -2409,8 +2689,8 @@ pub fn define( r#" Floating point division. 
-        Unlike the integer division instructions :clif:inst:`sdiv` and
-        :clif:inst:`udiv`, this can't trap. Division by zero is infinity or
+        Unlike the integer division instructions `sdiv` and
+        `udiv`, this can't trap. Division by zero is infinity or
         NaN, depending on the dividend.
         "#,
         )
@@ -2435,7 +2715,7 @@ pub fn define(
             r#"
         Floating point fused multiply-and-add.

-        Computes :math:`a := xy+z` without any intermediate rounding of the
+        Computes `a := xy+z` without any intermediate rounding of the
         product.
         "#,
         )
@@ -2570,7 +2850,24 @@ pub fn define(
         .operands_out(vec![a]),
     );

-    let Cond = &operand("Cond", intcc);
+    let a = &operand("a", b1);
+    let x = &operand("x", Ref);
+
+    ig.push(
+        Inst::new(
+            "is_null",
+            r#"
+        Reference verification.
+
+        The condition code determines if the reference type in question is
+        null or not.
+        "#,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let Cond = &operand("Cond", &imm.intcc);
     let f = &operand("f", iflags);
     let a = &operand("a", b1);

@@ -2588,7 +2885,7 @@ pub fn define(
         .operands_out(vec![a]),
     );

-    let Cond = &operand("Cond", floatcc);
+    let Cond = &operand("Cond", &imm.floatcc);
     let f = &operand("f", fflags);

     ig.push(
@@ -2623,6 +2920,46 @@ pub fn define(
         .operands_out(vec![a]),
     );

+    let x = &operand("x", Any);
+    let a = &operand_doc("a", AnyTo, "Bits of `x` reinterpreted");
+
+    ig.push(
+        Inst::new(
+            "raw_bitcast",
+            r#"
+        Cast the bits in `x` as a different type of the same bit width.
+
+        This instruction does not change the data's representation but allows
+        data in registers to be used as different types, e.g. an i32x4 as a
+        b8x16. The only constraint on the result `a` is that it can be
+        `raw_bitcast` back to the original type. Also, in a raw_bitcast between
+        vector types with the same number of lanes, the value of each result
+        lane is a raw_bitcast of the corresponding operand lane. TODO there is
+        currently no mechanism for enforcing the bit width constraint.
+        "#,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
+    let a = &operand_doc("a", TxN, "A vector value");
+    let s = &operand_doc("s", &TxN.lane_of(), "A scalar value");
+
+    ig.push(
+        Inst::new(
+            "scalar_to_vector",
+            r#"
+        Scalar To Vector -- move a value out of a scalar register and into a vector register; the
+        scalar will be moved to the lowest-order bits of the vector register. Note that this
+        instruction is intended as a low-level legalization instruction and frontends should prefer
+        insertlane; on certain architectures, scalar_to_vector may zero the highest-order bits for some
+        types (e.g. integers) but not for others (e.g. floats).
+        "#,
+        )
+        .operands_in(vec![s])
+        .operands_out(vec![a]),
+    );
+
     let Bool = &TypeVar::new(
         "Bool",
         "A scalar or vector boolean type",
@@ -2754,7 +3091,7 @@ pub fn define(
         Each lane in `x` is converted to a smaller integer type by discarding
         the most significant bits. This is the same as reducing modulo
-        :math:`2^n`.
+        `2^n`.

         The result type must have the same number of vector lanes as the
         input, and each lane must not have more bits that the input lanes. If the
@@ -2838,7 +3175,7 @@ pub fn define(
         This is an exact operation.

         Cranelift currently only supports two floating point formats
-        - :type:`f32` and :type:`f64`. This may change in the future.
+        - `f32` and `f64`. This may change in the future.

         The result type must have the same number of vector lanes as the
         input, and the result lanes must not have fewer bits than the input
         lanes. If
@@ -2860,7 +3197,7 @@ pub fn define(
         by rounding to nearest, ties to even.

         Cranelift currently only supports two floating point formats
-        - :type:`f32` and :type:`f64`. This may change in the future.
+        - `f32` and `f64`. This may change in the future.

         The result type must have the same number of vector lanes as the
         input, and the result lanes must not have more bits than the input
         lanes. If
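A note on the `raw_bitcast` instruction added above: it reinterprets a value's bits at the same width without changing their representation, conceptually a same-size transmute, and casting back must recover the original value. A plain-Rust illustration of that constraint (this sketch is not part of the patch, and the lane types are arbitrary examples, not anything the instruction is limited to):

```rust
/// Reinterpret four i32 lanes as sixteen bytes. Both representations are
/// exactly 16 bytes wide, mirroring raw_bitcast's same-bit-width rule.
fn raw_bitcast_i32x4_to_b8x16(x: [i32; 4]) -> [u8; 16] {
    // Safety: both array types have the same size and no invalid bit patterns.
    unsafe { std::mem::transmute(x) }
}
```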
@@ -2975,7 +3312,7 @@ pub fn define(
         "WideInt",
         "An integer type with lanes from `i16` upwards",
         TypeSetBuilder::new()
-            .ints(16..64)
+            .ints(16..128)
             .simd_lanes(Interval::All)
             .build(),
     );
@@ -3003,9 +3340,9 @@ pub fn define(

     let NarrowInt = &TypeVar::new(
         "NarrowInt",
-        "An integer type with lanes type to `i32`",
+        "An integer type with lanes up to `i64`",
         TypeSetBuilder::new()
-            .ints(8..32)
+            .ints(8..64)
             .simd_lanes(Interval::All)
             .build(),
     );
diff --git a/cranelift-codegen/meta/src/shared/legalize.rs b/cranelift-codegen/meta/src/shared/legalize.rs
index d8ec65e3b..30de84a19 100644
--- a/cranelift-codegen/meta/src/shared/legalize.rs
+++ b/cranelift-codegen/meta/src/shared/legalize.rs
@@ -2,12 +2,11 @@
 use crate::cdsl::ast::{var, ExprBuilder, Literal};
 use crate::cdsl::instructions::{Instruction, InstructionGroup};
 use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups};
-use crate::shared::OperandKinds;
-
+use crate::shared::immediates::Immediates;
 use crate::shared::types::Float::{F32, F64};
-use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Int::{I128, I16, I32, I64, I8};

-pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformGroups {
+pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups {
     let mut narrow = TransformGroupBuilder::new(
         "narrow",
         r#"
@@ -50,6 +49,8 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
     let bor = insts.by_name("bor");
     let bor_imm = insts.by_name("bor_imm");
     let bor_not = insts.by_name("bor_not");
+    let brnz = insts.by_name("brnz");
+    let brz = insts.by_name("brz");
     let br_icmp = insts.by_name("br_icmp");
     let br_table = insts.by_name("br_table");
     let bxor = insts.by_name("bxor");
@@ -63,12 +64,13 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
     let f64const = insts.by_name("f64const");
     let fcopysign = insts.by_name("fcopysign");
     let fcvt_from_sint = insts.by_name("fcvt_from_sint");
-    let fcvt_from_uint = insts.by_name("fcvt_from_uint");
     let fneg = insts.by_name("fneg");
     let iadd = insts.by_name("iadd");
-    let iadd_carry = insts.by_name("iadd_carry");
     let iadd_cin = insts.by_name("iadd_cin");
     let iadd_cout = insts.by_name("iadd_cout");
+    let iadd_carry = insts.by_name("iadd_carry");
+    let iadd_ifcin = insts.by_name("iadd_ifcin");
+    let iadd_ifcout = insts.by_name("iadd_ifcout");
     let iadd_imm = insts.by_name("iadd_imm");
     let icmp = insts.by_name("icmp");
     let icmp_imm = insts.by_name("icmp_imm");
@@ -87,8 +89,10 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
     let istore16 = insts.by_name("istore16");
     let isub = insts.by_name("isub");
     let isub_bin = insts.by_name("isub_bin");
-    let isub_borrow = insts.by_name("isub_borrow");
     let isub_bout = insts.by_name("isub_bout");
+    let isub_borrow = insts.by_name("isub_borrow");
+    let isub_ifbin = insts.by_name("isub_ifbin");
+    let isub_ifbout = insts.by_name("isub_ifbout");
     let load = insts.by_name("load");
     let popcnt = insts.by_name("popcnt");
     let rotl = insts.by_name("rotl");
@@ -141,12 +145,6 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
     expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load");
     expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store");

-    // List of immediates.
-    let imm64 = immediates.by_name("imm64");
-    let ieee32 = immediates.by_name("ieee32");
-    let ieee64 = immediates.by_name("ieee64");
-    let intcc = immediates.by_name("intcc");
-
     // List of variables to reuse in patterns.
     let x = var("x");
     let y = var("y");
@@ -190,33 +188,20 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
     let al = var("al");
     let ah = var("ah");
     let cc = var("cc");
+    let ebb = var("ebb");
     let ptr = var("ptr");
     let flags = var("flags");
     let offset = var("off");
+    let vararg = var("vararg");

-    narrow.legalize(
-        def!(a = iadd(x, y)),
-        vec![
-            def!((xl, xh) = isplit(x)),
-            def!((yl, yh) = isplit(y)),
-            def!((al, c) = iadd_cout(xl, yl)),
-            def!(ah = iadd_cin(xh, yh, c)),
-            def!(a = iconcat(al, ah)),
-        ],
-    );
+    narrow.custom_legalize(load, "narrow_load");
+    narrow.custom_legalize(store, "narrow_store");

-    narrow.legalize(
-        def!(a = isub(x, y)),
-        vec![
-            def!((xl, xh) = isplit(x)),
-            def!((yl, yh) = isplit(y)),
-            def!((al, b) = isub_bout(xl, yl)),
-            def!(ah = isub_bin(xh, yh, b)),
-            def!(a = iconcat(al, ah)),
-        ],
-    );
+    // iconst.i64 can't be legalized in the meta language (because integer literals can't be
+    // embedded as part of arguments), so use a custom legalization for now.
+    narrow.custom_legalize(iconst, "narrow_iconst");

-    for &bin_op in &[band, bor, bxor] {
+    for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] {
         narrow.legalize(
             def!(a = bin_op(x, y)),
             vec![
@@ -229,6 +214,16 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
         );
     }

+    narrow.legalize(
+        def!(a = bnot(x)),
+        vec![
+            def!((xl, xh) = isplit(x)),
+            def!(al = bnot(xl)),
+            def!(ah = bnot(xh)),
+            def!(a = iconcat(al, ah)),
+        ],
+    );
+
     narrow.legalize(
         def!(a = select(c, x, y)),
         vec![
@@ -240,6 +235,38 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG
         ],
     );

+    narrow.legalize(
+        def!(brz.I128(x, ebb, vararg)),
+        vec![
+            def!((xl, xh) = isplit(x)),
+            def!(
+                a = icmp_imm(
+                    Literal::enumerator_for(&imm.intcc, "eq"),
+                    xl,
+                    Literal::constant(&imm.imm64, 0)
+                )
+            ),
+            def!(
+                b = icmp_imm(
+                    Literal::enumerator_for(&imm.intcc, "eq"),
+                    xh,
+                    Literal::constant(&imm.imm64, 0)
+                )
+            ),
+            def!(c = band(a, b)),
+            def!(brz(c, ebb, vararg)),
+        ],
+    );
+
+    narrow.legalize(
+        def!(brnz.I128(x, ebb, vararg)),
+        vec![
+            def!((xl, xh) = isplit(x)),
+            def!(brnz(xl, ebb, vararg)),
+            def!(brnz(xh, ebb, vararg)),
+        ],
+    );
+
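For intuition about the 128-bit narrowing introduced here: `brz.i128`/`brnz.i128` split the value and test both halves, and the `narrow_flags` group defined further down splits `iadd.i128`/`isub.i128` into half-width operations chained through the carry/borrow flag. A plain-Rust model of the add scheme, with ordinary integers standing in for Cranelift IR (a sketch for illustration, not part of the patch):

```rust
/// Model of narrowing a 128-bit add to two 64-bit adds: `isplit` the
/// operands, add the low halves capturing the carry (`iadd_ifcout`), feed
/// the carry into the high-half add (`iadd_ifcin`), then `iconcat`.
fn narrow_iadd_i128(x: u128, y: u128) -> u128 {
    let (xl, xh) = (x as u64, (x >> 64) as u64); // isplit x
    let (yl, yh) = (y as u64, (y >> 64) as u64); // isplit y
    let (al, carry) = xl.overflowing_add(yl); // iadd_ifcout
    let ah = xh.wrapping_add(yh).wrapping_add(carry as u64); // iadd_ifcin
    ((ah as u128) << 64) | (al as u128) // iconcat
}
```

     // Widen instructions with one input operand.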
for &op in &[bnot, popcnt] { for &int_ty in &[I8, I16] { @@ -304,7 +331,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG } for &(int_ty, num) in &[(I8, 24), (I16, 16)] { - let imm = Literal::constant(imm64, -num); + let imm = Literal::constant(&imm.imm64, -num); widen.legalize( def!(a = clz.int_ty(b)), @@ -328,7 +355,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG } for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { - let num = Literal::constant(imm64, num); + let num = Literal::constant(&imm.imm64, num); widen.legalize( def!(a = ctz.int_ty(b)), vec![ @@ -429,7 +456,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG } for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { - let w_cc = Literal::enumerator_for(intcc, cc); + let w_cc = Literal::enumerator_for(&imm.intcc, cc); widen.legalize( def!(a = icmp_imm.int_ty(w_cc, b, c)), vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], @@ -445,7 +472,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG } for cc in &["sgt", "slt", "sge", "sle"] { - let w_cc = Literal::enumerator_for(intcc, cc); + let w_cc = Literal::enumerator_for(&imm.intcc, cc); widen.legalize( def!(a = icmp_imm.int_ty(w_cc, b, c)), vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], @@ -464,13 +491,13 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG // Expand integer operations with carry for RISC architectures that don't have // the flags. - let intcc_ult = Literal::enumerator_for(intcc, "ult"); + let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); expand.legalize( def!((a, c) = iadd_cout(x, y)), vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], ); - let intcc_ugt = Literal::enumerator_for(intcc, "ugt"); + let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); expand.legalize( def!((a, b) = isub_bout(x, y)), vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], @@ -514,21 +541,12 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG ], ); - // Expansions for fcvt_from_{u,s}int for smaller integer types. - // These use expand and not widen because the controlling type variable for - // these instructions are f32/f64, which are legalized as part of the expand + // Expansion for fcvt_from_sint for smaller integer types. + // This uses expand and not widen because the controlling type variable for + // this instruction is f32/f64, which is legalized as part of the expand // group. for &dest_ty in &[F32, F64] { for &src_ty in &[I8, I16] { - let bound_inst = fcvt_from_uint.bind(dest_ty).bind(src_ty); - expand.legalize( - def!(a = bound_inst(b)), - vec![ - def!(x = uextend.I32(b)), - def!(a = fcvt_from_uint.dest_ty(x)), - ], - ); - let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); expand.legalize( def!(a = bound_inst(b)), @@ -592,7 +610,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG } //# Expand bnot using xor. - let minus_one = Literal::constant(imm64, -1); + let minus_one = Literal::constant(&imm.imm64, -1); expand.legalize( def!(a = bnot(x)), vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], @@ -601,82 +619,82 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG //# Expand bitrev //# Adapted from Stack Overflow. 
//# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c - let imm64_1 = Literal::constant(imm64, 1); - let imm64_2 = Literal::constant(imm64, 2); - let imm64_4 = Literal::constant(imm64, 4); + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_2 = Literal::constant(&imm.imm64, 2); + let imm64_4 = Literal::constant(&imm.imm64, 4); widen.legalize( def!(a = bitrev.I8(x)), vec![ - def!(a1 = band_imm(x, Literal::constant(imm64, 0xaa))), + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(imm64, 0x55))), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), def!(a4 = ishl_imm(a3, imm64_1)), def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(imm64, 0xcc))), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(imm64, 0x33))), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), def!(b4 = ishl_imm(b3, imm64_2)), def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0))), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f))), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), def!(c4 = ishl_imm(c3, imm64_4)), def!(a = bor(c2, c4)), ], ); - let imm64_8 = Literal::constant(imm64, 8); + let imm64_8 = Literal::constant(&imm.imm64, 8); widen.legalize( def!(a = bitrev.I16(x)), vec![ - def!(a1 = band_imm(x, Literal::constant(imm64, 0xaaaa))), + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(imm64, 0x5555))), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), def!(a4 = ishl_imm(a3, imm64_1)), def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(imm64, 0xcccc))), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(imm64, 0x3333))), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), def!(b4 = ishl_imm(b3, imm64_2)), def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0f0))), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f0f))), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), def!(c4 = ishl_imm(c3, imm64_4)), def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(imm64, 0xff00))), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(imm64, 0x00ff))), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), def!(d4 = ishl_imm(d3, imm64_8)), def!(a = bor(d2, d4)), ], ); - let imm64_16 = Literal::constant(imm64, 16); + let imm64_16 = Literal::constant(&imm.imm64, 16); expand.legalize( def!(a = bitrev.I32(x)), vec![ - def!(a1 = band_imm(x, Literal::constant(imm64, 0xaaaaaaaa))), + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaaaaaa))), def!(a2 = ushr_imm(a1, imm64_1)), - def!(a3 = band_imm(x, Literal::constant(imm64, 0x55555555))), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55555555))), def!(a4 = ishl_imm(a3, imm64_1)), def!(b = bor(a2, a4)), - def!(b1 = band_imm(b, Literal::constant(imm64, 0xcccccccc))), + def!(b1 = 
band_imm(b, Literal::constant(&imm.imm64, 0xcccccccc))), def!(b2 = ushr_imm(b1, imm64_2)), - def!(b3 = band_imm(b, Literal::constant(imm64, 0x33333333))), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33333333))), def!(b4 = ishl_imm(b3, imm64_2)), def!(c = bor(b2, b4)), - def!(c1 = band_imm(c, Literal::constant(imm64, 0xf0f0f0f0))), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0f0f0))), def!(c2 = ushr_imm(c1, imm64_4)), - def!(c3 = band_imm(c, Literal::constant(imm64, 0x0f0f0f0f))), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f0f0f))), def!(c4 = ishl_imm(c3, imm64_4)), def!(d = bor(c2, c4)), - def!(d1 = band_imm(d, Literal::constant(imm64, 0xff00ff00))), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00ff00))), def!(d2 = ushr_imm(d1, imm64_8)), - def!(d3 = band_imm(d, Literal::constant(imm64, 0x00ff00ff))), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff00ff))), def!(d4 = ishl_imm(d3, imm64_8)), def!(e = bor(d2, d4)), def!(e1 = ushr_imm(e, imm64_16)), @@ -686,21 +704,21 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG ); #[allow(overflowing_literals)] - let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(imm64, 0xaaaaaaaaaaaaaaaa); - let imm64_0x5555555555555555 = Literal::constant(imm64, 0x5555555555555555); + let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaaaaaaaaaaaaaa); + let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555555555555555); #[allow(overflowing_literals)] - let imm64_0xcccccccccccccccc = Literal::constant(imm64, 0xcccccccccccccccc); - let imm64_0x3333333333333333 = Literal::constant(imm64, 0x3333333333333333); + let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccccccccccccccc); + let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333333333333333); #[allow(overflowing_literals)] - let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(imm64, 0xf0f0f0f0f0f0f0f0); - let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(imm64, 0x0f0f0f0f0f0f0f0f); + let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0f0f0f0f0f0f0); + let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f0f0f0f0f0f0f); #[allow(overflowing_literals)] - let imm64_0xff00ff00ff00ff00 = Literal::constant(imm64, 0xff00ff00ff00ff00); - let imm64_0x00ff00ff00ff00ff = Literal::constant(imm64, 0x00ff00ff00ff00ff); + let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00ff00ff00ff00); + let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff00ff00ff00ff); #[allow(overflowing_literals)] - let imm64_0xffff0000ffff0000 = Literal::constant(imm64, 0xffff0000ffff0000); - let imm64_0x0000ffff0000ffff = Literal::constant(imm64, 0x0000ffff0000ffff); - let imm64_32 = Literal::constant(imm64, 32); + let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff0000ffff0000); + let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000ffff0000ffff); + let imm64_32 = Literal::constant(&imm.imm64, 32); expand.legalize( def!(a = bitrev.I64(x)), @@ -738,8 +756,12 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG // Floating-point sign manipulations. 
for &(ty, const_inst, minus_zero) in &[ - (F32, f32const, &Literal::bits(ieee32, 0x80000000)), - (F64, f64const, &Literal::bits(ieee64, 0x8000000000000000)), + (F32, f32const, &Literal::bits(&imm.ieee32, 0x80000000)), + ( + F64, + f64const, + &Literal::bits(&imm.ieee64, 0x8000000000000000), + ), ] { expand.legalize( def!(a = fabs.ty(x)), @@ -766,7 +788,7 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG let mut groups = TransformGroups::new(); - narrow.build_and_add_to(&mut groups); + let narrow_id = narrow.build_and_add_to(&mut groups); let expand_id = expand.build_and_add_to(&mut groups); // Expansions using CPU flags. @@ -782,9 +804,9 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG ) .chain_with(expand_id); - let imm64_0 = Literal::constant(imm64, 0); - let intcc_ne = Literal::enumerator_for(intcc, "ne"); - let intcc_eq = Literal::enumerator_for(intcc, "eq"); + let imm64_0 = Literal::constant(&imm.imm64, 0); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); expand_flags.legalize( def!(trapnz(x, c)), @@ -804,7 +826,83 @@ pub fn define(insts: &InstructionGroup, immediates: &OperandKinds) -> TransformG expand_flags.build_and_add_to(&mut groups); - // XXX The order of declarations unfortunately matters to be compatible with the Python code. + // Narrow legalizations using CPU flags. + let mut narrow_flags = TransformGroupBuilder::new( + "narrow_flags", + r#" + Narrow instructions for architectures with flags. + + Narrow some instructions using CPU flags, then fall back to the normal + legalizations. Not all architectures support CPU flags, so these + patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_ifcout(xl, yl)), + def!(ah = iadd_ifcin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_ifbout(xl, yl)), + def!(ah = isub_ifbin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.build_and_add_to(&mut groups); + + // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and + // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. + // Narrow legalizations for ISAs that don't have CPU flags. + let mut narrow_no_flags = TransformGroupBuilder::new( + "narrow_no_flags", + r#" + Narrow instructions for architectures without flags. + + Narrow some instructions avoiding the use of CPU flags, then fall back + to the normal legalizations. Not all architectures support CPU flags, + so these patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_no_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_cout(xl, yl)), + def!(ah = iadd_cin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_bout(xl, yl)), + def!(ah = isub_bin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.build_and_add_to(&mut groups); + + // TODO The order of declarations unfortunately matters to be compatible with the Python code. 
 // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls
 // above.
 widen.build_and_add_to(&mut groups);
diff --git a/cranelift-codegen/meta/src/shared/mod.rs b/cranelift-codegen/meta/src/shared/mod.rs
index f16fb7bb8..73cdb4254 100644
--- a/cranelift-codegen/meta/src/shared/mod.rs
+++ b/cranelift-codegen/meta/src/shared/mod.rs
@@ -1,6 +1,6 @@
 //! Shared definitions for the Cranelift intermediate language.

-pub mod entities;
+mod entities;
 pub mod formats;
 pub mod immediates;
 pub mod instructions;
@@ -9,57 +9,41 @@
 pub mod settings;
 pub mod types;

 use crate::cdsl::formats::FormatRegistry;
-use crate::cdsl::instructions::InstructionGroup;
-use crate::cdsl::operands::OperandKind;
+use crate::cdsl::instructions::{AllInstructions, InstructionGroup};
 use crate::cdsl::settings::SettingGroup;
 use crate::cdsl::xform::TransformGroups;

-pub struct Definitions {
+use crate::shared::entities::EntityRefs;
+use crate::shared::immediates::Immediates;
+
+pub(crate) struct Definitions {
     pub settings: SettingGroup,
+    pub all_instructions: AllInstructions,
     pub instructions: InstructionGroup,
-    pub operand_kinds: OperandKinds,
+    pub imm: Immediates,
     pub format_registry: FormatRegistry,
     pub transform_groups: TransformGroups,
 }

-pub struct OperandKinds(Vec<OperandKind>);
-
-impl OperandKinds {
-    pub fn new() -> Self {
-        Self(Vec::new())
-    }
-
-    pub fn by_name(&self, name: &'static str) -> &OperandKind {
-        self.0
-            .iter()
-            .find(|op| op.name == name)
-            .expect(&format!("unknown Operand name: {}", name))
-    }
-
-    pub fn push(&mut self, operand_kind: OperandKind) {
-        assert!(
-            self.0
-                .iter()
-                .find(|existing| existing.name == operand_kind.name)
-                .is_none(),
-            "trying to insert operand kind '{}' for the second time",
-            operand_kind.name
-        );
-        self.0.push(operand_kind);
-    }
-}
+pub(crate) fn define() -> Definitions {
+    let mut all_instructions = AllInstructions::new();

-pub fn define() -> Definitions {
-    let immediates = OperandKinds(immediates::define());
-    let entities = OperandKinds(entities::define());
+    let immediates = Immediates::new();
+    let entities = EntityRefs::new();
     let format_registry = formats::define(&immediates, &entities);
-    let instructions = instructions::define(&format_registry, &immediates, &entities);
+    let instructions = instructions::define(
+        &mut all_instructions,
+        &format_registry,
+        &immediates,
+        &entities,
+    );
     let transform_groups = legalize::define(&instructions, &immediates);

     Definitions {
         settings: settings::define(),
+        all_instructions,
         instructions,
-        operand_kinds: immediates,
+        imm: immediates,
         format_registry,
         transform_groups,
     }
diff --git a/cranelift-codegen/meta/src/shared/settings.rs b/cranelift-codegen/meta/src/shared/settings.rs
index 6e3af7062..43fe357ab 100644
--- a/cranelift-codegen/meta/src/shared/settings.rs
+++ b/cranelift-codegen/meta/src/shared/settings.rs
@@ -8,11 +8,12 @@ pub fn define() -> SettingGroup {
         r#"
         Optimization level:

-        - default: Very profitable optimizations enabled, none slow.
-        - best: Enable all optimizations
-        - fastest: Optimize for compile time by disabling most optimizations.
+        - none: Minimise compile time by disabling most optimizations.
+        - speed: Generate the fastest possible code.
+        - speed_and_size: like "speed", but also perform transformations
+          aimed at reducing code size.
"#, - vec!["default", "best", "fastest"], + vec!["none", "speed", "speed_and_size"], ); settings.add_bool( @@ -84,7 +85,33 @@ pub fn define() -> SettingGroup { false, ); - settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", true); + settings.add_bool( + "enable_pinned_reg", + r#"Enable the use of the pinned register. + + This register is excluded from register allocation, and is completely under the control of + the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it + with the set_pinned_reg instruction. + "#, + false, + ); + + settings.add_bool( + "use_pinned_reg_as_heap_base", + r#"Use the pinned register as the heap base. + + Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom + legalization of the `heap_addr` instruction so it will use the pinned register as the heap + base, instead of fetching it from a global value. + + Warning! Enabling this means that the pinned register *must* be maintained to contain the + heap base address at all times, during the lifetime of a function. Using the pinned + register for other purposes when this is set is very likely to cause crashes. + "#, + false, + ); + + settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false); settings.add_bool( "enable_atomics", @@ -92,8 +119,44 @@ pub fn define() -> SettingGroup { true, ); + settings.add_bool( + "enable_safepoints", + r#" + Enable safepoint instruction insertions. + + This will allow the emit_stackmaps() function to insert the safepoint + instruction on top of calls and interrupt traps in order to display the + live reference values at that point in the program. + "#, + false, + ); + // Settings specific to the `baldrdash` calling convention. + settings.add_enum( + "libcall_call_conv", + r#" + Defines the calling convention to use for LibCalls call expansion, + since it may be different from the ISA default calling convention. + + The default value is to use the same calling convention as the ISA + default calling convention. + + This list should be kept in sync with the list of calling + conventions available in isa/call_conv.rs. + "#, + vec![ + "isa_default", + "fast", + "cold", + "system_v", + "windows_fastcall", + "baldrdash_system_v", + "baldrdash_windows", + "probestack", + ], + ); + settings.add_num( "baldrdash_prologue_words", r#" diff --git a/cranelift-codegen/meta/src/shared/types.rs b/cranelift-codegen/meta/src/shared/types.rs index aff837252..52fa9545c 100644 --- a/cranelift-codegen/meta/src/shared/types.rs +++ b/cranelift-codegen/meta/src/shared/types.rs @@ -1,6 +1,6 @@ //! This module predefines all the Cranelift scalar types. -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] pub enum Bool { /// 1-bit bool. B1 = 1, @@ -12,6 +12,8 @@ pub enum Bool { B32 = 32, /// 64-bit bool. B64 = 64, + /// 128-bit bool. + B128 = 128, } /// This provides an iterator through all of the supported bool variants. @@ -34,6 +36,7 @@ impl Iterator for BoolIterator { 2 => Some(Bool::B16), 3 => Some(Bool::B32), 4 => Some(Bool::B64), + 5 => Some(Bool::B128), _ => return None, }; self.index += 1; @@ -41,7 +44,7 @@ impl Iterator for BoolIterator { } } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] pub enum Int { /// 8-bit int. I8 = 8, @@ -51,6 +54,8 @@ pub enum Int { I32 = 32, /// 64-bit int. I64 = 64, + /// 128-bit int. + I128 = 128, } /// This provides an iterator through all of the supported int variants. 
@@ -72,6 +77,7 @@ impl Iterator for IntIterator {
             1 => Some(Int::I16),
             2 => Some(Int::I32),
             3 => Some(Int::I64),
+            4 => Some(Int::I128),
             _ => return None,
         };
         self.index += 1;
@@ -79,7 +85,7 @@ impl Iterator for IntIterator {
     }
 }

-#[derive(Debug, Clone, Copy, Eq, PartialEq)]
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
 pub enum Float {
     F32 = 32,
     F64 = 64,
@@ -145,6 +151,38 @@ impl Iterator for FlagIterator {
     }
 }

+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub enum Reference {
+    /// 32-bit reference.
+    R32 = 32,
+    /// 64-bit reference.
+    R64 = 64,
+}
+
+/// This provides an iterator through all of the supported reference variants.
+pub struct ReferenceIterator {
+    index: u8,
+}
+
+impl ReferenceIterator {
+    pub fn new() -> Self {
+        Self { index: 0 }
+    }
+}
+
+impl Iterator for ReferenceIterator {
+    type Item = Reference;
+    fn next(&mut self) -> Option<Self::Item> {
+        let res = match self.index {
+            0 => Some(Reference::R32),
+            1 => Some(Reference::R64),
+            _ => return None,
+        };
+        self.index += 1;
+        res
+    }
+}
+
 #[cfg(test)]
 mod iter_tests {
     use super::*;
@@ -157,6 +195,7 @@ mod iter_tests {
         assert_eq!(bool_iter.next(), Some(Bool::B16));
         assert_eq!(bool_iter.next(), Some(Bool::B32));
         assert_eq!(bool_iter.next(), Some(Bool::B64));
+        assert_eq!(bool_iter.next(), Some(Bool::B128));
         assert_eq!(bool_iter.next(), None);
     }

@@ -167,6 +206,7 @@ mod iter_tests {
         assert_eq!(int_iter.next(), Some(Int::I16));
         assert_eq!(int_iter.next(), Some(Int::I32));
         assert_eq!(int_iter.next(), Some(Int::I64));
+        assert_eq!(int_iter.next(), Some(Int::I128));
         assert_eq!(int_iter.next(), None);
     }

@@ -185,4 +225,12 @@ mod iter_tests {
         assert_eq!(flag_iter.next(), Some(Flag::FFlags));
         assert_eq!(flag_iter.next(), None);
     }
+
+    #[test]
+    fn reference_iter_works() {
+        let mut reference_iter = ReferenceIterator::new();
+        assert_eq!(reference_iter.next(), Some(Reference::R32));
+        assert_eq!(reference_iter.next(), Some(Reference::R64));
+        assert_eq!(reference_iter.next(), None);
+    }
 }
diff --git a/cranelift-codegen/src/binemit/memorysink.rs b/cranelift-codegen/src/binemit/memorysink.rs
index b70b4a2c8..ffb8b44b4 100644
--- a/cranelift-codegen/src/binemit/memorysink.rs
+++ b/cranelift-codegen/src/binemit/memorysink.rs
@@ -13,9 +13,11 @@
 //! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any
 //! relocations to a `RelocSink` trait object. Relocations are less frequent than the
 //! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe.
-
 use super::{Addend, CodeInfo, CodeOffset, CodeSink, Reloc};
-use crate::ir::{ExternalName, JumpTable, SourceLoc, TrapCode};
+use crate::binemit::stackmap::Stackmap;
+use crate::ir::entities::Value;
+use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, SourceLoc, TrapCode};
+use crate::isa::TargetIsa;
 use core::ptr::write_unaligned;

 /// A `CodeSink` that writes binary machine code directly into memory.
@@ -36,6 +38,7 @@ pub struct MemoryCodeSink<'a> {
     offset: isize,
     relocs: &'a mut dyn RelocSink,
     traps: &'a mut dyn TrapSink,
+    stackmaps: &'a mut dyn StackmapSink,

     /// Information about the generated code and read-only data.
pub info: CodeInfo, } @@ -49,6 +52,7 @@ impl<'a> MemoryCodeSink<'a> { data: *mut u8, relocs: &'a mut dyn RelocSink, traps: &'a mut dyn TrapSink, + stackmaps: &'a mut dyn StackmapSink, ) -> Self { Self { data, @@ -61,6 +65,7 @@ impl<'a> MemoryCodeSink<'a> { }, relocs, traps, + stackmaps, } } } @@ -73,6 +78,9 @@ pub trait RelocSink { /// Add a relocation referencing an external symbol at the current offset. fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend); + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset); + /// Add a relocation referencing a jump table. fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable); } @@ -80,7 +88,7 @@ pub trait RelocSink { /// A trait for receiving trap codes and offsets. /// /// If you don't need information about possible traps, you can use the -/// [`NullTrapSink`](binemit/trait.TrapSink.html) implementation. +/// [`NullTrapSink`](NullTrapSink) implementation. pub trait TrapSink { /// Add trap information for a specific offset. fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode); @@ -127,6 +135,11 @@ impl<'a> CodeSink for MemoryCodeSink<'a> { self.relocs.reloc_external(ofs, rel, name, addend); } + fn reloc_constant(&mut self, rel: Reloc, constant_offset: ConstantOffset) { + let ofs = self.offset(); + self.relocs.reloc_constant(ofs, rel, constant_offset); + } + fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) { let ofs = self.offset(); self.relocs.reloc_jt(ofs, rel, jt); @@ -149,6 +162,23 @@ impl<'a> CodeSink for MemoryCodeSink<'a> { self.info.rodata_size = self.offset() - (self.info.jumptables_size + self.info.code_size); self.info.total_size = self.offset(); } + + fn add_stackmap(&mut self, val_list: &[Value], func: &Function, isa: &dyn TargetIsa) { + let ofs = self.offset(); + let stackmap = Stackmap::from_values(&val_list, func, isa); + self.stackmaps.add_stackmap(ofs, stackmap); + } +} + +/// A `RelocSink` implementation that does nothing, which is convenient when +/// compiling code that does not relocate anything. +pub struct NullRelocSink {} + +impl RelocSink for NullRelocSink { + fn reloc_ebb(&mut self, _: u32, _: Reloc, _: u32) {} + fn reloc_external(&mut self, _: u32, _: Reloc, _: &ExternalName, _: i64) {} + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {} + fn reloc_jt(&mut self, _: u32, _: Reloc, _: JumpTable) {} } /// A `TrapSink` implementation that does nothing, which is convenient when @@ -158,3 +188,16 @@ pub struct NullTrapSink {} impl TrapSink for NullTrapSink { fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {} } + +/// A trait for emitting stackmaps. +pub trait StackmapSink { + /// Output a bitmap of the stack representing the live reference variables at this code offset. + fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap); +} + +/// Placeholder StackmapSink that does nothing. 
+pub struct NullStackmapSink {}
+
+impl StackmapSink for NullStackmapSink {
+    fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap) {}
+}
diff --git a/cranelift-codegen/src/binemit/mod.rs b/cranelift-codegen/src/binemit/mod.rs
index 21b07587e..0123e4dbd 100644
--- a/cranelift-codegen/src/binemit/mod.rs
+++ b/cranelift-codegen/src/binemit/mod.rs
@@ -6,14 +6,22 @@
 mod memorysink;
 mod relaxation;
 mod shrink;
+mod stackmap;

-pub use self::memorysink::{MemoryCodeSink, NullTrapSink, RelocSink, TrapSink};
+pub use self::memorysink::{
+    MemoryCodeSink, NullRelocSink, NullStackmapSink, NullTrapSink, RelocSink, StackmapSink,
+    TrapSink,
+};
 pub use self::relaxation::relax_branches;
 pub use self::shrink::shrink_instructions;
+pub use self::stackmap::Stackmap;
+use crate::ir::entities::Value;
+use crate::ir::{ConstantOffset, ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
+use crate::isa::TargetIsa;
 pub use crate::regalloc::RegDiversions;
-
-use crate::ir::{ExternalName, Function, Inst, JumpTable, SourceLoc, TrapCode};
 use core::fmt;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};

 /// Offset in bytes from the beginning of the function.
 ///
@@ -25,7 +33,8 @@ pub type CodeOffset = u32;
 pub type Addend = i64;

 /// Relocation kinds for every ISA
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub enum Reloc {
     /// absolute 4-byte
     Abs4,
@@ -124,6 +133,9 @@ pub trait CodeSink {
     /// Add a relocation referencing an external symbol plus the addend at the current offset.
     fn reloc_external(&mut self, _: Reloc, _: &ExternalName, _: Addend);

+    /// Add a relocation referencing a constant.
+    fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset);
+
     /// Add a relocation referencing a jump table.
     fn reloc_jt(&mut self, _: Reloc, _: JumpTable);

@@ -138,6 +150,9 @@ pub trait CodeSink {

     /// Read-only data output is complete, we're done.
     fn end_codegen(&mut self);
+
+    /// Add a stackmap at the current code offset.
+    fn add_stackmap(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa);
 }

 /// Report a bad encoding error.
@@ -154,23 +169,23 @@ pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
 ///
 /// This function is called from the `TargetIsa::emit_function()` implementations with the
 /// appropriate instruction emitter.
-pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS)
+pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa)
 where
     CS: CodeSink,
-    EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS),
+    EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa),
 {
     let mut divert = RegDiversions::new();
     for ebb in func.layout.ebbs() {
-        divert.clear();
+        divert.at_ebb(&func.entry_diversions, ebb);
         debug_assert_eq!(func.offsets[ebb], sink.offset());
         for inst in func.layout.ebb_insts(ebb) {
-            emit_inst(func, inst, &mut divert, sink);
+            emit_inst(func, inst, &mut divert, sink, isa);
         }
     }

     sink.begin_jumptables();

-    // output jump tables
+    // Output jump tables.
     for (jt, jt_data) in func.jump_tables.iter() {
         let jt_offset = func.jt_offsets[jt];
         for ebb in jt_data.iter() {
@@ -180,7 +195,13 @@ where
     }

     sink.begin_rodata();
-    // TODO: No read-only data (constant pools) at this time.
+
+    // Output constants.
+    for (_, constant_data) in func.dfg.constants.iter() {
+        for byte in constant_data.iter() {
+            sink.put1(*byte)
+        }
+    }

     sink.end_codegen();
 }
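Since constants now land in the read-only data section, downstream relocation consumers need to handle the new constant-pool relocations. A sketch of a `RelocSink` that records them, with the other callbacks left empty (the struct name and field are hypothetical; the trait shape follows the declarations in this patch):

```rust
use cranelift_codegen::binemit::{Addend, CodeOffset, Reloc, RelocSink};
use cranelift_codegen::ir::{ConstantOffset, ExternalName, JumpTable};

/// Hypothetical sink that collects constant-pool relocations for later fixup.
#[derive(Default)]
struct CollectingRelocSink {
    constants: Vec<(CodeOffset, Reloc, ConstantOffset)>,
}

impl RelocSink for CollectingRelocSink {
    fn reloc_ebb(&mut self, _: CodeOffset, _: Reloc, _: CodeOffset) {}
    fn reloc_external(&mut self, _: CodeOffset, _: Reloc, _: &ExternalName, _: Addend) {}
    fn reloc_constant(&mut self, offset: CodeOffset, reloc: Reloc, constant: ConstantOffset) {
        // Record where in the code a constant-pool reference was emitted.
        self.constants.push((offset, reloc, constant));
    }
    fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable) {}
}
```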
diff --git a/cranelift-codegen/src/binemit/relaxation.rs b/cranelift-codegen/src/binemit/relaxation.rs
index 1e84f2f4b..43f361885 100644
--- a/cranelift-codegen/src/binemit/relaxation.rs
+++ b/cranelift-codegen/src/binemit/relaxation.rs
@@ -29,18 +29,29 @@
 use crate::binemit::{CodeInfo, CodeOffset};
 use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
 use crate::ir::{Function, InstructionData, Opcode};
 use crate::isa::{EncInfo, TargetIsa};
 use crate::iterators::IteratorExtras;
 use crate::regalloc::RegDiversions;
 use crate::timing;
 use crate::CodegenResult;
+use core::convert::TryFrom;
 use log::debug;

+#[cfg(feature = "basic-blocks")]
+use crate::ir::{Ebb, Inst, Value, ValueList};
+
 /// Relax branches and compute the final layout of EBB headers in `func`.
 ///
 /// Fill in the `func.offsets` table so the function is ready for binary emission.
-pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
+pub fn relax_branches(
+    func: &mut Function,
+    _cfg: &mut ControlFlowGraph,
+    _domtree: &mut DominatorTree,
+    isa: &dyn TargetIsa,
+) -> CodegenResult<CodeInfo> {
     let _tt = timing::relax_branches();
     let encinfo = isa.encoding_info();
@@ -49,7 +60,11 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
     func.offsets.clear();
     func.offsets.resize(func.dfg.num_ebbs());

-    // Start by inserting fall through instructions.
+    // Start by removing redundant jumps.
+    #[cfg(feature = "basic-blocks")]
+    fold_redundant_jumps(func, _cfg, _domtree);
+
+    // Convert jumps to fallthrough instructions where possible.
     fallthroughs(func);

     let mut offset = 0;
@@ -59,7 +74,7 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
     {
         let mut cur = FuncCursor::new(func);
         while let Some(ebb) = cur.next_ebb() {
-            divert.clear();
+            divert.at_ebb(&cur.func.entry_diversions, ebb);
             cur.func.offsets[ebb] = offset;
             while let Some(inst) = cur.next_inst() {
                 divert.apply(&cur.func.dfg[inst]);
@@ -78,7 +93,7 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
     // Visit all instructions in layout order.
     let mut cur = FuncCursor::new(func);
     while let Some(ebb) = cur.next_ebb() {
-        divert.clear();
+        divert.at_ebb(&cur.func.entry_diversions, ebb);

         // Record the offset for `ebb` and make sure we iterate until offsets are stable.
         if cur.func.offsets[ebb] != offset {
@@ -114,15 +129,18 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
     for (jt, jt_data) in func.jump_tables.iter() {
         func.jt_offsets[jt] = offset;
-        // TODO: this should be computed based on the min size needed to hold
-        // the furthest branch.
+        // TODO: this should be computed based on the min size needed to hold the furthest branch.
         offset += jt_data.len() as u32 * 4;
     }

     let jumptables_size = offset - jumptables;
     let rodata = offset;

-    // TODO: Once we have constant pools we'll do some processing here to update offset.
+    for constant in func.dfg.constants.entries_mut() {
+        constant.set_offset(offset);
+        offset +=
+            u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
+    }

     let rodata_size = offset - rodata;

@@ -134,6 +152,141 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
     })
 }

+/// Folds an instruction if it is a redundant jump.
+/// Returns whether folding was performed (which invalidates the CFG). +#[cfg(feature = "basic-blocks")] +fn try_fold_redundant_jump( + func: &mut Function, + cfg: &mut ControlFlowGraph, + ebb: Ebb, + first_inst: Inst, +) -> bool { + let first_dest = match func.dfg[first_inst].branch_destination() { + Some(ebb) => ebb, // The instruction was a single-target branch. + None => { + return false; // The instruction was either multi-target or not a branch. + } + }; + + // For the moment, only attempt to fold a branch to an ebb that is parameterless. + // These blocks are mainly produced by critical edge splitting. + // + // TODO: Allow folding blocks that define SSA values and function as phi nodes. + if func.dfg.num_ebb_params(first_dest) != 0 { + return false; + } + + // Look at the first instruction of the first branch's destination. + // If it is an unconditional branch, maybe the second jump can be bypassed. + let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); + if func.dfg[second_inst].opcode() != Opcode::Jump { + return false; + } + + // Now we need to fix up first_inst's ebb parameters to match second_inst's, + // without changing the branch-specific arguments. + // + // The intermediary block is allowed to reference any SSA value that dominates it, + // but that SSA value may not necessarily also dominate the instruction that's + // being patched. + + // Get the arguments and parameters passed by the first branch. + let num_fixed = func.dfg[first_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (first_args, first_params) = func.dfg[first_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + + // Get the parameters passed by the second jump. + let num_fixed = func.dfg[second_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (_, second_params) = func.dfg[second_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + let mut second_params = second_params.to_vec(); // Clone for rewriting below. + + // For each parameter passed by the second jump, if any of those parameters + // was a block parameter, rewrite it to refer to the value that the first jump + // passed in its parameters. Otherwise, make sure it dominates first_inst. + // + // For example: if we `ebb0: jump ebb1(v1)` to `ebb1(v2): jump ebb2(v2)`, + // we want to rewrite the original jump to `jump ebb2(v1)`. + let ebb_params: &[Value] = func.dfg.ebb_params(first_dest); + debug_assert!(ebb_params.len() == first_params.len()); + + for value in second_params.iter_mut() { + if let Some((n, _)) = ebb_params.iter().enumerate().find(|(_, &p)| p == *value) { + // This value was the Nth parameter passed to the second_inst's ebb. + // Rewrite it as the Nth parameter passed by first_inst. + *value = first_params[n]; + } + } + + // Build a value list of first_args (unchanged) followed by second_params (rewritten). + let arguments_vec: std::vec::Vec<_> = first_args + .iter() + .chain(second_params.iter()) + .map(|x| *x) + .collect(); + let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); + + func.dfg[first_inst].take_value_list(); // Drop the current list. + func.dfg[first_inst].put_value_list(value_list); // Put the new list. + + // Bypass the second jump. + // This can disconnect the Ebb containing `second_inst`, to be cleaned up later. 
+    let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
+    func.change_branch_destination(first_inst, second_dest);
+    cfg.recompute_ebb(func, ebb);
+
+    // The previously-intermediary Ebb may now be unreachable. Update CFG.
+    if cfg.pred_iter(first_dest).count() == 0 {
+        // Remove all instructions from that ebb.
+        while let Some(inst) = func.layout.first_inst(first_dest) {
+            func.layout.remove_inst(inst);
+        }
+
+        // Remove the block...
+        cfg.recompute_ebb(func, first_dest); // ...from predecessor lists.
+        func.layout.remove_ebb(first_dest); // ...from the layout.
+    }
+
+    return true;
+}
+
+/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
+/// This transformation may orphan some blocks.
+#[cfg(feature = "basic-blocks")]
+fn fold_redundant_jumps(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+) {
+    let mut folded = false;
+
+    // Postorder iteration guarantees that a chain of jumps is visited from
+    // the end of the chain to the start of the chain.
+    for &ebb in domtree.cfg_postorder() {
+        // Only proceed if the first terminator instruction is a single-target branch.
+        let first_inst = func.layout.last_inst(ebb).expect("Ebb has no terminator");
+        folded |= try_fold_redundant_jump(func, cfg, ebb, first_inst);
+
+        // Also try the previous instruction.
+        if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
+            folded |= try_fold_redundant_jump(func, cfg, ebb, prev_inst);
+        }
+    }
+
+    // Folding jumps invalidates the dominator tree.
+    if folded {
+        domtree.compute(func, cfg);
+    }
+}
+
 /// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
 /// existing `fallthrough` instructions are correct.
 fn fallthroughs(func: &mut Function) {
diff --git a/cranelift-codegen/src/binemit/shrink.rs b/cranelift-codegen/src/binemit/shrink.rs
index 281a93ae3..084ed2bc3 100644
--- a/cranelift-codegen/src/binemit/shrink.rs
+++ b/cranelift-codegen/src/binemit/shrink.rs
@@ -20,7 +20,9 @@ pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) {
     let mut divert = RegDiversions::new();

     for ebb in func.layout.ebbs() {
-        divert.clear();
+        // Load diversions from predecessors.
+        divert.at_ebb(&func.entry_diversions, ebb);
+
         for inst in func.layout.ebb_insts(ebb) {
             let enc = func.encodings[inst];
             if enc.is_legal() {
@@ -33,11 +35,12 @@ pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) {
                 //
                 // TODO: Eventually, we want the register allocator to avoid leaving these special
                 // instructions behind, but for now, just temporarily avoid trying to shrink them.
-                match func.dfg[inst] {
+                let inst_data = &func.dfg[inst];
+                match inst_data {
                     InstructionData::RegMove { .. }
                     | InstructionData::RegFill { .. }
                     | InstructionData::RegSpill { .. } => {
-                        divert.apply(&func.dfg[inst]);
+                        divert.apply(inst_data);
                         continue;
                     }
                     _ => (),
diff --git a/cranelift-codegen/src/binemit/stackmap.rs b/cranelift-codegen/src/binemit/stackmap.rs
new file mode 100644
index 000000000..4051b47e1
--- /dev/null
+++ b/cranelift-codegen/src/binemit/stackmap.rs
@@ -0,0 +1,123 @@
+use crate::bitset::BitSet;
+use crate::ir;
+use crate::isa::TargetIsa;
+use std::vec::Vec;
+
+type Num = u32;
+const NUM_BITS: usize = std::mem::size_of::<Num>() * 8;
+
+/// Wrapper class for longer bit vectors that cannot be represented by a single BitSet.
+#[derive(Clone, Debug)]
+pub struct Stackmap {
+    bitmap: Vec<BitSet<Num>>,
+}
+
+impl Stackmap {
+    /// Create a stackmap based on where references are located on a function's stack.
+    pub fn from_values(
+        args: &[ir::entities::Value],
+        func: &ir::Function,
+        isa: &dyn TargetIsa,
+    ) -> Self {
+        let loc = &func.locations;
+        let mut live_ref_in_stack_slot = std::collections::HashSet::new();
+        // References can be in registers, and live register values are pushed onto the stack before calls and traps.
+        // TODO: Implement register maps. If a register containing a reference is spilled and reused after a safepoint,
+        // it could contain a stale reference value if the garbage collector relocated the value.
+        for val in args {
+            if let Some(value_loc) = loc.get(*val) {
+                match *value_loc {
+                    ir::ValueLoc::Stack(stack_slot) => live_ref_in_stack_slot.insert(stack_slot),
+                    _ => false,
+                };
+            }
+        }
+
+        // SpiderMonkey stackmap structure:
+        // <trap register dump> + <general spill area> + <frame> + <inbound args>
+        // Bit vector goes from lower addresses to higher addresses.
+
+        // TODO: Get trap register layout from Spidermonkey and prepend to bitvector below.
+        let stack = &func.stack_slots;
+        let frame_size = stack.frame_size.unwrap();
+        let word_size = ir::stackslot::StackSize::from(isa.pointer_bytes());
+        let num_words = (frame_size / word_size) as usize;
+        let mut vec = std::vec::Vec::with_capacity(num_words);
+
+        vec.resize(num_words, false);
+
+        // Frame (includes spills and inbound args).
+        for (ss, ssd) in stack.iter() {
+            if live_ref_in_stack_slot.contains(&ss) {
+                // Assumption: a greater magnitude of offset implies a higher address.
+                let index = (((ssd.offset.unwrap().abs() as u32) - ssd.size) / word_size) as usize;
+                vec[index] = true;
+            }
+        }
+
+        Stackmap::from_slice(&vec)
+    }
+
+    /// Create a vec of Bitsets from a slice of bools.
+    pub fn from_slice(vec: &[bool]) -> Self {
+        let len = vec.len();
+        let num_word = len / NUM_BITS + (len % NUM_BITS != 0) as usize;
+        let mut bitmap = Vec::with_capacity(num_word);
+
+        for segment in vec.chunks(NUM_BITS) {
+            let mut curr_word = 0;
+            for (i, set) in segment.iter().enumerate() {
+                if *set {
+                    curr_word |= 1 << i;
+                }
+            }
+            bitmap.push(BitSet(curr_word));
+        }
+        Self { bitmap }
+    }
+
+    /// Returns a specified bit.
+    pub fn get_bit(&self, bit_index: usize) -> bool {
+        assert!(bit_index < NUM_BITS * self.bitmap.len());
+        let word_index = bit_index / NUM_BITS;
+        let word_offset = (bit_index % NUM_BITS) as u8;
+        self.bitmap[word_index].contains(word_offset)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn stackmaps() {
+        let vec: Vec<bool> = Vec::new();
+        assert!(Stackmap::from_slice(&vec).bitmap.is_empty());
+
+        let mut vec: [bool; NUM_BITS] = Default::default();
+        let set_true_idx = [5, 7, 24, 31];
+
+        for &idx in &set_true_idx {
+            vec[idx] = true;
+        }
+
+        let mut vec = vec.to_vec();
+        assert_eq!(
+            vec![BitSet::<Num>(2164261024)],
+            Stackmap::from_slice(&vec).bitmap
+        );
+
+        vec.push(false);
+        vec.push(true);
+        let res = Stackmap::from_slice(&vec);
+        assert_eq!(
+            vec![BitSet::<Num>(2164261024), BitSet::<Num>(2)],
+            res.bitmap
+        );
+
+        assert!(res.get_bit(5));
+        assert!(res.get_bit(31));
+        assert!(res.get_bit(33));
+        assert!(!res.get_bit(1));
+    }
+}
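The stackmap packs one bit per stack word, lowest address first, into 32-bit `BitSet` words, so a frame's live-reference words can be queried by index. A small usage sketch built only on the `from_slice`/`get_bit` API above (the frame shape and function name are invented for illustration):

```rust
use cranelift_codegen::binemit::Stackmap;

/// Hypothetical four-word frame where only word 2 holds a live reference.
fn stackmap_example() {
    let live = [false, false, true, false];
    let map = Stackmap::from_slice(&live);
    assert!(map.get_bit(2));
    assert!(!map.get_bit(3));
}
```

diff --git a/cranelift-codegen/src/cfg_printer.rs b/cranelift-codegen/src/cfg_printer.rs
index e3ce37e73..e3c62820c 100644
--- a/cranelift-codegen/src/cfg_printer.rs
+++ b/cranelift-codegen/src/cfg_printer.rs
@@ -1,10 +1,12 @@
 //! The `CFGPrinter` utility.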
use core::fmt::{Display, Formatter, Result, Write}; +use std::vec::Vec; +use crate::entity::SecondaryMap; use crate::flowgraph::{BasicBlock, ControlFlowGraph}; -use crate::ir::instructions::BranchInfo; use crate::ir::Function; +use crate::write::{FuncWriter, PlainWriter}; /// A utility for pretty-printing the CFG of a `Function`. pub struct CFGPrinter<'a> { @@ -39,23 +41,21 @@ impl<'a> CFGPrinter<'a> { } fn ebb_nodes(&self, w: &mut dyn Write) -> Result { + let mut aliases = SecondaryMap::<_, Vec<_>>::new(); + for v in self.func.dfg.values() { + // VADFS returns the immediate target of an alias + if let Some(k) = self.func.dfg.value_alias_dest_for_serialization(v) { + aliases[k].push(v); + } + } + for ebb in &self.func.layout { - write!(w, " {} [shape=record, label=\"{{{}", ebb, ebb)?; + write!(w, " {} [shape=record, label=\"{{", ebb)?; + crate::write::write_ebb_header(w, self.func, None, ebb, 4)?; // Add all outgoing branch instructions to the label. for inst in self.func.layout.ebb_insts(ebb) { - let idata = &self.func.dfg[inst]; - match idata.analyze_branch(&self.func.dfg.value_lists) { - BranchInfo::SingleDest(dest, _) => { - write!(w, " | <{}>{} {}", inst, idata.opcode(), dest)? - } - BranchInfo::Table(table, dest) => { - write!(w, " | <{}>{} {}", inst, idata.opcode(), table)?; - if let Some(dest) = dest { - write!(w, " {}", dest)? - } - } - BranchInfo::NotABranch => {} - } + write!(w, " | <{}>", inst)?; + PlainWriter.write_instruction(w, self.func, &aliases, None, inst, 0)?; } writeln!(w, "}}\"]")? } diff --git a/cranelift-codegen/src/constant_hash.rs b/cranelift-codegen/src/constant_hash.rs index d6afc5eed..5785a8982 100644 --- a/cranelift-codegen/src/constant_hash.rs +++ b/cranelift-codegen/src/constant_hash.rs @@ -1,6 +1,6 @@ //! Runtime support for precomputed constant hash tables. //! -//! The `cranelift-codegen/meta-python/constant_hash.py` Python module can generate constant hash tables +//! The `cranelift-codegen/meta/src/constant_hash.rs` Rust crate can generate constant hash tables //! using open addressing and quadratic probing. The hash tables are arrays that are guaranteed to: //! //! - Have a power-of-two size. @@ -56,7 +56,7 @@ pub fn probe + ?Sized>( } /// A primitive hash function for matching opcodes. -/// Must match `cranelift-codegen/meta-python/constant_hash.py` and `cranelift-codegen/meta/constant_hash.rs`. +/// Must match `cranelift-codegen/meta/src/constant_hash.rs`. pub fn simple_hash(s: &str) -> usize { let mut h: u32 = 5381; for c in s.chars() { @@ -71,7 +71,7 @@ mod tests { #[test] fn basic() { - // c.f. `meta-python/constant_hash.py` tests. + // c.f. `meta/src/constant_hash.rs` tests. assert_eq!(simple_hash("Hello"), 0x2fa70c01); assert_eq!(simple_hash("world"), 0x5b0c31d5); } diff --git a/cranelift-codegen/src/context.rs b/cranelift-codegen/src/context.rs index 64c382cb2..8bbe2893d 100644 --- a/cranelift-codegen/src/context.rs +++ b/cranelift-codegen/src/context.rs @@ -10,7 +10,8 @@ //! single ISA instance. 
 use crate::binemit::{
-    relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, TrapSink,
+    relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackmapSink,
+    TrapSink,
 };
 use crate::dce::do_dce;
 use crate::dominator_tree::DominatorTree;
@@ -22,6 +23,7 @@ use crate::licm::do_licm;
 use crate::loop_analysis::LoopAnalysis;
 use crate::nan_canonicalization::do_nan_canonicalization;
 use crate::postopt::do_postopt;
+use crate::redundant_reload_remover::RedundantReloadRemover;
 use crate::regalloc;
 use crate::result::CodegenResult;
 use crate::settings::{FlagsOrIsa, OptLevel};
@@ -31,6 +33,7 @@ use crate::timing;
 use crate::unreachable_code::eliminate_unreachable_code;
 use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges};
 use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
+use log::debug;
 use std::vec::Vec;

 /// Persistent data structures and compilation pipeline.
@@ -49,6 +52,9 @@ pub struct Context {

     /// Loop analysis of `func`.
     pub loop_analysis: LoopAnalysis,
+
+    /// Redundant-reload remover context.
+    pub redundant_reload_remover: RedundantReloadRemover,
 }

 impl Context {
@@ -71,6 +77,7 @@ impl Context {
             domtree: DominatorTree::new(),
             regalloc: regalloc::Context::new(),
             loop_analysis: LoopAnalysis::new(),
+            redundant_reload_remover: RedundantReloadRemover::new(),
         }
     }

@@ -81,6 +88,7 @@ impl Context {
         self.domtree.clear();
         self.regalloc.clear();
         self.loop_analysis.clear();
+        self.redundant_reload_remover.clear();
     }

     /// Compile the function, and emit machine code into a `Vec<u8>`.
@@ -100,12 +108,14 @@ impl Context {
         mem: &mut Vec<u8>,
         relocs: &mut dyn RelocSink,
         traps: &mut dyn TrapSink,
+        stackmaps: &mut dyn StackmapSink,
     ) -> CodegenResult<CodeInfo> {
         let info = self.compile(isa)?;
         let old_len = mem.len();
         mem.resize(old_len + info.total_size as usize, 0);
-        let new_info =
-            unsafe { self.emit_to_memory(isa, mem.as_mut_ptr().add(old_len), relocs, traps) };
+        let new_info = unsafe {
+            self.emit_to_memory(isa, mem.as_mut_ptr().add(old_len), relocs, traps, stackmaps)
+        };
         debug_assert!(new_info == info);
         Ok(info)
     }
@@ -120,19 +130,20 @@ impl Context {
     pub fn compile(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
         let _tt = timing::compile();
         self.verify_if(isa)?;
+        debug!("Compiling:\n{}", self.func.display(isa));
+
+        let opt_level = isa.flags().opt_level();

         self.compute_cfg();
-        if isa.flags().opt_level() != OptLevel::Fastest {
+        if opt_level != OptLevel::None {
             self.preopt(isa)?;
         }
         if isa.flags().enable_nan_canonicalization() {
             self.canonicalize_nans(isa)?;
         }
         self.legalize(isa)?;
-        if isa.flags().opt_level() != OptLevel::Fastest {
+        if opt_level != OptLevel::None {
             self.postopt(isa)?;
-        }
-        if isa.flags().opt_level() == OptLevel::Best {
             self.compute_domtree();
             self.compute_loop_analysis();
             self.licm(isa)?;
@@ -140,15 +151,21 @@ impl Context {
         self.compute_domtree();
         self.eliminate_unreachable_code(isa)?;
-        if isa.flags().opt_level() != OptLevel::Fastest {
+        if opt_level != OptLevel::None {
             self.dce(isa)?;
         }
         self.regalloc(isa)?;
         self.prologue_epilogue(isa)?;
-        if isa.flags().opt_level() == OptLevel::Best {
+        if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
+            self.redundant_reload_remover(isa)?;
+        }
+        if opt_level == OptLevel::SpeedAndSize {
             self.shrink_instructions(isa)?;
         }
-        self.relax_branches(isa)
+        let result = self.relax_branches(isa);
+
+        debug!("Compiled:\n{}", self.func.display(isa));
+        result
     }

     /// Emit machine code directly into raw memory.
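With the new `StackmapSink` parameter threaded through emission, callers of `compile_and_emit` now pass four sinks. A minimal call-site sketch using the null sinks exported by this patch (it assumes an already-populated `Context` and an `isa`; the function name is an invented example):

```rust
use cranelift_codegen::binemit::{NullRelocSink, NullStackmapSink, NullTrapSink};
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::Context;

/// Compile the function held in `ctx` and return its machine code bytes.
fn emit(ctx: &mut Context, isa: &dyn TargetIsa) -> Vec<u8> {
    let mut code = Vec::new();
    let mut relocs = NullRelocSink {};
    let mut traps = NullTrapSink {};
    let mut stackmaps = NullStackmapSink {};
    // The trailing parameter is the new stackmap sink.
    ctx.compile_and_emit(isa, &mut code, &mut relocs, &mut traps, &mut stackmaps)
        .expect("compilation failed");
    code
}
```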
@@ -168,9 +185,10 @@ impl Context { mem: *mut u8, relocs: &mut dyn RelocSink, traps: &mut dyn TrapSink, + stackmaps: &mut dyn StackmapSink, ) -> CodeInfo { let _tt = timing::binemit(); - let mut sink = MemoryCodeSink::new(mem, relocs, traps); + let mut sink = MemoryCodeSink::new(mem, relocs, traps, stackmaps); isa.emit_function_to_memory(&self.func, &mut sink); sink.info } @@ -201,7 +219,7 @@ impl Context { /// Run the locations verifier on the function. pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> { let mut errors = VerifierErrors::default(); - let _ = verify_locations(isa, &self.func, None, &mut errors); + let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors); if errors.is_empty() { Ok(()) @@ -227,7 +245,7 @@ impl Context { /// Perform pre-legalization rewrites on the function. pub fn preopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - do_preopt(&mut self.func, &mut self.cfg); + do_preopt(&mut self.func, &mut self.cfg, isa); self.verify_if(isa)?; Ok(()) } @@ -245,6 +263,7 @@ impl Context { self.domtree.clear(); self.loop_analysis.clear(); legalize_function(&mut self.func, &mut self.cfg, isa); + debug!("Legalized:\n{}", self.func.display(isa)); self.verify_if(isa) } @@ -307,7 +326,7 @@ impl Context { /// Run the register allocator. pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { self.regalloc - .run(isa, &mut self.func, &self.cfg, &mut self.domtree) + .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree) } /// Insert prologue and epilogues after computing the stack frame layout. @@ -318,6 +337,14 @@ impl Context { Ok(()) } + /// Do redundant-reload removal after allocation of both registers and stack slots. + pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { + self.redundant_reload_remover + .run(isa, &mut self.func, &self.cfg); + self.verify_if(isa)?; + Ok(()) + } + /// Run the instruction shrinking pass. pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { shrink_instructions(&mut self.func, isa); @@ -329,7 +356,7 @@ impl Context { /// Run the branch relaxation pass and return information about the function's code and /// read-only data. pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult { - let info = relax_branches(&mut self.func, isa)?; + let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?; self.verify_if(isa)?; self.verify_locations_if(isa)?; Ok(info) diff --git a/cranelift-codegen/src/cursor.rs b/cranelift-codegen/src/cursor.rs index dbb459ed7..e688a90b3 100644 --- a/cranelift-codegen/src/cursor.rs +++ b/cranelift-codegen/src/cursor.rs @@ -635,6 +635,28 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> { } fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph { + // TODO: Remove this assertion once #796 is fixed. 
+ #[cfg(feature = "basic-blocks")] + #[cfg(debug_assertions)] + { + if let CursorPosition::At(_) = self.position() { + if let Some(curr) = self.current_inst() { + if let Some(prev) = self.layout().prev_inst(curr) { + let prev_op = self.data_flow_graph()[prev].opcode(); + let inst_op = self.data_flow_graph()[inst].opcode(); + let curr_op = self.data_flow_graph()[curr].opcode(); + if prev_op.is_branch() && !prev_op.is_terminator() { + if !inst_op.is_terminator() { + panic!( + "Inserting instruction {} after {}, and before {}", + inst_op, prev_op, curr_op + ) + }; + } + }; + }; + }; + } self.insert_inst(inst); if !self.srcloc.is_default() { self.func.srclocs[inst] = self.srcloc; @@ -742,6 +764,29 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> { inst: ir::Inst, ctrl_typevar: ir::Type, ) -> &'c mut ir::DataFlowGraph { + // TODO: Remove this assertion once #796 is fixed. + #[cfg(feature = "basic-blocks")] + #[cfg(debug_assertions)] + { + if let CursorPosition::At(_) = self.position() { + if let Some(curr) = self.current_inst() { + if let Some(prev) = self.layout().prev_inst(curr) { + let prev_op = self.data_flow_graph()[prev].opcode(); + let inst_op = self.data_flow_graph()[inst].opcode(); + if prev_op.is_branch() && !prev_op.is_terminator() { + if !inst_op.is_terminator() { + panic!( + "Inserting instruction {} after {} and before {}", + self.display_inst(inst), + self.display_inst(prev), + self.display_inst(curr) + ) + }; + } + }; + }; + }; + } // Insert the instruction and remember the reference. self.insert_inst(inst); self.built_inst = Some(inst); diff --git a/cranelift-codegen/src/dominator_tree.rs b/cranelift-codegen/src/dominator_tree.rs index feea33a0e..8191c310e 100644 --- a/cranelift-codegen/src/dominator_tree.rs +++ b/cranelift-codegen/src/dominator_tree.rs @@ -226,7 +226,13 @@ impl DominatorTree { /// Allocate and compute a dominator tree. pub fn with_function(func: &Function, cfg: &ControlFlowGraph) -> Self { - let mut domtree = Self::new(); + let ebb_capacity = func.layout.ebb_capacity(); + let mut domtree = Self { + nodes: SecondaryMap::with_capacity(ebb_capacity), + postorder: Vec::with_capacity(ebb_capacity), + stack: Vec::new(), + valid: false, + }; domtree.compute(func, cfg); domtree } diff --git a/cranelift-codegen/src/flowgraph.rs b/cranelift-codegen/src/flowgraph.rs index 335ca15ce..645b7d4ff 100644 --- a/cranelift-codegen/src/flowgraph.rs +++ b/cranelift-codegen/src/flowgraph.rs @@ -31,7 +31,7 @@ use crate::timing; use core::mem; /// A basic block denoted by its enclosing Ebb and last instruction. -#[derive(PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub struct BasicBlock { /// Enclosing Ebb key. pub ebb: Ebb, diff --git a/cranelift-codegen/src/ir/builder.rs b/cranelift-codegen/src/ir/builder.rs index 43578d537..13202870f 100644 --- a/cranelift-codegen/src/ir/builder.rs +++ b/cranelift-codegen/src/ir/builder.rs @@ -32,7 +32,7 @@ pub trait InstBuilderBase<'f>: Sized { fn build(self, data: InstructionData, ctrl_typevar: Type) -> (Inst, &'f mut DataFlowGraph); } -// Include trait code generated by `cranelift-codegen/meta-python/gen_instr.py`. +// Include trait code generated by `cranelift-codegen/meta/src/gen_inst.rs`. // // This file defines the `InstBuilder` trait as an extension of `InstBuilderBase` with methods per // instruction format and per opcode. 
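The insertion discipline that these new debug assertions police looks roughly like this with a `FuncCursor`: instructions are appended through `ins()`, and once a conditional branch has been emitted, only a terminator may follow it in the same basic-block-shaped EBB. A sketch; the EBB construction is assumed boilerplate.

```rust
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::{types, Function, InstBuilder};

fn build_trivial_ebb(func: &mut Function) {
    let ebb = func.dfg.make_ebb();
    func.layout.append_ebb(ebb);
    let mut pos = FuncCursor::new(func).at_bottom(ebb);
    let v = pos.ins().iconst(types::I32, 7i64);
    // The terminator comes last; inserting a non-terminator after a
    // branch is exactly what the assertion above panics on.
    pos.ins().return_(&[v]);
}
```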
diff --git a/cranelift-codegen/src/ir/constant.rs b/cranelift-codegen/src/ir/constant.rs
new file mode 100644
index 000000000..c22380db5
--- /dev/null
+++ b/cranelift-codegen/src/ir/constant.rs
@@ -0,0 +1,230 @@
+//! Constants
+//!
+//! The constant pool defined here allows Cranelift to avoid emitting the same constant multiple
+//! times. As constants are inserted in the pool, a handle is returned; the handle is a Cranelift
+//! entity. Inserting the same data multiple times will always return the same handle.
+//!
+//! Future work could include:
+//! - ensuring alignment of constants within the pool,
+//! - bucketing constants by size.
+
+use crate::ir::Constant;
+use cranelift_entity::EntityRef;
+use std::collections::{BTreeMap, HashMap};
+use std::vec::Vec;
+
+/// This type describes the actual constant data.
+pub type ConstantData = Vec<u8>;
+
+/// This type describes an offset in bytes within a constant pool.
+pub type ConstantOffset = u32;
+
+/// Inner type for storing data and offset together in the constant pool. The offset is optional
+/// because it must be set relative to the function code size (i.e. constants are emitted after the
+/// function body); because the function is not yet compiled when constants are inserted,
+/// [`set_offset`](crate::ir::ConstantPool::set_offset) must be called once a constant's offset
+/// from the beginning of the function is known (see
+/// [`relaxation.rs`](crate::binemit::relaxation)).
+#[derive(Clone)]
+pub struct ConstantPoolEntry {
+    data: ConstantData,
+    offset: Option<ConstantOffset>,
+}
+
+impl ConstantPoolEntry {
+    fn new(data: ConstantData) -> Self {
+        Self { data, offset: None }
+    }
+
+    /// Return the size of the constant at this entry.
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Assign a new offset to the constant at this entry.
+    pub fn set_offset(&mut self, offset: ConstantOffset) {
+        self.offset = Some(offset)
+    }
+}
+
+/// Maintains the mapping between a constant handle (i.e. [`Constant`](crate::ir::Constant)) and
+/// its constant data (i.e. [`ConstantData`](crate::ir::ConstantData)).
+#[derive(Clone)]
+pub struct ConstantPool {
+    /// This mapping maintains the insertion order as long as `Constant`s are created with
+    /// sequentially increasing integers.
+    handles_to_values: BTreeMap<Constant, ConstantPoolEntry>,
+
+    /// This mapping is unordered (no need for lexicographic ordering) but allows us to map
+    /// constant data back to handles.
+    values_to_handles: HashMap<ConstantData, Constant>,
+}
+
+impl ConstantPool {
+    /// Create a new constant pool instance.
+    pub fn new() -> Self {
+        Self {
+            handles_to_values: BTreeMap::new(),
+            values_to_handles: HashMap::new(),
+        }
+    }
+
+    /// Empty the constant pool of all data.
+    pub fn clear(&mut self) {
+        self.handles_to_values.clear();
+        self.values_to_handles.clear();
+    }
+
+    /// Insert constant data into the pool, returning a handle for later referencing; when constant
+    /// data is inserted that is a duplicate of previous constant data, the existing handle will be
+    /// returned.
+    pub fn insert(&mut self, constant_value: ConstantData) -> Constant {
+        if self.values_to_handles.contains_key(&constant_value) {
+            self.values_to_handles.get(&constant_value).unwrap().clone()
+        } else {
+            let constant_handle = Constant::new(self.len());
+            self.values_to_handles
+                .insert(constant_value.clone(), constant_handle.clone());
+            self.handles_to_values.insert(
+                constant_handle.clone(),
+                ConstantPoolEntry::new(constant_value),
+            );
+            constant_handle
+        }
+    }
+
+    /// Retrieve the constant data given a handle.
+    pub fn get(&self, constant_handle: Constant) -> &ConstantData {
+        assert!(self.handles_to_values.contains_key(&constant_handle));
+        &self.handles_to_values.get(&constant_handle).unwrap().data
+    }
+
+    /// Assign an offset to a given constant, where the offset is the number of bytes from the
+    /// beginning of the function to the beginning of the constant data inside the pool.
+    pub fn set_offset(&mut self, constant_handle: Constant, constant_offset: ConstantOffset) {
+        assert!(
+            self.handles_to_values.contains_key(&constant_handle),
+            "A constant handle must have already been inserted into the pool; perhaps a \
+             constant handle was created outside of the pool?"
+        );
+        self.handles_to_values
+            .entry(constant_handle)
+            .and_modify(|e| e.offset = Some(constant_offset));
+    }
+
+    /// Retrieve the offset of a given constant, where the offset is the number of bytes from the
+    /// beginning of the function to the beginning of the constant data inside the pool.
+    pub fn get_offset(&self, constant_handle: Constant) -> ConstantOffset {
+        self.handles_to_values
+            .get(&constant_handle)
+            .expect(
+                "A constant handle must have a corresponding constant value; was a constant \
+                 handle created outside of the pool?",
+            )
+            .offset
+            .expect(
+                "A constant offset has not yet been set; verify that `set_offset` has been \
+                 called before this point",
+            )
+    }
+
+    /// Iterate over the constants in insertion order.
+    pub fn iter(&self) -> impl Iterator<Item = (&Constant, &ConstantData)> {
+        self.handles_to_values.iter().map(|(h, e)| (h, &e.data))
+    }
+
+    /// Iterate over mutable entries in the constant pool in insertion order.
+    pub fn entries_mut(&mut self) -> impl Iterator<Item = &mut ConstantPoolEntry> {
+        self.handles_to_values.values_mut()
+    }
+
+    /// Return the number of constants in the pool.
+    pub fn len(&self) -> usize {
+        self.handles_to_values.len()
+    }
+
+    /// Return the combined size of all of the constant values in the pool.
+ pub fn byte_size(&self) -> usize { + self.values_to_handles.keys().map(|c| c.len()).sum() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty() { + let sut = ConstantPool::new(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn insert() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + assert_eq!(sut.len(), 2); + } + + #[test] + fn insert_duplicate() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + let b = sut.insert(vec![1, 2, 3]); + assert_eq!(a, b); + } + + #[test] + fn clear() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + assert_eq!(sut.len(), 1); + + sut.clear(); + assert_eq!(sut.len(), 0); + } + + #[test] + fn iteration_order() { + let mut sut = ConstantPool::new(); + sut.insert(vec![1, 2, 3]); + sut.insert(vec![4, 5, 6]); + sut.insert(vec![1, 2, 3]); + let data = sut.iter().map(|(_, v)| v).collect::>(); + assert_eq!(data, vec![&vec![1, 2, 3], &vec![4, 5, 6]]); + } + + #[test] + fn get() { + let mut sut = ConstantPool::new(); + let data = vec![1, 2, 3]; + let handle = sut.insert(data.clone()); + assert_eq!(sut.get(handle), &data); + } + + #[test] + #[should_panic] + fn get_nonexistent_constant() { + let sut = ConstantPool::new(); + let a = Constant::with_number(42).unwrap(); + sut.get(a); // panics, only use constants returned by ConstantPool + } + + #[test] + fn get_offset() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1]); + sut.set_offset(a, 42); + assert_eq!(sut.get_offset(a), 42) + } + + #[test] + #[should_panic] + fn get_nonexistent_offset() { + let mut sut = ConstantPool::new(); + let a = sut.insert(vec![1]); + sut.get_offset(a); // panics, set_offset should have been called + } +} diff --git a/cranelift-codegen/src/ir/dfg.rs b/cranelift-codegen/src/ir/dfg.rs index d26abaa3e..5b3054b59 100644 --- a/cranelift-codegen/src/ir/dfg.rs +++ b/cranelift-codegen/src/ir/dfg.rs @@ -5,7 +5,7 @@ use crate::ir; use crate::ir::builder::ReplaceBuilder; use crate::ir::extfunc::ExtFuncData; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData}; -use crate::ir::types; +use crate::ir::{types, ConstantPool, Immediate}; use crate::ir::{ Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList, ValueListPool, @@ -19,13 +19,14 @@ use core::mem; use core::ops::{Index, IndexMut}; use core::u16; use std::collections::HashMap; +use std::vec::Vec; /// A data flow graph defines all instructions and extended basic blocks in a function as well as /// the data flow dependencies between them. The DFG also tracks values which can be either /// instruction results or EBB parameters. /// /// The layout of EBBs in the function and of instructions in each EBB is recorded by the -/// `FunctionLayout` data structure which form the other half of the function representation. +/// `Layout` data structure which forms the other half of the function representation. /// #[derive(Clone)] pub struct DataFlowGraph { @@ -67,6 +68,12 @@ pub struct DataFlowGraph { /// Saves Value labels. 
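A minimal sketch of the pool's deduplication and offset bookkeeping, using only the methods defined and tested above:

```rust
use cranelift_codegen::ir::ConstantPool;

fn main() {
    let mut pool = ConstantPool::new();
    let a = pool.insert(vec![1u8, 2, 3, 4]);
    let b = pool.insert(vec![1u8, 2, 3, 4]); // duplicate data reuses the handle
    assert_eq!(a, b);
    assert_eq!(pool.len(), 1);
    pool.set_offset(a, 16); // known only after the function body is laid out
    assert_eq!(pool.get_offset(a), 16);
}
```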
pub values_labels: Option>, + + /// Constants used within the function + pub constants: ConstantPool, + + /// Stores large immediates that otherwise will not fit on InstructionData + pub immediates: PrimaryMap>, } impl DataFlowGraph { @@ -81,6 +88,8 @@ impl DataFlowGraph { signatures: PrimaryMap::new(), ext_funcs: PrimaryMap::new(), values_labels: None, + constants: ConstantPool::new(), + immediates: PrimaryMap::new(), } } @@ -94,6 +103,8 @@ impl DataFlowGraph { self.signatures.clear(); self.ext_funcs.clear(); self.values_labels = None; + self.constants.clear(); + self.immediates.clear(); } /// Get the total number of instructions created in this function, whether they are currently @@ -1234,7 +1245,6 @@ mod tests { #[test] fn aliases() { - use crate::ir::condcodes::IntCC; use crate::ir::InstBuilder; let mut func = Function::new(); @@ -1249,7 +1259,7 @@ mod tests { assert_eq!(pos.func.dfg.resolve_aliases(v1), v1); let arg0 = pos.func.dfg.append_ebb_param(ebb0, types::I32); - let (s, c) = pos.ins().iadd_cout(v1, arg0); + let (s, c) = pos.ins().iadd_ifcout(v1, arg0); let iadd = match pos.func.dfg.value_def(s) { ValueDef::Result(i, 0) => i, _ => panic!(), @@ -1259,9 +1269,9 @@ mod tests { pos.func.dfg.clear_results(iadd); pos.func.dfg.attach_result(iadd, s); - // Replace `iadd_cout` with a normal `iadd` and an `icmp`. + // Replace `iadd_ifcout` with a normal `iadd` and an `ifcmp`. pos.func.dfg.replace(iadd).iadd(v1, arg0); - let c2 = pos.ins().icmp(IntCC::UnsignedLessThan, s, v1); + let c2 = pos.ins().ifcmp(s, v1); pos.func.dfg.change_to_alias(c, c2); assert_eq!(pos.func.dfg.resolve_aliases(c2), c2); diff --git a/cranelift-codegen/src/ir/entities.rs b/cranelift-codegen/src/ir/entities.rs index 2c05fbc84..1f8e1fc6a 100644 --- a/cranelift-codegen/src/ir/entities.rs +++ b/cranelift-codegen/src/ir/entities.rs @@ -22,8 +22,15 @@ use crate::entity::entity_impl; use core::fmt; use core::u32; - -/// An opaque reference to an extended basic block in a function. +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// An opaque reference to an [extended basic +/// block](https://en.wikipedia.org/wiki/Extended_basic_block) in a +/// [`Function`](super::function::Function). +/// +/// You can get an `Ebb` using +/// [`FunctionBuilder::create_ebb`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_ebb) #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Ebb(u32); entity_impl!(Ebb, "ebb"); @@ -42,6 +49,18 @@ impl Ebb { } /// An opaque reference to an SSA value. +/// +/// You can get a constant `Value` from the following +/// [`InstBuilder`](super::InstBuilder) instructions: +/// +/// - [`iconst`](super::InstBuilder::iconst) for integer constants +/// - [`f32const`](super::InstBuilder::f32const) for 32-bit float constants +/// - [`f64const`](super::InstBuilder::f64const) for 64-bit float constants +/// - [`bconst`](super::InstBuilder::bconst) for boolean constants +/// - [`vconst`](super::InstBuilder::vconst) for vector constants +/// - [`null`](super::InstBuilder::null) for null reference constants +/// +/// Any `InstBuilder` instruction that has an output will also return a `Value`. #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Value(u32); entity_impl!(Value, "v"); @@ -60,13 +79,36 @@ impl Value { } } -/// An opaque reference to an instruction in a function. +/// An opaque reference to an instruction in a [`Function`](super::Function). +/// +/// Most usage of `Inst` is internal. 
`Inst`ructions are returned by +/// [`InstBuilder`](super::InstBuilder) instructions that do not return a +/// [`Value`], such as control flow and trap instructions. +/// +/// If you look around the API, you can find many inventive uses for `Inst`, +/// such as [annotating specific instructions with a comment][inst_comment] +/// or [performing reflection at compile time](super::DataFlowGraph::analyze_branch) +/// on the type of instruction. +/// +/// [inst_comment]: https://github.com/bjorn3/rustc_codegen_cranelift/blob/0f8814fd6da3d436a90549d4bb19b94034f2b19c/src/pretty_clif.rs #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct Inst(u32); entity_impl!(Inst, "inst"); /// An opaque reference to a stack slot. +/// +/// Stack slots represent an address on the +/// [call stack](https://en.wikipedia.org/wiki/Call_stack). +/// +/// `StackSlot`s can be created with +/// [`FunctionBuilder::create_stackslot`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_stack_slot). +/// +/// `StackSlot`s are most often used with +/// [`stack_addr`](super::InstBuilder::stack_addr), +/// [`stack_load`](super::InstBuilder::stack_load), and +/// [`stack_store`](super::InstBuilder::stack_store). #[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct StackSlot(u32); entity_impl!(StackSlot, "ss"); @@ -84,6 +126,22 @@ impl StackSlot { } /// An opaque reference to a global value. +/// +/// A `GlobalValue` is a [`Value`](Value) that will be live across the entire +/// function lifetime. It can be preloaded from other global values. +/// +/// You can create a `GlobalValue` in the following ways: +/// +/// - When compiling to WASM, you can use it to load values from a +/// [`VmContext`](super::GlobalValueData::VMContext) using +/// [`FuncEnvironment::make_global`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_global). +/// - When compiling to native code, you can use it for objects in static memory with +/// [`Module::declare_data_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_data_in_func). +/// - For any compilation target, it can be registered with +/// [`FunctionBuilder::create_global_value`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_global_value). +/// +/// `GlobalValue`s can be retrieved with +/// [`InstBuilder:global_value`](super::InstBuilder::global_value). #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct GlobalValue(u32); entity_impl!(GlobalValue, "gv"); @@ -101,8 +159,63 @@ impl GlobalValue { } } -/// An opaque reference to a jump table. +/// An opaque reference to a constant. +/// +/// You can store [`ConstantData`](super::ConstantData) in a +/// [`ConstantPool`](super::ConstantPool) for efficient storage and retrieval. +/// See [`ConstantPool::insert`](super::ConstantPool::insert). +#[derive(Copy, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct Constant(u32); +entity_impl!(Constant, "const"); + +impl Constant { + /// Create a const reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Constant(n)) + } else { + None + } + } +} + +/// An opaque reference to an immediate. +/// +/// Some immediates (e.g. 
SIMD shuffle masks) are too large to store in the +/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be +/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate` +/// provides a way to reference values stored there. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Immediate(u32); +entity_impl!(Immediate, "imm"); + +impl Immediate { + /// Create an immediate reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option { + if n < u32::MAX { + Some(Immediate(n)) + } else { + None + } + } +} + +/// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table). +/// +/// `JumpTable`s are used for indirect branching and are specialized for dense, +/// 0-based jump offsets. If you want a jump table which doesn't start at 0, +/// or is not contiguous, consider using a [`Switch`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.Switch.html) instead. +/// +/// `JumpTable` are used with [`br_table`](super::InstBuilder::br_table). +/// +/// `JumpTable`s can be created with +/// [`create_jump_table`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_jump_table). #[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct JumpTable(u32); entity_impl!(JumpTable, "jt"); @@ -119,7 +232,22 @@ impl JumpTable { } } -/// A reference to an external function. +/// An opaque reference to another [`Function`](super::Function). +/// +/// `FuncRef`s are used for [direct](super::InstBuilder::call) function calls +/// and by [`func_addr`](super::InstBuilder::func_addr) for use in +/// [indirect](super::InstBuilder::call_indirect) function calls. +/// +/// `FuncRef`s can be created with +/// +/// - [`FunctionBuilder::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function) +/// for external functions +/// - [`Module::declare_func_in_func`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html#method.declare_func_in_func) +/// for functions declared elsewhere in the same native +/// [`Module`](https://docs.rs/cranelift-module/*/cranelift_module/struct.Module.html) +/// - [`FuncEnvironment::make_direct_func`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func) +/// for functions declared in the same WebAssembly +/// [`FuncEnvironment`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_direct_func) #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct FuncRef(u32); entity_impl!(FuncRef, "fn"); @@ -137,7 +265,18 @@ impl FuncRef { } } -/// A reference to a function signature. +/// An opaque reference to a function [`Signature`](super::Signature). +/// +/// `SigRef`s are used to declare a function with +/// [`FunctionBuiler::import_function`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_function) +/// as well as to make an [indirect function call](super::InstBuilder::call_indirect). +/// +/// `SigRef`s can be created with +/// [`FunctionBuilder::import_signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.import_signature). 
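The parser-facing constructors above round-trip through the usual entity display prefixes; a small sketch:

```rust
use cranelift_codegen::ir::{Constant, Immediate};

fn main() {
    let c = Constant::with_number(3).unwrap();
    assert_eq!(c.to_string(), "const3");
    let imm = Immediate::with_number(0).unwrap();
    assert_eq!(imm.to_string(), "imm0");
}
```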
+/// +/// You can retrieve the [`Signature`](super::Signature) that was used to create a `SigRef` with +/// [`FunctionBuilder::signature`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.signature) or +/// [`func.dfg.signatures`](super::dfg::DataFlowGraph::signatures). #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct SigRef(u32); entity_impl!(SigRef, "sig"); @@ -155,7 +294,12 @@ impl SigRef { } } -/// A reference to a heap. +/// An opaque reference to a [heap](https://en.wikipedia.org/wiki/Memory_management#DYNAMIC). +/// +/// Heaps are used to access dynamically allocated memory through +/// [`heap_addr`](super::InstBuilder::heap_addr). +/// +/// To create a heap, use [`FunctionBuilder::create_heap`](https://docs.rs/cranelift-frontend/*/cranelift_frontend/struct.FunctionBuilder.html#method.create_heap). #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Heap(u32); entity_impl!(Heap, "heap"); @@ -173,7 +317,13 @@ impl Heap { } } -/// A reference to a table. +/// An opaque reference to a [WebAssembly +/// table](https://developer.mozilla.org/en-US/docs/WebAssembly/Understanding_the_text_format#WebAssembly_tables). +/// +/// `Table`s are used to store a list of function references. +/// They can be created with [`FuncEnvironment::make_table`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.make_table). +/// They can be used with +/// [`FuncEnvironment::translate_call_indirect`](https://docs.rs/cranelift-wasm/*/cranelift_wasm/trait.FuncEnvironment.html#tymethod.translate_call_indirect). #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Table(u32); entity_impl!(Table, "table"); @@ -191,7 +341,7 @@ impl Table { } } -/// A reference to any of the entities defined in this module. +/// An opaque reference to any of the entities defined in this module that can appear in CLIF IR. #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub enum AnyEntity { /// The whole function. 
@@ -327,4 +477,10 @@ mod tests { mem::size_of::>() ); } + + #[test] + fn constant_with_number() { + assert_eq!(Constant::with_number(0).unwrap().to_string(), "const0"); + assert_eq!(Constant::with_number(1).unwrap().to_string(), "const1"); + } } diff --git a/cranelift-codegen/src/ir/extfunc.rs b/cranelift-codegen/src/ir/extfunc.rs index ada7bab8c..0b74fd3d2 100644 --- a/cranelift-codegen/src/ir/extfunc.rs +++ b/cranelift-codegen/src/ir/extfunc.rs @@ -373,7 +373,8 @@ mod tests { CallConv::Cold, CallConv::SystemV, CallConv::WindowsFastcall, - CallConv::Baldrdash, + CallConv::BaldrdashSystemV, + CallConv::BaldrdashWindows, ] { assert_eq!(Ok(cc), cc.to_string().parse()) } @@ -381,16 +382,19 @@ mod tests { #[test] fn signatures() { - let mut sig = Signature::new(CallConv::Baldrdash); - assert_eq!(sig.to_string(), "() baldrdash"); + let mut sig = Signature::new(CallConv::BaldrdashSystemV); + assert_eq!(sig.to_string(), "() baldrdash_system_v"); sig.params.push(AbiParam::new(I32)); - assert_eq!(sig.to_string(), "(i32) baldrdash"); + assert_eq!(sig.to_string(), "(i32) baldrdash_system_v"); sig.returns.push(AbiParam::new(F32)); - assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash"); + assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash_system_v"); sig.params.push(AbiParam::new(I32.by(4).unwrap())); - assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 baldrdash"); + assert_eq!(sig.to_string(), "(i32, i32x4) -> f32 baldrdash_system_v"); sig.returns.push(AbiParam::new(B8)); - assert_eq!(sig.to_string(), "(i32, i32x4) -> f32, b8 baldrdash"); + assert_eq!( + sig.to_string(), + "(i32, i32x4) -> f32, b8 baldrdash_system_v" + ); // Order does not matter. sig.params[0].location = ArgumentLoc::Stack(24); @@ -399,7 +403,7 @@ mod tests { // Writing ABI-annotated signatures. assert_eq!( sig.to_string(), - "(i32 [24], i32x4 [8]) -> f32, b8 baldrdash" + "(i32 [24], i32x4 [8]) -> f32, b8 baldrdash_system_v" ); } } diff --git a/cranelift-codegen/src/ir/function.rs b/cranelift-codegen/src/ir/function.rs index f4b62dc59..00827240d 100644 --- a/cranelift-codegen/src/ir/function.rs +++ b/cranelift-codegen/src/ir/function.rs @@ -8,17 +8,20 @@ use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; use crate::ir::{ - Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, JumpTable, + Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable, JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData, }; use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations}; use crate::ir::{JumpTableOffsets, JumpTables}; use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa}; -use crate::regalloc::RegDiversions; +use crate::regalloc::{EntryRegDiversions, RegDiversions}; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; use core::fmt; +#[cfg(feature = "basic-blocks")] +use crate::ir::Opcode; + /// A function. /// /// Functions can be cloned, but it is not a very fast operation. @@ -59,6 +62,12 @@ pub struct Function { /// Location assigned to every value. pub locations: ValueLocations, + /// Non-default locations assigned to value at the entry of basic blocks. + /// + /// At the entry of each basic block, we might have values which are not in their default + /// ValueLocation. This field records these register-to-register moves as Diversions. + pub entry_diversions: EntryRegDiversions, + /// Code offsets of the EBB headers. 
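A sketch of building a signature under the split Baldrdash calling conventions tested above. The trailing VMContext parameter mirrors the libcall change later in this patch; `types::I64` stands in for the target's pointer type.

```rust
use cranelift_codegen::ir::{types, AbiParam, ArgumentPurpose, Signature};
use cranelift_codegen::isa::CallConv;

fn main() {
    let mut sig = Signature::new(CallConv::BaldrdashSystemV);
    sig.params.push(AbiParam::new(types::I32));
    sig.returns.push(AbiParam::new(types::F32));
    assert_eq!(sig.to_string(), "(i32) -> f32 baldrdash_system_v");
    // Baldrdash callees also receive the VMContext as a special parameter.
    sig.params
        .push(AbiParam::special(types::I64, ArgumentPurpose::VMContext));
}
```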
/// /// This information is only transiently available after the `binemit::relax_branches` function @@ -91,6 +100,7 @@ impl Function { layout: Layout::new(), encodings: SecondaryMap::new(), locations: SecondaryMap::new(), + entry_diversions: EntryRegDiversions::new(), offsets: SecondaryMap::new(), jt_offsets: SecondaryMap::new(), srclocs: SecondaryMap::new(), @@ -109,7 +119,9 @@ impl Function { self.layout.clear(); self.encodings.clear(); self.locations.clear(); + self.entry_diversions.clear(); self.offsets.clear(); + self.jt_offsets.clear(); self.srclocs.clear(); } @@ -194,10 +206,12 @@ impl Function { !self.offsets.is_empty(), "Code layout must be computed first" ); + let mut divert = RegDiversions::new(); + divert.at_ebb(&self.entry_diversions, ebb); InstOffsetIter { encinfo: encinfo.clone(), func: self, - divert: RegDiversions::new(), + divert, encodings: &self.encodings, offset: self.offsets[ebb], iter: self.layout.ebb_insts(ebb), @@ -219,9 +233,44 @@ impl Function { pub fn collect_debug_info(&mut self) { self.dfg.collect_debug_info(); } + + /// Changes the destination of a jump or branch instruction. + /// Does nothing if called with a non-jump or non-branch instruction. + pub fn change_branch_destination(&mut self, inst: Inst, new_dest: Ebb) { + match self.dfg[inst].branch_destination_mut() { + None => (), + Some(inst_dest) => *inst_dest = new_dest, + } + } + + /// Checks that the specified EBB can be encoded as a basic block. + /// + /// On error, returns the first invalid instruction and an error message. + #[cfg(feature = "basic-blocks")] + pub fn is_ebb_basic(&self, ebb: Ebb) -> Result<(), (Inst, &'static str)> { + let dfg = &self.dfg; + let inst_iter = self.layout.ebb_insts(ebb); + + // Ignore all instructions prior to the first branch. + let mut inst_iter = inst_iter.skip_while(|&inst| !dfg[inst].opcode().is_branch()); + + // A conditional branch is permitted in a basic block only when followed + // by a terminal jump or fallthrough instruction. + if let Some(_branch) = inst_iter.next() { + if let Some(next) = inst_iter.next() { + match dfg[next].opcode() { + Opcode::Fallthrough | Opcode::Jump => (), + _ => return Err((next, "post-branch instruction not fallthrough or jump")), + } + } + } + + Ok(()) + } } /// Additional annotations for function display. +#[derive(Default)] pub struct DisplayFunctionAnnotations<'a> { /// Enable ISA annotations. pub isa: Option<&'a dyn TargetIsa>, @@ -230,16 +279,6 @@ pub struct DisplayFunctionAnnotations<'a> { pub value_ranges: Option<&'a ValueLabelsRanges>, } -impl<'a> DisplayFunctionAnnotations<'a> { - /// Create a DisplayFunctionAnnotations with all fields set to None. - pub fn default() -> Self { - DisplayFunctionAnnotations { - isa: None, - value_ranges: None, - } - } -} - impl<'a> From> for DisplayFunctionAnnotations<'a> { fn from(isa: Option<&'a dyn TargetIsa>) -> DisplayFunctionAnnotations { DisplayFunctionAnnotations { diff --git a/cranelift-codegen/src/ir/immediates.rs b/cranelift-codegen/src/ir/immediates.rs index 36cc470ae..371c05bdd 100644 --- a/cranelift-codegen/src/ir/immediates.rs +++ b/cranelift-codegen/src/ir/immediates.rs @@ -1,13 +1,33 @@ //! Immediate operands for Cranelift instructions //! //! This module defines the types of immediate operands that can appear on Cranelift instructions. -//! Each type here should have a corresponding definition in the `cranelift.immediates` Python -//! module in the meta language. +//! Each type here should have a corresponding definition in the +//! 
`cranelift-codegen/meta/src/shared/immediates` module in the meta language.
 
 use core::fmt::{self, Display, Formatter};
+use core::iter::FromIterator;
 use core::mem;
-use core::str::FromStr;
+use core::str::{from_utf8, FromStr};
 use core::{i32, u32};
+use std::vec::Vec;
+
+/// Convert a type into a vector of bytes; all implementors in this file must use little-endian
+/// orderings of bytes to match WebAssembly's little-endianness.
+trait IntoBytes {
+    fn into_bytes(self) -> Vec<u8>;
+}
+
+impl IntoBytes for u8 {
+    fn into_bytes(self) -> Vec<u8> {
+        vec![self]
+    }
+}
+
+impl IntoBytes for i32 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.to_le_bytes().to_vec()
+    }
+}
 
 /// 64-bit immediate signed integer operand.
 ///
@@ -34,6 +54,12 @@ impl Into<i64> for Imm64 {
     }
 }
 
+impl IntoBytes for Imm64 {
+    fn into_bytes(self) -> Vec<u8> {
+        self.0.to_le_bytes().to_vec()
+    }
+}
+
 impl From<i64> for Imm64 {
     fn from(x: i64) -> Self {
         Imm64(x)
     }
 }
@@ -264,6 +290,151 @@ impl FromStr for Uimm32 {
     }
 }
 
+/// A 128-bit immediate operand.
+///
+/// This is used as an immediate value in SIMD instructions.
+#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]
+pub struct V128Imm(pub [u8; 16]);
+
+impl V128Imm {
+    /// Iterate over the bytes in the constant.
+    pub fn bytes(&self) -> impl Iterator<Item = &u8> {
+        self.0.iter()
+    }
+
+    /// Convert the immediate into a vector.
+    pub fn to_vec(self) -> Vec<u8> {
+        self.0.to_vec()
+    }
+
+    /// Convert the immediate into a slice.
+    pub fn as_slice(&self) -> &[u8] {
+        &self.0[..]
+    }
+}
+
+impl Display for V128Imm {
+    // Print a 128-bit vector in hexadecimal, e.g. 0x000102030405060708090a0b0c0d0e0f.
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "0x")?;
+        let mut anything_written = false;
+        for &b in self.0.iter().rev() {
+            if b == 0 && !anything_written {
+                continue;
+            } else {
+                anything_written = true;
+                write!(f, "{:02x}", b)?;
+            }
+        }
+        if !anything_written {
+            write!(f, "00")?;
+        }
+        Ok(())
+    }
+}
+
+impl From<u64> for V128Imm {
+    fn from(x: u64) -> Self {
+        let mut buffer: [u8; 16] = [0; 16]; // zero-fill
+        // Insert each byte of the u64 into the buffer in little-endian order.
+        (0..8).for_each(|byte| buffer[byte] = (x >> (byte as u64 * 8) & 0xff) as u8);
+        V128Imm(buffer)
+    }
+}
+
+impl From<&[u8]> for V128Imm {
+    fn from(slice: &[u8]) -> Self {
+        assert_eq!(slice.len(), 16);
+        let mut buffer = [0; 16];
+        buffer.copy_from_slice(slice);
+        V128Imm(buffer)
+    }
+}
+
+impl FromStr for V128Imm {
+    type Err = &'static str;
+
+    // Parse a 128-bit vector from a hexadecimal string, formatted as above.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        if s.len() <= 2 || &s[0..2] != "0x" {
+            Err("Expected a hexadecimal string, e.g. 0x1234")
+        } else {
+            // clean and check the string
+            let cleaned: Vec<u8> = s[2..]
+ .as_bytes() + .iter() + .filter(|&&b| b as char != '_') + .cloned() + .collect(); // remove 0x prefix and any intervening _ characters + + if cleaned.len() == 0 { + Err("Hexadecimal string must have some digits") + } else if cleaned.len() % 2 != 0 { + Err("Hexadecimal string must have an even number of digits") + } else if cleaned.len() > 32 { + Err("Hexadecimal string has too many digits to fit in a 128-bit vector") + } else { + let mut buffer = [0; 16]; // zero-fill the buffer + let mut position = cleaned.len() / 2 - 1; // since Uimm128 is little-endian but the string is not, we write from back to front but must start at the highest position required by the string + for i in (0..cleaned.len()).step_by(2) { + let pair = from_utf8(&cleaned[i..i + 2]) + .or_else(|_| Err("Unable to parse hexadecimal pair as UTF-8"))?; + let byte = u8::from_str_radix(pair, 16) + .or_else(|_| Err("Unable to parse as hexadecimal"))?; + buffer[position] = byte; + position = position.wrapping_sub(1); // should only wrap on the last iteration + } + + Ok(V128Imm(buffer)) + } + } + } +} + +/// Implement a way to convert an iterator of immediates to a Uimm128: +/// - this expects the items in reverse order (e.g. last lane first) which is the natural output of pushing items into a vector +/// - this may not fully consume the iterator or may fail if it cannot take the expected number of items +/// - this requires the input type (i.e. $ty) to implement ToBytes +macro_rules! construct_uimm128_from_iterator_of { + ( $ty:ident, $lanes:expr ) => { + impl FromIterator<$ty> for V128Imm { + fn from_iter>(iter: T) -> Self { + let mut buffer: [u8; 16] = [0; 16]; + iter.into_iter() + .take($lanes) + .map(|f| f.into_bytes()) + .flat_map(|b| b) + .enumerate() + .for_each(|(i, b)| buffer[i] = b); + V128Imm(buffer) + } + } + }; +} + +/// Special case for booleans since we have to decide the bit-width based on the number of items +impl FromIterator for V128Imm { + fn from_iter>(iter: T) -> Self { + let bools = Vec::from_iter(iter); + let count = bools.len(); + assert!(count > 0 && count <= 16); // ensure we don't have too many booleans + assert_eq!(count & (count - 1), 0); // ensure count is a power of two, see https://stackoverflow.com/questions/600293 + let mut buffer: [u8; 16] = [0; 16]; + let step = 16 / count; + bools + .iter() + .enumerate() + .map(|(i, &b)| (i * step, if b { 1 } else { 0 })) + .for_each(|(i, b)| buffer[i] = b); + V128Imm(buffer) + } +} + +construct_uimm128_from_iterator_of!(u8, 16); +construct_uimm128_from_iterator_of!(i32, 4); +construct_uimm128_from_iterator_of!(Ieee32, 4); +construct_uimm128_from_iterator_of!(Imm64, 2); +construct_uimm128_from_iterator_of!(Ieee64, 2); + /// 32-bit signed immediate offset. /// /// This is used to encode an immediate offset for load/store instructions. All supported ISAs have @@ -671,6 +842,12 @@ impl From for Ieee32 { } } +impl IntoBytes for Ieee32 { + fn into_bytes(self) -> Vec { + self.0.to_le_bytes().to_vec() + } +} + impl Ieee64 { /// Create a new `Ieee64` containing the bits of `x`. 
pub fn with_bits(x: u64) -> Self { @@ -738,6 +915,18 @@ impl From for Ieee64 { } } +impl From for Ieee64 { + fn from(x: u64) -> Self { + Ieee64::with_float(f64::from_bits(x)) + } +} + +impl IntoBytes for Ieee64 { + fn into_bytes(self) -> Vec { + self.0.to_le_bytes().to_vec() + } +} + #[cfg(test)] mod tests { use super::*; @@ -878,6 +1067,117 @@ mod tests { parse_err::("0x0_0000_0000_0000_0000", "Too many hexadecimal digits"); } + #[test] + fn format_uimm128() { + assert_eq!(V128Imm::from(0).to_string(), "0x00"); + assert_eq!(V128Imm::from(42).to_string(), "0x2a"); + assert_eq!(V128Imm::from(3735928559).to_string(), "0xdeadbeef"); + assert_eq!( + V128Imm::from(0x0102030405060708).to_string(), + "0x0102030405060708" + ); + } + + #[test] + fn parse_uimm128() { + parse_ok::("0x00", "0x00"); + parse_ok::("0x00000042", "0x42"); + parse_ok::( + "0x0102030405060708090a0b0c0d0e0f00", + "0x0102030405060708090a0b0c0d0e0f00", + ); + parse_ok::("0x_0000_0043_21", "0x4321"); + + parse_err::("", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err::("0x", "Expected a hexadecimal string, e.g. 0x1234"); + parse_err::( + "0x042", + "Hexadecimal string must have an even number of digits", + ); + parse_err::( + "0x00000000000000000000000000000000000000000000000000", + "Hexadecimal string has too many digits to fit in a 128-bit vector", + ); + parse_err::("0xrstu", "Unable to parse as hexadecimal"); + parse_err::("0x__", "Hexadecimal string must have some digits"); + } + + #[test] + fn uimm128_equivalence() { + assert_eq!( + "0x01".parse::().unwrap().0, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + V128Imm::from_iter(vec![1, 0, 0, 0]).0, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + V128Imm::from(1).0, + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + + #[test] + fn uimm128_endianness() { + assert_eq!( + "0x42".parse::().unwrap().0, + [0x42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + "0x00".parse::().unwrap().0, + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + "0x12345678".parse::().unwrap().0, + [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + assert_eq!( + "0x1234_5678".parse::().unwrap().0, + [0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ); + } + + #[test] + fn uimm128_from_iter() { + assert_eq!( + V128Imm::from_iter(vec![4, 3, 2, 1]).0, + [4, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0] + ); + + assert_eq!( + V128Imm::from_iter(vec![false, true]).0, + [/* false */ 0, 0, 0, 0, 0, 0, 0, 0, /* true */ 1, 0, 0, 0, 0, 0, 0, 0] + ); + + assert_eq!( + V128Imm::from_iter(vec![false, true, false, true, false, true, false, true]).0, + [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0] + ); + + #[allow(trivial_numeric_casts)] + let u8s = vec![ + 1 as u8, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0, + ]; + assert_eq!( + V128Imm::from_iter(u8s).0, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] + ); + + #[allow(trivial_numeric_casts)] + let ieee32s: Vec = vec![32.4 as f32, 0.0, 1.0, 6.6666] + .iter() + .map(|&f| Ieee32::from(f)) + .collect(); + assert_eq!( + V128Imm::from_iter(ieee32s).0, + [ + /* 32.4 == */ 0x9a, 0x99, 0x01, 0x42, /* 0 == */ 0, 0, 0, 0, + /* 1 == */ 0, 0, 0x80, 0x3f, /* 6.6666 == */ 0xca, 0x54, 0xd5, 0x40, + ] + ) + } + #[test] fn format_offset32() { assert_eq!(Offset32(0).to_string(), ""); diff --git a/cranelift-codegen/src/ir/instructions.rs b/cranelift-codegen/src/ir/instructions.rs index ae227dc5f..30e8d1468 100644 --- 
a/cranelift-codegen/src/ir/instructions.rs +++ b/cranelift-codegen/src/ir/instructions.rs @@ -28,7 +28,7 @@ pub type ValueList = entity::EntityList; /// Memory pool for holding value lists. See `ValueList`. pub type ValueListPool = entity::ListPool; -// Include code generated by `cranelift-codegen/meta-python/gen_instr.py`. This file contains: +// Include code generated by `cranelift-codegen/meta/src/gen_inst.rs`. This file contains: // // - The `pub enum InstructionFormat` enum with all the instruction formats. // - The `pub enum InstructionData` enum with all the instruction data fields. @@ -444,6 +444,8 @@ pub struct ValueTypeSet { pub floats: BitSet8, /// Allowed bool widths pub bools: BitSet8, + /// Allowed ref widths + pub refs: BitSet8, } impl ValueTypeSet { @@ -458,6 +460,8 @@ impl ValueTypeSet { self.floats.contains(l2b) } else if scalar.is_bool() { self.bools.contains(l2b) + } else if scalar.is_ref() { + self.refs.contains(l2b) } else { false } @@ -498,7 +502,7 @@ enum OperandConstraint { /// This operand is the same type as the controlling type variable. Same, - /// This operand is `ctrlType.lane_type()`. + /// This operand is `ctrlType.lane_of()`. LaneOf, /// This operand is `ctrlType.as_bool()`. @@ -527,7 +531,7 @@ impl OperandConstraint { Concrete(t) => Bound(t), Free(vts) => ResolvedConstraint::Free(TYPE_SETS[vts as usize]), Same => Bound(ctrl_type), - LaneOf => Bound(ctrl_type.lane_type()), + LaneOf => Bound(ctrl_type.lane_of()), AsBool => Bound(ctrl_type.as_bool()), HalfWidth => Bound(ctrl_type.half_width().expect("invalid type for half_width")), DoubleWidth => Bound( @@ -652,6 +656,7 @@ mod tests { ints: BitSet8::from_range(4, 7), floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(3, 7), + refs: BitSet8::from_range(5, 7), }; assert!(!vts.contains(I8)); assert!(vts.contains(I32)); @@ -661,6 +666,8 @@ mod tests { assert!(!vts.contains(B1)); assert!(vts.contains(B8)); assert!(vts.contains(B64)); + assert!(vts.contains(R32)); + assert!(vts.contains(R64)); assert_eq!(vts.example().to_string(), "i32"); let vts = ValueTypeSet { @@ -668,6 +675,7 @@ mod tests { ints: BitSet8::from_range(0, 0), floats: BitSet8::from_range(5, 7), bools: BitSet8::from_range(3, 7), + refs: BitSet8::from_range(0, 0), }; assert_eq!(vts.example().to_string(), "f32"); @@ -676,6 +684,7 @@ mod tests { ints: BitSet8::from_range(0, 0), floats: BitSet8::from_range(5, 7), bools: BitSet8::from_range(3, 7), + refs: BitSet8::from_range(0, 0), }; assert_eq!(vts.example().to_string(), "f32x2"); @@ -684,6 +693,7 @@ mod tests { ints: BitSet8::from_range(0, 0), floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(3, 7), + refs: BitSet8::from_range(0, 0), }; assert!(!vts.contains(B32X2)); assert!(vts.contains(B32X4)); @@ -695,8 +705,11 @@ mod tests { ints: BitSet8::from_range(3, 7), floats: BitSet8::from_range(0, 0), bools: BitSet8::from_range(0, 0), + refs: BitSet8::from_range(0, 0), }; assert!(vts.contains(I32)); assert!(vts.contains(I32X4)); + assert!(!vts.contains(R32)); + assert!(!vts.contains(R64)); } } diff --git a/cranelift-codegen/src/ir/layout.rs b/cranelift-codegen/src/ir/layout.rs index e1015f7f3..78a462857 100644 --- a/cranelift-codegen/src/ir/layout.rs +++ b/cranelift-codegen/src/ir/layout.rs @@ -60,6 +60,11 @@ impl Layout { self.first_ebb = None; self.last_ebb = None; } + + /// Returns the capacity of the `EbbData` map. + pub fn ebb_capacity(&self) -> usize { + self.ebbs.capacity() + } } /// Sequence numbers. 
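`ebb_capacity` exists so analyses can pre-size their maps, which is how `DominatorTree::with_function` earlier in this patch avoids reallocation. A sketch of the usual pairing:

```rust
use cranelift_codegen::dominator_tree::DominatorTree;
use cranelift_codegen::flowgraph::ControlFlowGraph;
use cranelift_codegen::ir::Function;

fn analyses(func: &Function) -> (ControlFlowGraph, DominatorTree) {
    let cfg = ControlFlowGraph::with_function(func);
    // Sizes its nodes and postorder vectors from func.layout.ebb_capacity().
    let domtree = DominatorTree::with_function(func, &cfg);
    (cfg, domtree)
}
```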
diff --git a/cranelift-codegen/src/ir/libcall.rs b/cranelift-codegen/src/ir/libcall.rs index 275e22a91..1e1989122 100644 --- a/cranelift-codegen/src/ir/libcall.rs +++ b/cranelift-codegen/src/ir/libcall.rs @@ -7,6 +7,8 @@ use crate::ir::{ use crate::isa::{CallConv, RegUnit, TargetIsa}; use core::fmt; use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; /// The name of a runtime library routine. /// @@ -16,7 +18,8 @@ use core::str::FromStr; /// convention in the embedding VM's runtime library. /// /// This list is likely to grow over time. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum LibCall { /// probe for stack overflow. These are emitted for functions which need /// when the `probestack_enabled` setting is true. @@ -105,11 +108,13 @@ impl LibCall { /// If there is an existing reference, use it, otherwise make a new one. pub fn get_libcall_funcref( libcall: LibCall, + call_conv: CallConv, func: &mut Function, inst: Inst, isa: &dyn TargetIsa, ) -> FuncRef { - find_funcref(libcall, func).unwrap_or_else(|| make_funcref_for_inst(libcall, func, inst, isa)) + find_funcref(libcall, func) + .unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa)) } /// Get a function reference for the probestack function in `func`. @@ -161,11 +166,12 @@ fn make_funcref_for_probestack( /// Create a funcref for `libcall` with a signature matching `inst`. fn make_funcref_for_inst( libcall: LibCall, + call_conv: CallConv, func: &mut Function, inst: Inst, isa: &dyn TargetIsa, ) -> FuncRef { - let mut sig = Signature::new(isa.default_call_conv()); + let mut sig = Signature::new(call_conv); for &v in func.dfg.inst_args(inst) { sig.params.push(AbiParam::new(func.dfg.value_type(v))); } @@ -173,6 +179,14 @@ fn make_funcref_for_inst( sig.returns.push(AbiParam::new(func.dfg.value_type(v))); } + if call_conv.extends_baldrdash() { + // Adds the special VMContext parameter to the signature. + sig.params.push(AbiParam::special( + isa.pointer_type(), + ArgumentPurpose::VMContext, + )); + } + make_funcref(libcall, func, sig, isa) } diff --git a/cranelift-codegen/src/ir/mod.rs b/cranelift-codegen/src/ir/mod.rs index 0873561cf..2a0293b31 100644 --- a/cranelift-codegen/src/ir/mod.rs +++ b/cranelift-codegen/src/ir/mod.rs @@ -2,6 +2,7 @@ mod builder; pub mod condcodes; +pub mod constant; pub mod dfg; pub mod entities; mod extfunc; @@ -23,10 +24,15 @@ mod trapcode; pub mod types; mod valueloc; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase}; +pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; pub use crate::ir::entities::{ - Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value, + Constant, Ebb, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, + Table, Value, }; pub use crate::ir::extfunc::{ AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature, @@ -74,6 +80,7 @@ pub type SourceLocs = SecondaryMap; /// Marked with a label value. 
#[derive(Copy, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct ValueLabel(u32); entity_impl!(ValueLabel, "val"); diff --git a/cranelift-codegen/src/ir/sourceloc.rs b/cranelift-codegen/src/ir/sourceloc.rs index ab5722a57..a0d051d3a 100644 --- a/cranelift-codegen/src/ir/sourceloc.rs +++ b/cranelift-codegen/src/ir/sourceloc.rs @@ -4,6 +4,8 @@ //! location when instructions are transformed. use core::fmt; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; /// A source location. /// @@ -13,6 +15,7 @@ use core::fmt; /// The default source location uses the all-ones bit pattern `!0`. It is used for instructions /// that can't be given a real source location. #[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct SourceLoc(u32); impl SourceLoc { diff --git a/cranelift-codegen/src/ir/stackslot.rs b/cranelift-codegen/src/ir/stackslot.rs index 4c94a6b01..0b488582d 100644 --- a/cranelift-codegen/src/ir/stackslot.rs +++ b/cranelift-codegen/src/ir/stackslot.rs @@ -13,6 +13,9 @@ use core::slice; use core::str::FromStr; use std::vec::Vec; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + /// The size of an object on the stack, or the size of a stack frame. /// /// We don't use `usize` to represent object sizes on the target platform because Cranelift supports @@ -38,6 +41,7 @@ fn spill_size(ty: Type) -> StackSize { /// The kind of a stack slot. #[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum StackSlotKind { /// A spill slot. This is a stack slot created by the register allocator. SpillSlot, @@ -97,7 +101,8 @@ impl fmt::Display for StackSlotKind { } /// Contents of a stack slot. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct StackSlotData { /// The kind of stack slot. pub kind: StackSlotKind, @@ -149,7 +154,8 @@ impl fmt::Display for StackSlotData { /// Stack frame manager. /// /// Keep track of all the stack slots used by a function. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct StackSlots { /// All allocated stack slots. slots: PrimaryMap, @@ -203,11 +209,6 @@ impl StackSlots { self.slots.is_valid(ss) } - /// Set the offset of a stack slot. - pub fn set_offset(&mut self, ss: StackSlot, offset: StackOffset) { - self.slots[ss].offset = Some(offset); - } - /// Get an iterator over all the stack slot keys. pub fn iter(&self) -> Iter { self.slots.iter() diff --git a/cranelift-codegen/src/ir/types.rs b/cranelift-codegen/src/ir/types.rs index eee39c83e..10fca8aaa 100644 --- a/cranelift-codegen/src/ir/types.rs +++ b/cranelift-codegen/src/ir/types.rs @@ -10,11 +10,11 @@ use target_lexicon::{PointerWidth, Triple}; /// field is present put no type is needed, such as the controlling type variable for a /// non-polymorphic instruction. /// -/// Basic integer types: `I8`, `I16`, `I32`, and `I64`. These types are sign-agnostic. +/// Basic integer types: `I8`, `I16`, `I32`, `I64`, and `I128`. These types are sign-agnostic. /// /// Basic floating point types: `F32` and `F64`. IEEE single and double precision. /// -/// Boolean types: `B1`, `B8`, `B16`, `B32`, and `B64`. These all encode 'true' or 'false'. The +/// Boolean types: `B1`, `B8`, `B16`, `B32`, `B64`, and `B128`. 
These all encode 'true' or 'false'. The /// larger types use redundant bits. /// /// SIMD vector types have power-of-two lanes, up to 256. Lanes can be any int/float/bool type. @@ -25,7 +25,7 @@ pub struct Type(u8); /// Not a valid type. Can't be loaded or stored. Can't be part of a SIMD vector. pub const INVALID: Type = Type(0); -/// Start of the lane types. See also `meta-python/cdsl/types.py`. +/// Start of the lane types. See also `meta/src/cdsl/types.rs`. const LANE_BASE: u8 = 0x70; /// Start of the 2-lane vector types. @@ -44,18 +44,26 @@ impl Type { if self.0 < VECTOR_BASE { self } else { - Type(LANE_BASE | (self.0 & 0x0f)) + Self(LANE_BASE | (self.0 & 0x0f)) } } + /// The type transformation that returns the lane type of a type variable; it is just a + /// renaming of lane_type() to be used in context where we think in terms of type variable + /// transformations. + pub fn lane_of(self) -> Self { + self.lane_type() + } + /// Get log_2 of the number of bits in a lane. pub fn log2_lane_bits(self) -> u8 { match self.lane_type() { B1 => 0, B8 | I8 => 3, B16 | I16 => 4, - B32 | I32 | F32 => 5, - B64 | I64 | F64 => 6, + B32 | I32 | F32 | R32 => 5, + B64 | I64 | F64 | R64 => 6, + B128 | I128 => 7, _ => 0, } } @@ -66,8 +74,9 @@ impl Type { B1 => 1, B8 | I8 => 8, B16 | I16 => 16, - B32 | I32 | F32 => 32, - B64 | I64 | F64 => 64, + B32 | I32 | F32 | R32 => 32, + B64 | I64 | F64 | R64 => 64, + B128 | I128 => 128, _ => 0, } } @@ -79,6 +88,7 @@ impl Type { 16 => Some(I16), 32 => Some(I32), 64 => Some(I64), + 128 => Some(I128), _ => None, } } @@ -92,7 +102,7 @@ impl Type { /// Get a type with the same number of lanes as this type, but with the lanes replaced by /// booleans of the same size. /// - /// Scalar types are treated as vectors with one lane, so they are converted to the multi-bit + /// Lane types are treated as vectors with one lane, so they are converted to the multi-bit /// boolean types. pub fn as_bool_pedantic(self) -> Self { // Replace the low 4 bits with the boolean version, preserve the high 4 bits. @@ -101,6 +111,8 @@ impl Type { B16 | I16 => B16, B32 | I32 | F32 => B32, B64 | I64 | F64 => B64, + R32 | R64 => panic!("Reference types should not convert to bool"), + B128 | I128 => B128, _ => B1, }) } @@ -124,10 +136,12 @@ impl Type { I16 => I8, I32 => I16, I64 => I32, + I128 => I64, F64 => F32, B16 => B8, B32 => B16, B64 => B32, + B128 => B64, _ => return None, })) } @@ -139,10 +153,12 @@ impl Type { I8 => I16, I16 => I32, I32 => I64, + I64 => I128, F32 => F64, B8 => B16, B16 => B32, B32 => B64, + B64 => B128, _ => return None, })) } @@ -174,7 +190,7 @@ impl Type { /// Is this a scalar boolean type? pub fn is_bool(self) -> bool { match self { - B1 | B8 | B16 | B32 | B64 => true, + B1 | B8 | B16 | B32 | B64 | B128 => true, _ => false, } } @@ -182,7 +198,7 @@ impl Type { /// Is this a scalar integer type? pub fn is_int(self) -> bool { match self { - I8 | I16 | I32 | I64 => true, + I8 | I16 | I32 | I64 | I128 => true, _ => false, } } @@ -203,6 +219,14 @@ impl Type { } } + /// Is this a ref type? + pub fn is_ref(self) -> bool { + match self { + R32 | R64 => true, + _ => false, + } + } + /// Get log_2 of the number of lanes in this SIMD vector type. 
/// /// All SIMD types have a lane count that is a power of two and no larger than 256, so this @@ -294,6 +318,8 @@ impl Display for Type { write!(f, "f{}", self.lane_bits()) } else if self.is_vector() { write!(f, "{}x{}", self.lane_type(), self.lane_count()) + } else if self.is_ref() { + write!(f, "r{}", self.lane_bits()) } else { f.write_str(match *self { IFLAGS => "iflags", @@ -315,6 +341,8 @@ impl Debug for Type { write!(f, "types::F{}", self.lane_bits()) } else if self.is_vector() { write!(f, "{:?}X{}", self.lane_type(), self.lane_count()) + } else if self.is_ref() { + write!(f, "types::R{}", self.lane_bits()) } else { match *self { INVALID => write!(f, "types::INVALID"), @@ -350,12 +378,19 @@ mod tests { assert_eq!(B16, B16.lane_type()); assert_eq!(B32, B32.lane_type()); assert_eq!(B64, B64.lane_type()); + assert_eq!(B128, B128.lane_type()); assert_eq!(I8, I8.lane_type()); assert_eq!(I16, I16.lane_type()); assert_eq!(I32, I32.lane_type()); assert_eq!(I64, I64.lane_type()); + assert_eq!(I128, I128.lane_type()); assert_eq!(F32, F32.lane_type()); assert_eq!(F64, F64.lane_type()); + assert_eq!(B1, B1.by(8).unwrap().lane_type()); + assert_eq!(I32, I32X4.lane_type()); + assert_eq!(F64, F64X2.lane_type()); + assert_eq!(R32, R32.lane_type()); + assert_eq!(R64, R64.lane_type()); assert_eq!(INVALID.lane_bits(), 0); assert_eq!(IFLAGS.lane_bits(), 0); @@ -365,12 +400,16 @@ mod tests { assert_eq!(B16.lane_bits(), 16); assert_eq!(B32.lane_bits(), 32); assert_eq!(B64.lane_bits(), 64); + assert_eq!(B128.lane_bits(), 128); assert_eq!(I8.lane_bits(), 8); assert_eq!(I16.lane_bits(), 16); assert_eq!(I32.lane_bits(), 32); assert_eq!(I64.lane_bits(), 64); + assert_eq!(I128.lane_bits(), 128); assert_eq!(F32.lane_bits(), 32); assert_eq!(F64.lane_bits(), 64); + assert_eq!(R32.lane_bits(), 32); + assert_eq!(R64.lane_bits(), 64); } #[test] @@ -383,11 +422,13 @@ mod tests { assert_eq!(B16.half_width(), Some(B8)); assert_eq!(B32.half_width(), Some(B16)); assert_eq!(B64.half_width(), Some(B32)); + assert_eq!(B128.half_width(), Some(B64)); assert_eq!(I8.half_width(), None); assert_eq!(I16.half_width(), Some(I8)); assert_eq!(I32.half_width(), Some(I16)); assert_eq!(I32X4.half_width(), Some(I16X4)); assert_eq!(I64.half_width(), Some(I32)); + assert_eq!(I128.half_width(), Some(I64)); assert_eq!(F32.half_width(), None); assert_eq!(F64.half_width(), Some(F32)); @@ -398,12 +439,14 @@ mod tests { assert_eq!(B8.double_width(), Some(B16)); assert_eq!(B16.double_width(), Some(B32)); assert_eq!(B32.double_width(), Some(B64)); - assert_eq!(B64.double_width(), None); + assert_eq!(B64.double_width(), Some(B128)); + assert_eq!(B128.double_width(), None); assert_eq!(I8.double_width(), Some(I16)); assert_eq!(I16.double_width(), Some(I32)); assert_eq!(I32.double_width(), Some(I64)); assert_eq!(I32X4.double_width(), Some(I64X4)); - assert_eq!(I64.double_width(), None); + assert_eq!(I64.double_width(), Some(I128)); + assert_eq!(I128.double_width(), None); assert_eq!(F32.double_width(), Some(F64)); assert_eq!(F64.double_width(), None); } @@ -434,12 +477,16 @@ mod tests { assert_eq!(B16.to_string(), "b16"); assert_eq!(B32.to_string(), "b32"); assert_eq!(B64.to_string(), "b64"); + assert_eq!(B128.to_string(), "b128"); assert_eq!(I8.to_string(), "i8"); assert_eq!(I16.to_string(), "i16"); assert_eq!(I32.to_string(), "i32"); assert_eq!(I64.to_string(), "i64"); + assert_eq!(I128.to_string(), "i128"); assert_eq!(F32.to_string(), "f32"); assert_eq!(F64.to_string(), "f64"); + assert_eq!(R32.to_string(), "r32"); + assert_eq!(R64.to_string(), 
"r64"); } #[test] diff --git a/cranelift-codegen/src/ir/valueloc.rs b/cranelift-codegen/src/ir/valueloc.rs index 0cbcd5d2c..f3ba44896 100644 --- a/cranelift-codegen/src/ir/valueloc.rs +++ b/cranelift-codegen/src/ir/valueloc.rs @@ -7,8 +7,12 @@ use crate::ir::StackSlot; use crate::isa::{RegInfo, RegUnit}; use core::fmt; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + /// Value location. #[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum ValueLoc { /// This value has not been assigned to a location yet. Unassigned, diff --git a/cranelift-codegen/src/isa/arm32/binemit.rs b/cranelift-codegen/src/isa/arm32/binemit.rs index 2ca78b4d6..d74ee0911 100644 --- a/cranelift-codegen/src/isa/arm32/binemit.rs +++ b/cranelift-codegen/src/isa/arm32/binemit.rs @@ -2,6 +2,7 @@ use crate::binemit::{bad_encoding, CodeSink}; use crate::ir::{Function, Inst}; +use crate::isa::TargetIsa; use crate::regalloc::RegDiversions; include!(concat!(env!("OUT_DIR"), "/binemit-arm32.rs")); diff --git a/cranelift-codegen/src/isa/arm32/enc_tables.rs b/cranelift-codegen/src/isa/arm32/enc_tables.rs index e3833eecf..5cb4e5c4c 100644 --- a/cranelift-codegen/src/isa/arm32/enc_tables.rs +++ b/cranelift-codegen/src/isa/arm32/enc_tables.rs @@ -1,5 +1,6 @@ //! Encoding tables for ARM32 ISA. +use crate::ir; use crate::isa; use crate::isa::constraints::*; use crate::isa::enc_tables::*; diff --git a/cranelift-codegen/src/isa/arm32/mod.rs b/cranelift-codegen/src/isa/arm32/mod.rs index d4e2f3225..ced1b8876 100644 --- a/cranelift-codegen/src/isa/arm32/mod.rs +++ b/cranelift-codegen/src/isa/arm32/mod.rs @@ -42,14 +42,13 @@ fn isa_constructor( builder: shared_settings::Builder, ) -> Box { let level1 = match triple.architecture { - Architecture::Thumbv6m | Architecture::Thumbv7em | Architecture::Thumbv7m => { - &enc_tables::LEVEL1_T32[..] + Architecture::Arm(arm) => { + if arm.is_thumb() { + &enc_tables::LEVEL1_T32[..] + } else { + &enc_tables::LEVEL1_A32[..] + } } - Architecture::Arm - | Architecture::Armv4t - | Architecture::Armv5te - | Architecture::Armv7 - | Architecture::Armv7s => &enc_tables::LEVEL1_A32[..], _ => panic!(), }; Box::new(Isa { @@ -121,11 +120,11 @@ impl TargetIsa for Isa { divert: &mut regalloc::RegDiversions, sink: &mut dyn CodeSink, ) { - binemit::emit_inst(func, inst, divert, sink) + binemit::emit_inst(func, inst, divert, sink, self) } fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink) + emit_function(func, binemit::emit_inst, sink, self) } } diff --git a/cranelift-codegen/src/isa/arm32/settings.rs b/cranelift-codegen/src/isa/arm32/settings.rs index 084c14239..bef631b2b 100644 --- a/cranelift-codegen/src/isa/arm32/settings.rs +++ b/cranelift-codegen/src/isa/arm32/settings.rs @@ -3,7 +3,7 @@ use crate::settings::{self, detail, Builder}; use core::fmt; -// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public -// `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta-python/isa/arm32/settings.py`. +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/arm32/mod.rs`. 
include!(concat!(env!("OUT_DIR"), "/settings-arm32.rs")); diff --git a/cranelift-codegen/src/isa/arm64/binemit.rs b/cranelift-codegen/src/isa/arm64/binemit.rs index 05df67e5b..4401b6d6f 100644 --- a/cranelift-codegen/src/isa/arm64/binemit.rs +++ b/cranelift-codegen/src/isa/arm64/binemit.rs @@ -2,6 +2,7 @@ use crate::binemit::{bad_encoding, CodeSink}; use crate::ir::{Function, Inst}; +use crate::isa::TargetIsa; use crate::regalloc::RegDiversions; include!(concat!(env!("OUT_DIR"), "/binemit-arm64.rs")); diff --git a/cranelift-codegen/src/isa/arm64/enc_tables.rs b/cranelift-codegen/src/isa/arm64/enc_tables.rs index 6e1b73e0e..6040a9b86 100644 --- a/cranelift-codegen/src/isa/arm64/enc_tables.rs +++ b/cranelift-codegen/src/isa/arm64/enc_tables.rs @@ -1,5 +1,6 @@ //! Encoding tables for ARM64 ISA. +use crate::ir; use crate::isa; use crate::isa::constraints::*; use crate::isa::enc_tables::*; diff --git a/cranelift-codegen/src/isa/arm64/mod.rs b/cranelift-codegen/src/isa/arm64/mod.rs index c24be19f6..d787524a6 100644 --- a/cranelift-codegen/src/isa/arm64/mod.rs +++ b/cranelift-codegen/src/isa/arm64/mod.rs @@ -108,11 +108,11 @@ impl TargetIsa for Isa { divert: &mut regalloc::RegDiversions, sink: &mut dyn CodeSink, ) { - binemit::emit_inst(func, inst, divert, sink) + binemit::emit_inst(func, inst, divert, sink, self) } fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink) + emit_function(func, binemit::emit_inst, sink, self) } } diff --git a/cranelift-codegen/src/isa/arm64/settings.rs b/cranelift-codegen/src/isa/arm64/settings.rs index b7be97e36..56d0f4ee0 100644 --- a/cranelift-codegen/src/isa/arm64/settings.rs +++ b/cranelift-codegen/src/isa/arm64/settings.rs @@ -3,7 +3,7 @@ use crate::settings::{self, detail, Builder}; use core::fmt; -// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public -// `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta-python/isa/arm64/settings.py`. +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/arm64/mod.rs`. include!(concat!(env!("OUT_DIR"), "/settings-arm64.rs")); diff --git a/cranelift-codegen/src/isa/call_conv.rs b/cranelift-codegen/src/isa/call_conv.rs index d4b4d3999..e834ad251 100644 --- a/cranelift-codegen/src/isa/call_conv.rs +++ b/cranelift-codegen/src/isa/call_conv.rs @@ -1,3 +1,5 @@ +use crate::isa::TargetIsa; +use crate::settings::LibcallCallConv; use core::fmt; use core::str; use target_lexicon::{CallingConvention, Triple}; @@ -13,8 +15,10 @@ pub enum CallConv { SystemV, /// Windows "fastcall" convention, also used for x64 and ARM WindowsFastcall, - /// SpiderMonkey WebAssembly convention - Baldrdash, + /// SpiderMonkey WebAssembly convention on systems using natively SystemV + BaldrdashSystemV, + /// SpiderMonkey WebAssembly convention on Windows + BaldrdashWindows, /// Specialized convention for the probestack function Probestack, } @@ -27,6 +31,37 @@ impl CallConv { // uses System V. Ok(CallingConvention::SystemV) | Err(()) => CallConv::SystemV, Ok(CallingConvention::WindowsFastcall) => CallConv::WindowsFastcall, + Ok(unimp) => unimplemented!("calling convention: {:?}", unimp), + } + } + + /// Returns the calling convention used for libcalls for the given ISA. 
+ + /// Returns the calling convention used for libcalls for the given ISA. + pub fn for_libcall(isa: &dyn TargetIsa) -> Self { + match isa.flags().libcall_call_conv() { + LibcallCallConv::IsaDefault => isa.default_call_conv(), + LibcallCallConv::Fast => CallConv::Fast, + LibcallCallConv::Cold => CallConv::Cold, + LibcallCallConv::SystemV => CallConv::SystemV, + LibcallCallConv::WindowsFastcall => CallConv::WindowsFastcall, + LibcallCallConv::BaldrdashSystemV => CallConv::BaldrdashSystemV, + LibcallCallConv::BaldrdashWindows => CallConv::BaldrdashWindows, + LibcallCallConv::Probestack => CallConv::Probestack, + } + } + + /// Is the calling convention extending the Windows Fastcall ABI? + pub fn extends_windows_fastcall(&self) -> bool { + match self { + CallConv::WindowsFastcall | CallConv::BaldrdashWindows => true, + _ => false, + } + } + + /// Is the calling convention extending the Baldrdash ABI? + pub fn extends_baldrdash(&self) -> bool { + match self { + CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => true, + _ => false, } } } @@ -38,7 +73,8 @@ impl fmt::Display for CallConv { CallConv::Cold => "cold", CallConv::SystemV => "system_v", CallConv::WindowsFastcall => "windows_fastcall", - CallConv::Baldrdash => "baldrdash", + CallConv::BaldrdashSystemV => "baldrdash_system_v", + CallConv::BaldrdashWindows => "baldrdash_windows", CallConv::Probestack => "probestack", }) } @@ -52,7 +88,8 @@ impl str::FromStr for CallConv { "cold" => Ok(CallConv::Cold), "system_v" => Ok(CallConv::SystemV), "windows_fastcall" => Ok(CallConv::WindowsFastcall), - "baldrdash" => Ok(CallConv::Baldrdash), + "baldrdash_system_v" => Ok(CallConv::BaldrdashSystemV), + "baldrdash_windows" => Ok(CallConv::BaldrdashWindows), "probestack" => Ok(CallConv::Probestack), _ => Err(()), } diff --git a/cranelift-codegen/src/isa/enc_tables.rs b/cranelift-codegen/src/isa/enc_tables.rs index 45ad86441..e21557497 100644 --- a/cranelift-codegen/src/isa/enc_tables.rs +++ b/cranelift-codegen/src/isa/enc_tables.rs @@ -1,7 +1,7 @@ //! Support types for generated encoding tables. //! //! This module contains types and functions for working with the encoding tables generated by -//! `cranelift-codegen/meta-python/gen_encoding.py`. +//! `cranelift-codegen/meta/src/gen_encodings.rs`. use crate::constant_hash::{probe, Table}; use crate::ir::{Function, InstructionData, Opcode, Type}; @@ -164,10 +164,10 @@ where /// Encoding lists are represented as sequences of u16 words. pub type EncListEntry = u16; -/// Number of bits used to represent a predicate. c.f. `meta-python/gen_encoding.py`. +/// Number of bits used to represent a predicate. cf. `meta/src/gen_encodings.rs`. const PRED_BITS: u8 = 12; const PRED_MASK: usize = (1 << PRED_BITS) - 1; -/// First code word representing a predicate check. c.f. `meta-python/gen_encoding.py`. +/// First code word representing a predicate check. cf. `meta/src/gen_encodings.rs`. const PRED_START: usize = 0x1000; /// An iterator over legal encodings for the instruction.
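The next file adds `lookup_by_name`, which pairs naturally with `CallConv::for_libcall` above. A hedged sketch of the two together; the setting string `"isa_default"` is an assumption about the generated settings (the textual form of `LibcallCallConv::IsaDefault`), and `Builder::finish` is the pre-existing constructor taking the shared flags:

```rust
use cranelift_codegen::isa::{self, CallConv};
use cranelift_codegen::settings::{self, Configurable};

fn libcall_conv_for_x86_64_linux() -> CallConv {
    let mut flag_builder = settings::builder();
    // Assumed textual form of `LibcallCallConv::IsaDefault`.
    flag_builder.set("libcall_call_conv", "isa_default").unwrap();
    let builder = isa::lookup_by_name("x86_64-unknown-linux-gnu").unwrap();
    let target = builder.finish(settings::Flags::new(flag_builder));
    // With the default setting this falls back to the ISA's own
    // convention, which is System V on this triple.
    CallConv::for_libcall(&*target)
}
```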
diff --git a/cranelift-codegen/src/isa/mod.rs b/cranelift-codegen/src/isa/mod.rs index eb6b4d265..c36d29ce2 100644 --- a/cranelift-codegen/src/isa/mod.rs +++ b/cranelift-codegen/src/isa/mod.rs @@ -66,7 +66,7 @@ use crate::timing; use core::fmt; use failure_derive::Fail; use std::boxed::Box; -use target_lexicon::{Architecture, PointerWidth, Triple}; +use target_lexicon::{triple, Architecture, PointerWidth, Triple}; #[cfg(feature = "riscv")] mod riscv; @@ -88,42 +88,41 @@ pub mod registers; mod stack; /// Returns a builder that can create a corresponding `TargetIsa` -/// or `Err(LookupError::Unsupported)` if not enabled. +/// or `Err(LookupError::SupportDisabled)` if not enabled. macro_rules! isa_builder { - ($name:ident, $feature:tt) => {{ + ($name: ident, $feature: tt, $triple: ident) => {{ #[cfg(feature = $feature)] - fn $name(triple: Triple) -> Result { - Ok($name::isa_builder(triple)) - }; + { + Ok($name::isa_builder($triple)) + } #[cfg(not(feature = $feature))] - fn $name(_triple: Triple) -> Result { - Err(LookupError::Unsupported) + { + Err(LookupError::SupportDisabled) } - $name }}; } -/// Look for a supported ISA with the given `name`. +/// Look for an ISA for the given `triple`. /// Return a builder that can create a corresponding `TargetIsa`. pub fn lookup(triple: Triple) -> Result { match triple.architecture { - Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv")(triple), + Architecture::Riscv32 | Architecture::Riscv64 => isa_builder!(riscv, "riscv", triple), Architecture::I386 | Architecture::I586 | Architecture::I686 | Architecture::X86_64 => { - isa_builder!(x86, "x86")(triple) + isa_builder!(x86, "x86", triple) } - Architecture::Thumbv6m - | Architecture::Thumbv7em - | Architecture::Thumbv7m - | Architecture::Arm - | Architecture::Armv4t - | Architecture::Armv5te - | Architecture::Armv7 - | Architecture::Armv7s => isa_builder!(arm32, "arm32")(triple), - Architecture::Aarch64 => isa_builder!(arm64, "arm64")(triple), + Architecture::Arm { .. } => isa_builder!(arm32, "arm32", triple), + Architecture::Aarch64 { .. } => isa_builder!(arm64, "arm64", triple), _ => Err(LookupError::Unsupported), } } +/// Look for a supported ISA with the given `name`. +/// Return a builder that can create a corresponding `TargetIsa`. +pub fn lookup_by_name(name: &str) -> Result { + use std::str::FromStr; + lookup(triple!(name)) +} + /// Describes reason for target lookup failure #[derive(Fail, PartialEq, Eq, Copy, Clone, Debug)] pub enum LookupError { @@ -343,7 +342,7 @@ pub trait TargetIsa: fmt::Display + Sync { let word_size = StackSize::from(self.pointer_bytes()); // Account for the SpiderMonkey standard prologue pushes. - if func.signature.call_conv == CallConv::Baldrdash { + if func.signature.call_conv.extends_baldrdash() { let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size; let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); ss.offset = Some(-(bytes as StackOffset)); diff --git a/cranelift-codegen/src/isa/registers.rs b/cranelift-codegen/src/isa/registers.rs index 6baebd723..f7fcdcac2 100644 --- a/cranelift-codegen/src/isa/registers.rs +++ b/cranelift-codegen/src/isa/registers.rs @@ -17,19 +17,19 @@ pub type RegUnit = u16; /// The size of this type is determined by the target ISA that has the most register units defined. /// Currently that is arm32 which has 64+16 units. /// -/// This type should be coordinated with meta-python/cdsl/registers.py. +/// This type should be coordinated with meta/src/cdsl/regs.rs. 
pub type RegUnitMask = [u32; 3]; /// A bit mask indexed by register classes. /// /// The size of this type is determined by the ISA with the most register classes. /// -/// This type should be coordinated with meta-python/cdsl/isa.py. +/// This type should be coordinated with meta/src/cdsl/regs.rs. pub type RegClassMask = u32; /// Guaranteed maximum number of top-level register classes with pressure tracking in any ISA. /// -/// This can be increased, but should be coordinated with meta-python/cdsl/isa.py. +/// This can be increased, but should be coordinated with meta/src/cdsl/regs.rs. pub const MAX_TRACKED_TOPRCS: usize = 4; /// The register units in a target ISA are divided into disjoint register banks. Each bank covers a @@ -37,7 +37,7 @@ pub const MAX_TRACKED_TOPRCS: usize = 4; /// /// The `RegBank` struct provides a static description of a register bank. pub struct RegBank { - /// The name of this register bank as defined in the ISA's `registers.py` file. + /// The name of this register bank as defined in the ISA's DSL definition. pub name: &'static str, /// The first register unit in this bank. @@ -154,6 +154,12 @@ pub struct RegClassData { /// The global `RegInfo` instance containing this register class. pub info: &'static RegInfo, + + /// The "pinned" register of the associated register bank. + /// + /// This register must be non-volatile (callee-preserved) and must not be the fixed + /// output register of any instruction. + pub pinned_reg: Option, } impl RegClassData { @@ -201,6 +207,15 @@ impl RegClassData { pub fn contains(&self, regunit: RegUnit) -> bool { self.mask[(regunit / 32) as usize] & (1u32 << (regunit % 32)) != 0 } + + /// If the pinned register is used, is the given regunit the pinned register of this class? + #[inline] + pub fn is_pinned_reg(&self, enabled: bool, regunit: RegUnit) -> bool { + enabled + && self + .pinned_reg + .map_or(false, |pinned_reg| pinned_reg == regunit) + } } impl fmt::Display for RegClassData { diff --git a/cranelift-codegen/src/isa/riscv/binemit.rs b/cranelift-codegen/src/isa/riscv/binemit.rs index 64f3c0029..a1d2b82e1 100644 --- a/cranelift-codegen/src/isa/riscv/binemit.rs +++ b/cranelift-codegen/src/isa/riscv/binemit.rs @@ -2,7 +2,7 @@ use crate::binemit::{bad_encoding, CodeSink, Reloc}; use crate::ir::{Function, Inst, InstructionData}; -use crate::isa::{RegUnit, StackBaseMask, StackRef}; +use crate::isa::{RegUnit, StackBaseMask, StackRef, TargetIsa}; use crate::predicates::is_signed_int; use crate::regalloc::RegDiversions; use core::u32; diff --git a/cranelift-codegen/src/isa/riscv/enc_tables.rs b/cranelift-codegen/src/isa/riscv/enc_tables.rs index 19488003a..76184ad72 100644 --- a/cranelift-codegen/src/isa/riscv/enc_tables.rs +++ b/cranelift-codegen/src/isa/riscv/enc_tables.rs @@ -6,6 +6,7 @@ use crate::isa; use crate::isa::constraints::*; use crate::isa::enc_tables::*; use crate::isa::encoding::{base_size, RecipeSizing}; +use crate::predicates; // Include the generated encoding tables: // - `LEVEL1_RV32` diff --git a/cranelift-codegen/src/isa/riscv/mod.rs b/cranelift-codegen/src/isa/riscv/mod.rs index 3096ff69a..233e92a3b 100644 --- a/cranelift-codegen/src/isa/riscv/mod.rs +++ b/cranelift-codegen/src/isa/riscv/mod.rs @@ -115,11 +115,11 @@ impl TargetIsa for Isa { divert: &mut regalloc::RegDiversions, sink: &mut dyn CodeSink, ) { - binemit::emit_inst(func, inst, divert, sink) + binemit::emit_inst(func, inst, divert, sink, self) } fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, 
binemit::emit_inst, sink) + emit_function(func, binemit::emit_inst, sink, self) } } diff --git a/cranelift-codegen/src/isa/riscv/settings.rs b/cranelift-codegen/src/isa/riscv/settings.rs index 26999b87e..24c0e2af1 100644 --- a/cranelift-codegen/src/isa/riscv/settings.rs +++ b/cranelift-codegen/src/isa/riscv/settings.rs @@ -3,9 +3,9 @@ use crate::settings::{self, detail, Builder}; use core::fmt; -// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public -// `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta-python/isa/riscv/settings.py`. +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/riscv/mod.rs`. include!(concat!(env!("OUT_DIR"), "/settings-riscv.rs")); #[cfg(test)] @@ -35,7 +35,9 @@ mod tests { #[test] fn predicates() { - let shared = settings::Flags::new(settings::builder()); + let mut sb = settings::builder(); + sb.set("enable_simd", "true").unwrap(); + let shared = settings::Flags::new(sb); let mut b = builder(); b.enable("supports_f").unwrap(); b.enable("supports_d").unwrap(); diff --git a/cranelift-codegen/src/isa/stack.rs b/cranelift-codegen/src/isa/stack.rs index cc7b57803..852aedddd 100644 --- a/cranelift-codegen/src/isa/stack.rs +++ b/cranelift-codegen/src/isa/stack.rs @@ -82,7 +82,7 @@ pub enum StackBase { /// This behaves like a set of `StackBase` variants. /// /// The internal representation as a `u8` is public because stack base masks are used in constant -/// tables generated from the Python encoding definitions. +/// tables generated from the meta-language encoding definitions. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct StackBaseMask(pub u8); diff --git a/cranelift-codegen/src/isa/x86/abi.rs b/cranelift-codegen/src/isa/x86/abi.rs index 549ec644c..6252a3fb7 100644 --- a/cranelift-codegen/src/isa/x86/abi.rs +++ b/cranelift-codegen/src/isa/x86/abi.rs @@ -1,6 +1,8 @@ //! x86 ABI implementation. +use super::super::settings as shared_settings; use super::registers::{FPR, GPR, RU}; +use super::settings as isa_settings; use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion}; use crate::cursor::{Cursor, CursorPosition, EncCursor}; use crate::ir; @@ -39,11 +41,21 @@ struct Args { fpr_used: usize, offset: u32, call_conv: CallConv, + shared_flags: shared_settings::Flags, + #[allow(dead_code)] + isa_flags: isa_settings::Flags, } impl Args { - fn new(bits: u8, gpr: &'static [RU], fpr_limit: usize, call_conv: CallConv) -> Self { - let offset = if let CallConv::WindowsFastcall = call_conv { + fn new( + bits: u8, + gpr: &'static [RU], + fpr_limit: usize, + call_conv: CallConv, + shared_flags: &shared_settings::Flags, + isa_flags: &isa_settings::Flags, + ) -> Self { + let offset = if call_conv.extends_windows_fastcall() { // [1] "The caller is responsible for allocating space for parameters to the callee, // and must always allocate sufficient space to store four register parameters" 32 @@ -61,6 +73,8 @@ impl Args { fpr_used: 0, offset, call_conv, + shared_flags: shared_flags.clone(), + isa_flags: isa_flags.clone(), } } } @@ -69,9 +83,13 @@ impl ArgAssigner for Args { fn assign(&mut self, arg: &AbiParam) -> ArgAction { let ty = arg.value_type; - // Check for a legal type. - // We don't support SIMD yet, so break all vectors down. 
+ // Vectors should stay in vector registers unless SIMD is disabled, in which case they are split if ty.is_vector() { + if self.shared_flags.enable_simd() { + let reg = FPR.unit(self.fpr_used); + self.fpr_used += 1; + return ArgumentLoc::Reg(reg).into(); + } return ValueConversion::VectorSplit.into(); } @@ -90,7 +108,7 @@ impl ArgAssigner for Args { } // Handle special-purpose arguments. - if ty.is_int() && self.call_conv == CallConv::Baldrdash { + if ty.is_int() && self.call_conv.extends_baldrdash() { match arg.purpose { // This is SpiderMonkey's `WasmTlsReg`. ArgumentPurpose::VMContext => { @@ -115,7 +133,7 @@ impl ArgAssigner for Args { } // Try to use an FPR. - let fpr_offset = if self.call_conv == CallConv::WindowsFastcall { + let fpr_offset = if self.call_conv.extends_windows_fastcall() { // Float and general registers on windows share the same parameter index. // The used register depends entirely on the parameter index: Even if XMM0 // is not used for the first parameter, it cannot be used for the second parameter. @@ -124,6 +142,7 @@ impl ArgAssigner for Args { } else { &mut self.fpr_used }; + if ty.is_float() && *fpr_offset < self.fpr_limit { let reg = FPR.unit(*fpr_offset); *fpr_offset += 1; @@ -139,7 +158,13 @@ impl ArgAssigner for Args { } /// Legalize `sig`. -pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bool) { +pub fn legalize_signature( + sig: &mut ir::Signature, + triple: &Triple, + _current: bool, + shared_flags: &shared_settings::Flags, + isa_flags: &isa_settings::Flags, +) { let bits; let mut args; @@ -147,28 +172,49 @@ pub fn legalize_signature(sig: &mut ir::Signature, triple: &Triple, _current: bo PointerWidth::U16 => panic!(), PointerWidth::U32 => { bits = 32; - args = Args::new(bits, &[], 0, sig.call_conv); + args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags); } PointerWidth::U64 => { bits = 64; - args = if sig.call_conv == CallConv::WindowsFastcall { - Args::new(bits, &ARG_GPRS_WIN_FASTCALL_X64[..], 4, sig.call_conv) + args = if sig.call_conv.extends_windows_fastcall() { + Args::new( + bits, + &ARG_GPRS_WIN_FASTCALL_X64[..], + 4, + sig.call_conv, + shared_flags, + isa_flags, + ) } else { - Args::new(bits, &ARG_GPRS[..], 8, sig.call_conv) + Args::new( + bits, + &ARG_GPRS[..], + 8, + sig.call_conv, + shared_flags, + isa_flags, + ) }; } } legalize_args(&mut sig.params, &mut args); - let (regs, fpr_limit) = if sig.call_conv == CallConv::WindowsFastcall { + let (regs, fpr_limit) = if sig.call_conv.extends_windows_fastcall() { // windows-x64 calling convention only uses XMM0 or RAX for return values (&RET_GPRS_WIN_FASTCALL_X64[..], 1) } else { (&RET_GPRS[..], 2) }; - let mut rets = Args::new(bits, regs, fpr_limit, sig.call_conv); + let mut rets = Args::new( + bits, + regs, + fpr_limit, + sig.call_conv, + shared_flags, + isa_flags, + ); legalize_args(&mut sig.returns, &mut rets); }
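Several hunks around here thread the shared flags through the x86 ABI code: `enable_simd` gates the vector-in-register argument path above, while the two pinned-register flags are consumed by `allocatable_registers` below and by the heap legalization later in this patch. A minimal sketch of building such flags (not part of the patch; assumes all three are plain boolean settings, as their use elsewhere in the diff suggests):

```rust
use cranelift_codegen::settings::{self, Configurable};

fn flags_exercising_this_patch() -> settings::Flags {
    let mut b = settings::builder();
    b.set("enable_simd", "true").unwrap();
    b.enable("enable_pinned_reg").unwrap();
    b.enable("use_pinned_reg_as_heap_base").unwrap();
    settings::Flags::new(b)
}
```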
@@ -182,7 +228,7 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass { } /// Get the set of allocatable registers for `func`. -pub fn allocatable_registers(_func: &ir::Function, triple: &Triple) -> RegisterSet { +pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet { let mut regs = RegisterSet::new(); regs.take(GPR, RU::rsp as RegUnit); regs.take(GPR, RU::rbp as RegUnit); @@ -193,6 +239,15 @@ pub fn allocatable_registers(_func: &ir::Function, triple: &Triple) -> RegisterS regs.take(GPR, GPR.unit(i)); regs.take(FPR, FPR.unit(i)); } + if flags.enable_pinned_reg() { + unimplemented!("Pinned register not implemented on x86-32."); + } + } else { + // Choose r15 as the pinned register on x86-64: it is non-volatile on native ABIs and + // isn't the fixed output register of any instruction. + if flags.enable_pinned_reg() { + regs.take(GPR, RU::r15 as RegUnit); + } } regs @@ -204,7 +259,7 @@ fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] PointerWidth::U16 => panic!(), PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi], PointerWidth::U64 => { - if call_conv == CallConv::WindowsFastcall { + if call_conv.extends_windows_fastcall() { // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15 are considered nonvolatile // and must be saved and restored by a function that uses them." // as per https://msdn.microsoft.com/en-us/library/6t169e9c.aspx @@ -276,7 +331,9 @@ pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> Codege system_v_prologue_epilogue(func, isa) } CallConv::WindowsFastcall => fastcall_prologue_epilogue(func, isa), - CallConv::Baldrdash => baldrdash_prologue_epilogue(func, isa), + CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => { + baldrdash_prologue_epilogue(func, isa) + } CallConv::Probestack => unimplemented!("probestack calling convention"), } } @@ -290,7 +347,14 @@ fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> // Baldrdash on 32-bit x86 always aligns its stack pointer to 16 bytes. let stack_align = 16; let word_size = StackSize::from(isa.pointer_bytes()); - let bytes = StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size; + let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() { + 32 + } else { + 0 + }; + + let bytes = + StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size; let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); ss.offset = Some(-(bytes as StackOffset)); diff --git a/cranelift-codegen/src/isa/x86/binemit.rs b/cranelift-codegen/src/isa/x86/binemit.rs index ef44766d0..a5924ed03 100644 --- a/cranelift-codegen/src/isa/x86/binemit.rs +++ b/cranelift-codegen/src/isa/x86/binemit.rs @@ -4,8 +4,8 @@ use super::enc_tables::{needs_offset, needs_sib_byte}; use super::registers::RU; use crate::binemit::{bad_encoding, CodeSink, Reloc}; use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::{Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode}; -use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef}; +use crate::ir::{Constant, Ebb, Function, Inst, InstructionData, JumpTable, Opcode, TrapCode}; +use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa}; use crate::regalloc::RegDiversions; include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs")); @@ -341,3 +341,11 @@ fn jt_disp4(jt: JumpTable, func: &Function, sink: &mut CS sink.put4(delta); sink.reloc_jt(Reloc::X86PCRelRodata4, jt); } + +/// Emit a four-byte displacement to `constant`.
+fn const_disp4(constant: Constant, func: &Function, sink: &mut CS) { + let offset = func.dfg.constants.get_offset(constant); + let delta = offset.wrapping_sub(sink.offset() + 4); + sink.put4(delta); + sink.reloc_constant(Reloc::X86PCRelRodata4, offset); +} diff --git a/cranelift-codegen/src/isa/x86/enc_tables.rs b/cranelift-codegen/src/isa/x86/enc_tables.rs index 474ef3c54..94333116e 100644 --- a/cranelift-codegen/src/isa/x86/enc_tables.rs +++ b/cranelift-codegen/src/isa/x86/enc_tables.rs @@ -5,13 +5,15 @@ use crate::bitset::BitSet; use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::types::*; use crate::ir::{self, Function, Inst, InstBuilder}; -use crate::isa; use crate::isa::constraints::*; use crate::isa::enc_tables::*; use crate::isa::encoding::base_size; use crate::isa::encoding::RecipeSizing; use crate::isa::RegUnit; +use crate::isa::{self, TargetIsa}; +use crate::predicates; use crate::regalloc::RegDiversions; include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); @@ -115,7 +117,7 @@ fn expand_sdivrem( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - isa: &dyn isa::TargetIsa, + isa: &dyn TargetIsa, ) { let (x, y, is_srem) = match func.dfg[inst] { ir::InstructionData::Binary { @@ -173,6 +175,9 @@ fn expand_sdivrem( return; } + // EBB handling the nominal case. + let nominal = pos.func.dfg.make_ebb(); + // EBB handling the -1 divisor case. let minus_one = pos.func.dfg.make_ebb(); @@ -185,9 +190,11 @@ fn expand_sdivrem( // Start by checking for a -1 divisor which needs to be handled specially. let is_m1 = pos.ins().ifcmp_imm(y, -1); pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]); + pos.ins().jump(nominal, &[]); // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division // by zero. + pos.insert_ebb(nominal); let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1); let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y); let divres = if is_srem { rem } else { quot }; @@ -216,6 +223,7 @@ fn expand_sdivrem( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, nominal); cfg.recompute_ebb(pos.func, minus_one); cfg.recompute_ebb(pos.func, done); } @@ -225,7 +233,7 @@ fn expand_udivrem( inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph, - isa: &dyn isa::TargetIsa, + isa: &dyn TargetIsa, ) { let (x, y, is_urem) = match func.dfg[inst] { ir::InstructionData::Binary { @@ -278,7 +286,7 @@ fn expand_minmax( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] { ir::InstructionData::Binary { @@ -300,12 +308,18 @@ fn expand_minmax( // fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0. // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical. + // EBB handling case 1) where operands are ordered but not equal. + let one_ebb = func.dfg.make_ebb(); + // EBB handling case 3) where one operand is NaN. let uno_ebb = func.dfg.make_ebb(); // EBB that handles the unordered or equal cases 2) and 3). let ueq_ebb = func.dfg.make_ebb(); + // EBB handling case 2) where operands are ordered and equal. + let eq_ebb = func.dfg.make_ebb(); + // Final EBB with one argument representing the final result value. 
let done = func.dfg.make_ebb(); @@ -326,8 +340,10 @@ fn expand_minmax( pos.use_srcloc(inst); let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y); pos.ins().brnz(cmp_ueq, ueq_ebb, &[]); + pos.ins().jump(one_ebb, &[]); // Handle the common ordered, not equal (LT|GT) case. + pos.insert_ebb(one_ebb); let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0; let one_result = pos.func.dfg.first_result(one_inst); pos.ins().jump(done, &[one_result]); @@ -345,9 +361,11 @@ fn expand_minmax( // TODO: When we get support for flag values, we can reuse the above comparison. let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y); pos.ins().brnz(cmp_uno, uno_ebb, &[]); + pos.ins().jump(eq_ebb, &[]); // We are now in case 2) where x and y compare EQ. // We need a bitwise operation to get the sign right. + pos.insert_ebb(eq_ebb); let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0; let bw_result = pos.func.dfg.first_result(bw_inst); // This should become a fall-through for this second most common case. @@ -359,8 +377,10 @@ fn expand_minmax( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); - cfg.recompute_ebb(pos.func, ueq_ebb); + cfg.recompute_ebb(pos.func, one_ebb); cfg.recompute_ebb(pos.func, uno_ebb); + cfg.recompute_ebb(pos.func, ueq_ebb); + cfg.recompute_ebb(pos.func, eq_ebb); cfg.recompute_ebb(pos.func, done); } @@ -370,7 +390,7 @@ fn expand_fcvt_from_uint( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { let x; match func.dfg[inst] { @@ -386,16 +406,23 @@ fn expand_fcvt_from_uint( let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); - // Conversion from unsigned 32-bit is easy on x86-64. - // TODO: This should be guarded by an ISA check. - if xty == ir::types::I32 { - let wide = pos.ins().uextend(ir::types::I64, x); - pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide); - return; + // Conversion from an unsigned int smaller than 64 bits is easy on x86-64. + match xty { + ir::types::I8 | ir::types::I16 | ir::types::I32 => { + // TODO: This should be guarded by an ISA check. + let wide = pos.ins().uextend(ir::types::I64, x); + pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide); + return; + } + ir::types::I64 => {} + _ => unimplemented!(), } let old_ebb = pos.func.layout.pp_ebb(inst); + // EBB handling the case where x >= 0. + let poszero_ebb = pos.func.dfg.make_ebb(); + // EBB handling the case where x < 0. let neg_ebb = pos.func.dfg.make_ebb(); @@ -409,8 +436,10 @@ fn expand_fcvt_from_uint( // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction. let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0); pos.ins().brnz(is_neg, neg_ebb, &[]); + pos.ins().jump(poszero_ebb, &[]); // Easy case: just use a signed conversion. + pos.insert_ebb(poszero_ebb); let posres = pos.ins().fcvt_from_sint(ty, x); pos.ins().jump(done, &[posres]); @@ -433,6 +462,7 @@ fn expand_fcvt_from_uint( pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, poszero_ebb); cfg.recompute_ebb(pos.func, neg_ebb); cfg.recompute_ebb(pos.func, done); }
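Why the widened conversion in `expand_fcvt_from_uint` above is sound: zero-extending an unsigned value of at most 32 bits into a 64-bit signed integer can never produce a negative number, so the signed conversion agrees with the unsigned one. Checked in plain Rust (not part of the patch):

```rust
fn widening_matches_unsigned_conversion() {
    for &x in &[0u32, 1, 0x8000_0000, u32::max_value()] {
        // Non-negative after widening, so i64 -> f64 equals u32 -> f64.
        assert_eq!(i64::from(x) as f64, x as f64);
    }
}
```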
@@ -441,7 +471,7 @@ fn expand_fcvt_to_sint( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { use crate::ir::immediates::{Ieee32, Ieee64}; @@ -460,6 +490,9 @@ // Final EBB after the bad value checks. let done = func.dfg.make_ebb(); + // EBB for checking failure cases. + let maybe_trap_ebb = func.dfg.make_ebb(); + // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow. // It produces an INT_MIN result instead. func.dfg.replace(inst).x86_cvtt2si(ty, x); @@ -471,6 +504,7 @@ .ins() .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1)); pos.ins().brnz(is_done, done, &[]); + pos.ins().jump(maybe_trap_ebb, &[]); // We now have the following possibilities: // // 1. The input was converted correctly -> nothing to do // 2. The input was NaN -> trap bad_toint // 3. The input was out of range -> trap int_ovf // + pos.insert_ebb(maybe_trap_ebb); // Check for NaN. let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); @@ -529,6 +564,7 @@ pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, maybe_trap_ebb); cfg.recompute_ebb(pos.func, done); } @@ -536,7 +572,7 @@ fn expand_fcvt_to_sint_sat( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { use crate::ir::immediates::{Ieee32, Ieee64}; @@ -558,6 +594,9 @@ // Final EBB after the bad value checks. let done_ebb = func.dfg.make_ebb(); + let intmin_ebb = func.dfg.make_ebb(); + let minsat_ebb = func.dfg.make_ebb(); + let maxsat_ebb = func.dfg.make_ebb(); func.dfg.clear_results(inst); func.dfg.attach_ebb_param(done_ebb, result); @@ -572,20 +611,24 @@ .ins() .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1)); pos.ins().brnz(is_done, done_ebb, &[cvtt2si]); + pos.ins().jump(intmin_ebb, &[]); // We now have the following possibilities: // // 1. INT_MIN was actually the correct conversion result. // 2. The input was NaN -> replace the result value with 0. // 3. The input was out of range -> saturate the result to the min/max value. + pos.insert_ebb(intmin_ebb); // Check for NaN, which is truncated to 0. let zero = pos.ins().iconst(ty, 0); let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x); pos.ins().brnz(is_nan, done_ebb, &[zero]); + pos.ins().jump(minsat_ebb, &[]); // Check for case 1: INT_MIN is the correct result. // Determine the smallest floating point number that would convert to INT_MIN. + pos.insert_ebb(minsat_ebb); let mut overflow_cc = FloatCC::LessThan; let output_bits = ty.lane_bits(); let flimit = match xty { @@ -622,8 +665,10 @@ }; let min_value = pos.ins().iconst(ty, min_imm); pos.ins().brnz(overflow, done_ebb, &[min_value]); + pos.ins().jump(maxsat_ebb, &[]); // Finally, we could have a positive value that is too large. + pos.insert_ebb(maxsat_ebb); let fzero = match xty { ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)), ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)), @@ -648,6 +693,9 @@ pos.insert_ebb(done_ebb); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, intmin_ebb); + cfg.recompute_ebb(pos.func, minsat_ebb); + cfg.recompute_ebb(pos.func, maxsat_ebb); cfg.recompute_ebb(pos.func, done_ebb); }
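These expansions all follow one mechanical shape: a conditional branch can no longer fall through into the code after it, so each `brnz`/`brif`/`brff` is chased with an explicit `jump` to a freshly made EBB, and `insert_ebb` then continues emission inside it. A self-contained sketch of the shape, as a hypothetical helper that is not part of the patch:

```rust
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::{self, types, InstBuilder, TrapCode};

fn append_guarded_trap(func: &mut ir::Function) {
    let entry = func.dfg.make_ebb();
    let trap_ebb = func.dfg.make_ebb();
    let resume = func.dfg.make_ebb();
    func.layout.append_ebb(entry);

    let mut pos = FuncCursor::new(func).at_bottom(entry);
    let cond = pos.ins().iconst(types::I32, 1);
    pos.ins().brz(cond, resume, &[]);
    // Explicit terminator instead of an implicit fall-through...
    pos.ins().jump(trap_ebb, &[]);
    // ...then keep emitting inside the newly inserted EBBs.
    pos.insert_ebb(trap_ebb);
    pos.ins().trap(TrapCode::User(0));
    pos.insert_ebb(resume);
    pos.ins().return_(&[]);
}
```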
@@ -655,7 +703,7 @@ fn expand_fcvt_to_uint( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { use crate::ir::immediates::{Ieee32, Ieee64}; @@ -672,6 +720,12 @@ let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); + // EBB handling numbers < 2^(N-1). + let below_uint_max_ebb = func.dfg.make_ebb(); + + // EBB handling numbers < 0. + let below_zero_ebb = func.dfg.make_ebb(); + // EBB handling numbers >= 2^(N-1). let large = func.dfg.make_ebb(); @@ -695,9 +749,11 @@ let is_large = pos.ins().ffcmp(x, pow2nm1); pos.ins() .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); + pos.ins().jump(below_uint_max_ebb, &[]); // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the // previous comparison. + pos.insert_ebb(below_uint_max_ebb); pos.ins().trapff( FloatCC::Unordered, is_large, @@ -709,6 +765,9 @@ let is_neg = pos.ins().ifcmp_imm(sres, 0); pos.ins() .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]); + pos.ins().jump(below_zero_ebb, &[]); + + pos.insert_ebb(below_zero_ebb); pos.ins().trap(ir::TrapCode::IntegerOverflow); // Handle the case where x >= 2^(N-1) and not NaN. @@ -728,6 +787,8 @@ pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, below_uint_max_ebb); + cfg.recompute_ebb(pos.func, below_zero_ebb); cfg.recompute_ebb(pos.func, large); cfg.recompute_ebb(pos.func, done); } @@ -736,7 +797,7 @@ fn expand_fcvt_to_uint_sat( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn isa::TargetIsa, + _isa: &dyn TargetIsa, ) { use crate::ir::immediates::{Ieee32, Ieee64}; @@ -756,9 +817,16 @@ let result = func.dfg.first_result(inst); let ty = func.dfg.value_type(result); + // EBB handling numbers < 2^(N-1). + let below_pow2nm1_or_nan_ebb = func.dfg.make_ebb(); + let below_pow2nm1_ebb = func.dfg.make_ebb(); + // EBB handling numbers >= 2^(N-1). let large = func.dfg.make_ebb(); + // EBB handling numbers < 2^N. + let uint_large_ebb = func.dfg.make_ebb(); + // Final EBB after the bad value checks. let done = func.dfg.make_ebb(); @@ -780,12 +848,16 @@ let is_large = pos.ins().ffcmp(x, pow2nm1); pos.ins() .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]); + pos.ins().jump(below_pow2nm1_or_nan_ebb, &[]); // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison. + pos.insert_ebb(below_pow2nm1_or_nan_ebb); pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]); + pos.ins().jump(below_pow2nm1_ebb, &[]); // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're // done; otherwise saturate to the minimum unsigned value, that is 0. + pos.insert_ebb(below_pow2nm1_ebb); let sres = pos.ins().x86_cvtt2si(ty, x); let is_neg = pos.ins().ifcmp_imm(sres, 0); pos.ins() @@ -807,6 +879,9 @@ let is_neg = pos.ins().ifcmp_imm(lres, 0); pos.ins() .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]); + pos.ins().jump(uint_large_ebb, &[]); + + pos.insert_ebb(uint_large_ebb); let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1)); // Recycle the original instruction as a jump. @@ -817,6 +892,201 @@ pos.insert_ebb(done); cfg.recompute_ebb(pos.func, old_ebb); + cfg.recompute_ebb(pos.func, below_pow2nm1_or_nan_ebb); + cfg.recompute_ebb(pos.func, below_pow2nm1_ebb); cfg.recompute_ebb(pos.func, large); + cfg.recompute_ebb(pos.func, uint_large_ebb); cfg.recompute_ebb(pos.func, done); }
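The shuffle lowering below splits one CLIF `shuffle` mask into two PSHUFB masks, one per input vector, and ORs the results. The byte arithmetic can be checked in plain Rust; the 16-byte mask here is a hypothetical example value, not from the patch:

```rust
fn pshufb_mask_split() {
    // Shuffle indices 0..=31 select bytes from the concatenation a ++ b.
    let mask: Vec<u8> = vec![0, 16, 5, 21, 2, 18, 9, 25, 1, 17, 4, 20, 3, 19, 8, 24];
    // A PSHUFB mask byte with its top bit set zeroes that lane.
    let zero_unknown_lane_index = |b: u8| if b > 15 { 0b1000_0000 } else { b };

    // Mask for PSHUFB on `a`: lanes sourced from `b` become zero.
    let for_a: Vec<u8> = mask.iter().cloned().map(zero_unknown_lane_index).collect();
    // Mask for PSHUFB on `b`: indices shifted down by 16; lanes from `a` zeroed.
    let for_b: Vec<u8> = mask
        .iter()
        .map(|b| b.wrapping_sub(16))
        .map(zero_unknown_lane_index)
        .collect();

    assert_eq!(for_a[1], 0b1000_0000); // lane 1 reads from `b`: zeroed in `a`'s copy
    assert_eq!(for_b[1], 0); // ...and taken from `b`'s byte 0, so OR-ing recombines them
}
```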
+ +/// Convert shuffle instructions. +fn convert_shuffle( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { + // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 + // in the most significant position zeroes the lane. + let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; + + // We only have to worry about aliasing here because copies will be introduced later (in + // regalloc). + let a = pos.func.dfg.resolve_aliases(args[0]); + let b = pos.func.dfg.resolve_aliases(args[1]); + let mask = pos .func .dfg .immediates .get(mask) .expect("The shuffle immediate should have been recorded before this point") .clone(); + if a == b { + // PSHUFB the first argument (since it is the same as the second). + let constructed_mask = mask .iter() + // If the mask byte is greater than 15 it still may be referring to a lane in b. + .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) + .map(zero_unknown_lane_index) + .collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the single incoming argument. + pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); + } else { + // PSHUFB the first argument, placing zeroes for unused lanes. + let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the first argument. + let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); + + // PSHUFB the second argument, placing zeroes for unused lanes. + let constructed_mask = mask .iter() .map(|b| b.wrapping_sub(16)) .map(zero_unknown_lane_index) .collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let b_type = pos.func.dfg.value_type(b); + let mask_value = pos.ins().vconst(b_type, handle); + // Shuffle the second argument. + let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value); + + // OR the vectors together to form the final shuffled value. + pos.func .dfg .replace(inst) .bor(shuffled_first_arg, shuffled_second_arg); + + // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB + }; + } +} + +/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF +/// extractlane instruction +fn convert_extractlane( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::ExtractLane { + opcode: ir::Opcode::Extractlane, + arg, + lane, + } = pos.func.dfg[inst] + { + // NOTE: the following legalization assumes that the upper bits of the XMM register do + // not need to be zeroed during extractlane. + let value_type = pos.func.dfg.value_type(arg); + if value_type.lane_type().is_float() { + // Floats are already in XMM registers and can stay there.
+ let shuffled = if lane != 0 { + // Replace the extractlane with a PSHUFD to get the float in the right place. + match value_type { + F32X4 => { + // Move the selected lane to the 0 lane. + let shuffle_mask: u8 = 0b00_00_00_00 | lane; + pos.ins().x86_pshufd(arg, shuffle_mask) + } + F64X2 => { + assert_eq!(lane, 1); + // Because we know the lane == 1, we move the upper 64 bits to the lower + // 64 bits, leaving the top 64 bits as-is. + let shuffle_mask = 0b11_10_11_10; + let bitcast = pos.ins().raw_bitcast(F32X4, arg); + pos.ins().x86_pshufd(bitcast, shuffle_mask) + } + _ => unreachable!(), + } + } else { + // Remove the extractlane instruction, leaving the float where it is. + arg + }; + // Then we must bitcast to the right type. + pos.func + .dfg + .replace(inst) + .raw_bitcast(value_type.lane_type(), shuffled); + } else { + // For non-floats, lower with the usual PEXTR* instruction. + pos.func.dfg.replace(inst).x86_pextr(arg, lane); + } + } +} + +/// Because floats exist in XMM registers, we can keep them there when executing a CLIF +/// insertlane instruction +fn convert_insertlane( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::InsertLane { + opcode: ir::Opcode::Insertlane, + args: [vector, replacement], + lane, + } = pos.func.dfg[inst] + { + let value_type = pos.func.dfg.value_type(vector); + if value_type.lane_type().is_float() { + // Floats are already in XMM registers and can stay there. + match value_type { + F32X4 => { + assert!(lane > 0 && lane <= 3); + let immediate = 0b00_00_00_00 | lane << 4; + // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane + // shifted into bits 5:6). + pos.func + .dfg + .replace(inst) + .x86_insertps(vector, immediate, replacement) + } + F64X2 => { + let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types + if lane == 0 { + // Move the lowest quadword in replacement to vector without changing + // the upper bits. + pos.func + .dfg + .replace(inst) + .x86_movsd(vector, replacement_as_vector) + } else { + assert_eq!(lane, 1); + // Move the low 64 bits of replacement vector to the high 64 bits of the + // vector. + pos.func + .dfg + .replace(inst) + .x86_movlhps(vector, replacement_as_vector) + } + } + _ => unreachable!(), + }; + } else { + // For non-floats, lower with the usual PINSR* instruction. 
+ pos.func + .dfg + .replace(inst) + .x86_pinsr(vector, lane, replacement); + } + } +} diff --git a/cranelift-codegen/src/isa/x86/mod.rs b/cranelift-codegen/src/isa/x86/mod.rs index c71e97410..52ab05544 100644 --- a/cranelift-codegen/src/isa/x86/mod.rs +++ b/cranelift-codegen/src/isa/x86/mod.rs @@ -106,15 +106,21 @@ impl TargetIsa for Isa { } fn legalize_signature(&self, sig: &mut ir::Signature, current: bool) { - abi::legalize_signature(sig, &self.triple, current) + abi::legalize_signature( + sig, + &self.triple, + current, + &self.shared_flags, + &self.isa_flags, + ) } fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass { abi::regclass_for_abi_type(ty) } - fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet { - abi::allocatable_registers(func, &self.triple) + fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet { + abi::allocatable_registers(&self.triple, &self.shared_flags) } #[cfg(feature = "testing_hooks")] @@ -125,11 +131,11 @@ impl TargetIsa for Isa { divert: &mut regalloc::RegDiversions, sink: &mut dyn CodeSink, ) { - binemit::emit_inst(func, inst, divert, sink) + binemit::emit_inst(func, inst, divert, sink, self) } fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) { - emit_function(func, binemit::emit_inst, sink) + emit_function(func, binemit::emit_inst, sink, self) } fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { diff --git a/cranelift-codegen/src/isa/x86/settings.rs b/cranelift-codegen/src/isa/x86/settings.rs index ab5e08066..2d3a3f669 100644 --- a/cranelift-codegen/src/isa/x86/settings.rs +++ b/cranelift-codegen/src/isa/x86/settings.rs @@ -3,9 +3,9 @@ use crate::settings::{self, detail, Builder}; use core::fmt; -// Include code generated by `cranelift-codegen/meta-python/gen_settings.py`. This file contains a public -// `Flags` struct with an impl for all of the settings defined in -// `cranelift-codegen/meta-python/isa/x86/settings.py`. +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/x86/settings.rs`. include!(concat!(env!("OUT_DIR"), "/settings-x86.rs")); #[cfg(test)] diff --git a/cranelift-codegen/src/legalizer/heap.rs b/cranelift-codegen/src/legalizer/heap.rs index 33f37155e..332553e80 100644 --- a/cranelift-codegen/src/legalizer/heap.rs +++ b/cranelift-codegen/src/legalizer/heap.rs @@ -14,7 +14,7 @@ pub fn expand_heap_addr( inst: ir::Inst, func: &mut ir::Function, cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, + isa: &dyn TargetIsa, ) { // Unpack the instruction. let (heap, offset, access_size) = match func.dfg[inst] { @@ -32,16 +32,24 @@ pub fn expand_heap_addr( match func.heaps[heap].style { ir::HeapStyle::Dynamic { bound_gv } => { - dynamic_addr(inst, heap, offset, access_size, bound_gv, func) - } - ir::HeapStyle::Static { bound } => { - static_addr(inst, heap, offset, access_size, bound.into(), func, cfg) + dynamic_addr(isa, inst, heap, offset, access_size, bound_gv, func) } + ir::HeapStyle::Static { bound } => static_addr( + isa, + inst, + heap, + offset, + access_size, + bound.into(), + func, + cfg, + ), } } /// Expand a `heap_addr` for a dynamic heap. fn dynamic_addr( + isa: &dyn TargetIsa, inst: ir::Inst, heap: ir::Heap, offset: ir::Value, @@ -74,7 +82,7 @@ fn dynamic_addr( } else { // We need an overflow check for the adjusted offset. 
let access_size_val = pos.ins().iconst(offset_ty, access_size as i64); - let (adj_offset, overflow) = pos.ins().iadd_cout(offset, access_size_val); + let (adj_offset, overflow) = pos.ins().iadd_ifcout(offset, access_size_val); pos.ins().trapnz(overflow, ir::TrapCode::HeapOutOfBounds); oob = pos .ins() @@ -82,11 +90,12 @@ fn dynamic_addr( } pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); - compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func); + compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func); } /// Expand a `heap_addr` for a static heap. fn static_addr( + isa: &dyn TargetIsa, inst: ir::Inst, heap: ir::Heap, offset: ir::Value, @@ -134,11 +143,12 @@ fn static_addr( pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds); } - compute_addr(inst, heap, addr_ty, offset, offset_ty, pos.func); + compute_addr(isa, inst, heap, addr_ty, offset, offset_ty, pos.func); } /// Emit code for the base address computation of a `heap_addr` instruction. fn compute_addr( + isa: &dyn TargetIsa, inst: ir::Inst, heap: ir::Heap, addr_ty: ir::Type, @@ -165,7 +175,12 @@ fn compute_addr( } // Add the heap base address base - let base_gv = pos.func.heaps[heap].base; - let base = pos.ins().global_value(addr_ty, base_gv); + let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() { + pos.ins().get_pinned_reg(isa.pointer_type()) + } else { + let base_gv = pos.func.heaps[heap].base; + pos.ins().global_value(addr_ty, base_gv) + }; + pos.func.dfg.replace(inst).iadd(base, offset); } diff --git a/cranelift-codegen/src/legalizer/libcall.rs b/cranelift-codegen/src/legalizer/libcall.rs index 01ff630f0..1b11ae759 100644 --- a/cranelift-codegen/src/legalizer/libcall.rs +++ b/cranelift-codegen/src/legalizer/libcall.rs @@ -2,7 +2,7 @@ use crate::ir; use crate::ir::{get_libcall_funcref, InstBuilder}; -use crate::isa::TargetIsa; +use crate::isa::{CallConv, TargetIsa}; use crate::legalizer::boundary::legalize_libcall_signature; use std::vec::Vec; @@ -18,8 +18,17 @@ pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn Targ // Now we convert `inst` to a call. First save the arguments. let mut args = Vec::new(); args.extend_from_slice(func.dfg.inst_args(inst)); + + let call_conv = CallConv::for_libcall(isa); + if call_conv.extends_baldrdash() { + let vmctx = func + .special_param(ir::ArgumentPurpose::VMContext) + .expect("Missing vmctx parameter for baldrdash libcall"); + args.push(vmctx); + } + // The replace builder will preserve the instruction result values. - let funcref = get_libcall_funcref(libcall, func, inst, isa); + let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa); func.dfg.replace(inst).call(funcref, &args); // Ask the ISA to legalize the signature. diff --git a/cranelift-codegen/src/legalizer/mod.rs b/cranelift-codegen/src/legalizer/mod.rs index 0a59f0d1b..2fd353c7b 100644 --- a/cranelift-codegen/src/legalizer/mod.rs +++ b/cranelift-codegen/src/legalizer/mod.rs @@ -16,10 +16,13 @@ use crate::bitset::BitSet; use crate::cursor::{Cursor, FuncCursor}; use crate::flowgraph::ControlFlowGraph; -use crate::ir::types::I32; +use crate::ir::types::{I32, I64}; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; +use crate::predicates; use crate::timing; +use std::collections::BTreeSet; +use std::vec::Vec; mod boundary; mod call; @@ -35,31 +38,78 @@ use self::heap::expand_heap_addr; use self::libcall::expand_as_libcall; use self::table::expand_table_addr; -/// Legalize `inst` for `isa`. 
Return true if any changes to the code were -/// made; return false if the instruction was successfully encoded as is. +enum LegalizeInstResult { + Done, + Legalized, + SplitLegalizePending, +} + +/// Legalize `inst` for `isa`. fn legalize_inst( inst: ir::Inst, pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa, -) -> bool { +) -> LegalizeInstResult { let opcode = pos.func.dfg[inst].opcode(); // Check for ABI boundaries that need to be converted to the legalized signature. if opcode.is_call() { if boundary::handle_call_abi(inst, pos.func, cfg) { - return true; + return LegalizeInstResult::Legalized; } } else if opcode.is_return() { if boundary::handle_return_abi(inst, pos.func, cfg) { - return true; + return LegalizeInstResult::Legalized; } } else if opcode.is_branch() { split::simplify_branch_arguments(&mut pos.func.dfg, inst); + } else if opcode == ir::Opcode::Isplit { + pos.use_srcloc(inst); + + let arg = match pos.func.dfg[inst] { + ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg), + _ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)), + }; + + match pos.func.dfg.value_def(arg) { + ir::ValueDef::Result(inst, _num) => { + if let ir::InstructionData::Binary { + opcode: ir::Opcode::Iconcat, + .. + } = pos.func.dfg[inst] + { + // `arg` was created by an `iconcat` instruction. + } else { + // `arg` was not created by an `iconcat` instruction. Don't try to resolve it, + // as otherwise `split::isplit` will re-insert the original `isplit`, causing + // an endless loop. + return LegalizeInstResult::SplitLegalizePending; + } + } + ir::ValueDef::Param(_ebb, _num) => {} + } + + let res = pos.func.dfg.inst_results(inst).to_vec(); + assert_eq!(res.len(), 2); + let (resl, resh) = (res[0], res[1]); // Prevent borrowck error + + // Remove old isplit + pos.func.dfg.clear_results(inst); + pos.remove_inst(); + + let curpos = pos.position(); + let srcloc = pos.srcloc(); + let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg); + + pos.func.dfg.change_to_alias(resl, xl); + pos.func.dfg.change_to_alias(resh, xh); + + return LegalizeInstResult::Legalized; } match pos.func.update_encoding(inst, isa) { - Ok(()) => false, + Ok(()) => LegalizeInstResult::Done, Err(action) => { // We should transform the instruction into legal equivalents. // If the current instruction was replaced, we need to double back and revisit @@ -68,12 +118,16 @@ fn legalize_inst( // There's a risk of infinite looping here if the legalization patterns are // unsound. Should we attempt to detect that? if action(inst, pos.func, cfg, isa) { - return true; + return LegalizeInstResult::Legalized; } // We don't have any pattern expansion for this instruction either. // Try converting it to a library call as a last resort. - expand_as_libcall(inst, pos.func, isa) + if expand_as_libcall(inst, pos.func, isa) { + LegalizeInstResult::Legalized + } else { + LegalizeInstResult::Done + } } } } @@ -93,38 +147,56 @@ pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, is let mut pos = FuncCursor::new(func); + // This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases. + let mut pending_splits = BTreeSet::new(); + // Process EBBs in layout order. Some legalization actions may split the current EBB or append // new ones to the end. We need to make sure we visit those new EBBs too. 
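+    // For illustration (hypothetical IR): an `isplit` whose argument is produced by an
+    // instruction other than `iconcat`, for example
+    //
+    //    v0 = load.i128 v9
+    //    v1, v2 = isplit v0
+    //
+    // ends up in `pending_splits` and is retried below, once the load itself has been
+    // legalized into an `iconcat` of two narrower loads.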
-    while let Some(_ebb) = pos.next_ebb() {
+    while let Some(ebb) = pos.next_ebb() {
+        split::split_ebb_params(pos.func, cfg, ebb);
+
        // Keep track of the cursor position before the instruction being processed, so we can
        // double back when replacing instructions.
        let mut prev_pos = pos.position();

        while let Some(inst) = pos.next_inst() {
-            if legalize_inst(inst, &mut pos, cfg, isa) {
-                // Go back and legalize the inserted return value conversion instructions.
-                pos.set_position(prev_pos);
-            } else {
+            match legalize_inst(inst, &mut pos, cfg, isa) {
                // Remember this position in case we need to double back.
-                prev_pos = pos.position();
+                LegalizeInstResult::Done => prev_pos = pos.position(),
+
+                // Go back and legalize the inserted return value conversion instructions.
+                LegalizeInstResult::Legalized => pos.set_position(prev_pos),
+
+                // The argument of an `isplit` or `vsplit` instruction didn't resolve to an
+                // `iconcat` or `vconcat` instruction. Try again after legalizing the rest of
+                // the instructions.
+                LegalizeInstResult::SplitLegalizePending => {
+                    pending_splits.insert(inst);
+                }
            }
        }
    }

+    // Try legalizing the `isplit` and `vsplit` instructions which could not previously be legalized.
+    for inst in pending_splits {
+        pos.goto_inst(inst);
+        legalize_inst(inst, &mut pos, cfg, isa);
+    }
+
    // Now that we've lowered all br_tables, we don't need the jump tables anymore.
    if !isa.flags().jump_tables_enabled() {
        pos.func.jump_tables.clear();
    }
}

-// Include legalization patterns that were generated by `gen_legalizer.py` from the `XForms` in
-// `cranelift-codegen/meta-python/base/legalize.py`.
+// Include legalization patterns that were generated by `gen_legalizer.rs` from the
+// `TransformGroup` in `cranelift-codegen/meta/src/shared/legalize.rs`.
//
// Concretely, this defines private functions `narrow()`, and `expand()`.
include!(concat!(env!("OUT_DIR"), "/legalizer.rs"));

/// Custom expansion for conditional trap instructions.
-/// TODO: Add CFG support to the Python patterns so we won't have to do this.
+/// TODO: Add CFG support to the Rust DSL patterns so we won't have to do this.
fn expand_cond_trap(
    inst: ir::Inst,
    func: &mut ir::Function,
@@ -149,29 +221,45 @@ fn expand_cond_trap(
    // Split the EBB after `inst`:
    //
    //     trapnz arg
+    //     ..
    //
    // Becomes:
    //
-    //     brz arg, new_ebb
+    //     brz arg, new_ebb_resume
+    //     jump new_ebb_trap
+    //
+    //   new_ebb_trap:
    //     trap
-    //   new_ebb:
    //
+    //   new_ebb_resume:
+    //     ..
    let old_ebb = func.layout.pp_ebb(inst);
-    let new_ebb = func.dfg.make_ebb();
+    let new_ebb_trap = func.dfg.make_ebb();
+    let new_ebb_resume = func.dfg.make_ebb();
+
+    // Replace the trap instruction with a branch on the inverted condition.
    if trapz {
-        func.dfg.replace(inst).brnz(arg, new_ebb, &[]);
+        func.dfg.replace(inst).brnz(arg, new_ebb_resume, &[]);
    } else {
-        func.dfg.replace(inst).brz(arg, new_ebb, &[]);
+        func.dfg.replace(inst).brz(arg, new_ebb_resume, &[]);
    }

+    // Add a jump instruction after the inverted branch.
    let mut pos = FuncCursor::new(func).after_inst(inst);
    pos.use_srcloc(inst);
+    pos.ins().jump(new_ebb_trap, &[]);
+
+    // Insert the new label and the unconditional trap terminator.
+    pos.insert_ebb(new_ebb_trap);
    pos.ins().trap(code);
-    pos.insert_ebb(new_ebb);
+
+    // Insert the new label at which execution resumes when the trap is not taken.
+    pos.insert_ebb(new_ebb_resume);

    // Finally update the CFG.
    cfg.recompute_ebb(pos.func, old_ebb);
-    cfg.recompute_ebb(pos.func, new_ebb);
+    cfg.recompute_ebb(pos.func, new_ebb_resume);
+    cfg.recompute_ebb(pos.func, new_ebb_trap);
}

/// Jump tables.
@@ -207,31 +295,57 @@ fn expand_br_table_jt( _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), }; - let table_size = func.jump_tables[table].len(); - let addr_ty = isa.pointer_type(); - let entry_ty = I32; + // Rewrite: + // + // br_table $idx, default_ebb, $jt + // + // To: + // + // $oob = ifcmp_imm $idx, len($jt) + // brif uge $oob, default_ebb + // jump fallthrough_ebb + // + // fallthrough_ebb: + // $base = jump_table_base.i64 $jt + // $rel_addr = jump_table_entry.i64 $idx, $base, 4, $jt + // $addr = iadd $base, $rel_addr + // indirect_jump_table_br $addr, $jt + + let ebb = func.layout.pp_ebb(inst); + let jump_table_ebb = func.dfg.make_ebb(); let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); - // Bounds check + // Bounds check. + let table_size = pos.func.jump_tables[table].len() as i64; let oob = pos .ins() - .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size as i64); + .icmp_imm(IntCC::UnsignedGreaterThanOrEqual, arg, table_size); pos.ins().brnz(oob, default_ebb, &[]); + pos.ins().jump(jump_table_ebb, &[]); + pos.insert_ebb(jump_table_ebb); + + let addr_ty = isa.pointer_type(); + + let arg = if pos.func.dfg.value_type(arg) == addr_ty { + arg + } else { + pos.ins().uextend(addr_ty, arg) + }; let base_addr = pos.ins().jump_table_base(addr_ty, table); let entry = pos .ins() - .jump_table_entry(addr_ty, arg, base_addr, entry_ty.bytes() as u8, table); + .jump_table_entry(arg, base_addr, I32.bytes() as u8, table); let addr = pos.ins().iadd(base_addr, entry); pos.ins().indirect_jump_table_br(addr, table); - let ebb = pos.current_ebb().unwrap(); pos.remove_inst(); cfg.recompute_ebb(pos.func, ebb); + cfg.recompute_ebb(pos.func, jump_table_ebb); } /// Expand br_table to series of conditionals. @@ -253,8 +367,18 @@ fn expand_br_table_conds( _ => panic!("Expected br_table: {}", func.dfg.display_inst(inst, None)), }; + let ebb = func.layout.pp_ebb(inst); + // This is a poor man's jump table using just a sequence of conditional branches. let table_size = func.jump_tables[table].len(); + let mut cond_failed_ebb = vec![]; + if table_size >= 1 { + cond_failed_ebb = std::vec::Vec::with_capacity(table_size - 1); + for _ in 0..table_size - 1 { + cond_failed_ebb.push(func.dfg.make_ebb()); + } + } + let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -262,14 +386,21 @@ fn expand_br_table_conds( let dest = pos.func.jump_tables[table].as_slice()[i]; let t = pos.ins().icmp_imm(IntCC::Equal, arg, i as i64); pos.ins().brnz(t, dest, &[]); + // Jump to the next case. + if i < table_size - 1 { + pos.ins().jump(cond_failed_ebb[i], &[]); + pos.insert_ebb(cond_failed_ebb[i]); + } } // `br_table` jumps to the default destination if nothing matches pos.ins().jump(default_ebb, &[]); - let ebb = pos.current_ebb().unwrap(); pos.remove_inst(); cfg.recompute_ebb(pos.func, ebb); + for failed_ebb in cond_failed_ebb.into_iter() { + cfg.recompute_ebb(pos.func, failed_ebb); + } } /// Expand the select instruction. @@ -438,3 +569,99 @@ fn expand_stack_store( mflags.set_aligned(); pos.func.dfg.replace(inst).store(mflags, val, addr, 0); } + +/// Split a load into two parts before `iconcat`ing the result together. 
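+///
+/// For example (illustrative value names): on a 64-bit target, an `i128` load is split,
+/// roughly, into
+///
+///    v1 = load.i64 ptr        ; low half (little-endian layout assumed)
+///    v2 = load.i64 ptr+8      ; high half
+///    v0 = iconcat v1, v2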
+fn narrow_load( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Load { + opcode: ir::Opcode::Load, + arg, + offset, + flags, + } => (arg, offset, flags), + _ => panic!("Expected load: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let res_ty = pos.func.dfg.ctrl_typevar(inst); + let small_ty = res_ty.half_width().expect("Can't narrow load"); + + let al = pos.ins().load(small_ty, flags, ptr, offset); + let ah = pos.ins().load( + small_ty, + flags, + ptr, + offset.try_add_i64(8).expect("load offset overflow"), + ); + pos.func.dfg.replace(inst).iconcat(al, ah); +} + +/// Split a store into two parts after `isplit`ing the value. +fn narrow_store( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let (val, ptr, offset, flags) = match pos.func.dfg[inst] { + ir::InstructionData::Store { + opcode: ir::Opcode::Store, + args, + offset, + flags, + } => (args[0], args[1], offset, flags), + _ => panic!("Expected store: {}", pos.func.dfg.display_inst(inst, None)), + }; + + let (al, ah) = pos.ins().isplit(val); + pos.ins().store(flags, al, ptr, offset); + pos.ins().store( + flags, + ah, + ptr, + offset.try_add_i64(8).expect("store offset overflow"), + ); + pos.remove_inst(); +} + +/// Expands an illegal iconst value by splitting it into two. +fn narrow_iconst( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + isa: &dyn TargetIsa, +) { + let imm: i64 = if let ir::InstructionData::UnaryImm { + opcode: ir::Opcode::Iconst, + imm, + } = &func.dfg[inst] + { + (*imm).into() + } else { + panic!("unexpected instruction in narrow_iconst"); + }; + + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + let ty = pos.func.dfg.ctrl_typevar(inst); + if isa.pointer_bits() == 32 && ty == I64 { + let low = pos.ins().iconst(I32, imm & 0xffffffff); + let high = pos.ins().iconst(I32, imm >> 32); + // The instruction has as many results as iconcat, so no need to replace them. + pos.func.dfg.replace(inst).iconcat(low, high); + return; + } + + unimplemented!("missing encoding or legalization for iconst.{:?}", ty); +} diff --git a/cranelift-codegen/src/legalizer/split.rs b/cranelift-codegen/src/legalizer/split.rs index 773df1321..5e55419fa 100644 --- a/cranelift-codegen/src/legalizer/split.rs +++ b/cranelift-codegen/src/legalizer/split.rs @@ -68,6 +68,7 @@ use crate::cursor::{Cursor, CursorPosition, FuncCursor}; use crate::flowgraph::{BasicBlock, ControlFlowGraph}; use crate::ir::{self, Ebb, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef}; use core::iter; +use smallvec::SmallVec; use std::vec::Vec; /// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values @@ -124,6 +125,43 @@ fn split_any( let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc); let result = split_value(pos, value, concat, &mut repairs); + perform_repairs(pos, cfg, repairs); + + result +} + +pub fn split_ebb_params(func: &mut ir::Function, cfg: &ControlFlowGraph, ebb: Ebb) { + let pos = &mut FuncCursor::new(func).at_top(ebb); + let ebb_params = pos.func.dfg.ebb_params(ebb); + + // Add further splittable types here. 
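+    // For example (illustrative value numbers): an EBB header
+    //
+    //    ebb2(v5: i128):
+    //
+    // is rewritten as
+    //
+    //    ebb2(v10: i64, v11: i64):
+    //        v5 = iconcat v10, v11
+    //
+    // and each predecessor branch to ebb2 is repaired to pass the two halves instead.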
+    fn type_requires_splitting(ty: Type) -> bool {
+        ty == ir::types::I128
+    }
+
+    // A shortcut. If none of the param types require splitting, exit now. This helps because
+    // the loop below necessarily has to copy the ebb params into a new vector, so it's better to
+    // avoid doing so when possible.
+    if !ebb_params
+        .iter()
+        .any(|ebb_param| type_requires_splitting(pos.func.dfg.value_type(*ebb_param)))
+    {
+        return;
+    }
+
+    let mut repairs = Vec::new();
+    for (num, ebb_param) in ebb_params.to_vec().into_iter().enumerate() {
+        if !type_requires_splitting(pos.func.dfg.value_type(ebb_param)) {
+            continue;
+        }
+
+        split_ebb_param(pos, ebb, num, ebb_param, Opcode::Iconcat, &mut repairs);
+    }
+
+    perform_repairs(pos, cfg, repairs);
+}
+
+fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec<Repair>) {
    // We have split the value requested, and now we may need to fix some EBB predecessors.
    while let Some(repair) = repairs.pop() {
        for BasicBlock { inst, .. } in cfg.pred_iter(repair.ebb) {
@@ -181,8 +219,6 @@
            pos.func.dfg[inst].put_value_list(args);
        }
    }
-
-    result
}

/// Split a single value using the integer or vector semantics given by the `concat` opcode.
@@ -215,40 +251,7 @@
        // This is an EBB parameter. We can split the parameter value unless this is the entry
        // block.
        if pos.func.layout.entry_block() != Some(ebb) {
-            // We are going to replace the parameter at `num` with two new arguments.
-            // Determine the new value types.
-            let ty = pos.func.dfg.value_type(value);
-            let split_type = match concat {
-                Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
-                Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
-                _ => panic!("Unhandled concat opcode: {}", concat),
-            };
-
-            // Since the `repairs` stack potentially contains other parameter numbers for
-            // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other
-            // `repairs` entries.
-            //
-            // Replace the original `value` with the low part, and append the high part at the
-            // end of the argument list.
-            let lo = pos.func.dfg.replace_ebb_param(value, split_type);
-            let hi_num = pos.func.dfg.num_ebb_params(ebb);
-            let hi = pos.func.dfg.append_ebb_param(ebb, split_type);
-            reuse = Some((lo, hi));
-
-            // Now the original value is dangling. Insert a concatenation instruction that can
-            // compute it from the two new parameters. This also serves as a record of what we
-            // did so a future call to this function doesn't have to redo the work.
-            //
-            // Note that it is safe to move `pos` here since `reuse` was set above, so we don't
-            // need to insert a split instruction before returning.
-            pos.goto_first_inst(ebb);
-            pos.ins()
-                .with_result(value)
-                .Binary(concat, split_type, lo, hi);
-
-            // Finally, splitting the EBB parameter is not enough. We also have to repair all
-            // of the predecessor instructions that branch here.
-            add_repair(concat, split_type, ebb, num, hi_num, repairs);
+            reuse = Some(split_ebb_param(pos, ebb, num, value, concat, repairs));
        }
    }
}
@@ -267,6 +270,51 @@
    }
}

+fn split_ebb_param(
+    pos: &mut FuncCursor,
+    ebb: Ebb,
+    param_num: usize,
+    value: Value,
+    concat: Opcode,
+    repairs: &mut Vec<Repair>,
+) -> (Value, Value) {
+    // We are going to replace the parameter at `num` with two new arguments.
+    // Determine the new value types.
+ let ty = pos.func.dfg.value_type(value); + let split_type = match concat { + Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), + Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), + _ => panic!("Unhandled concat opcode: {}", concat), + }; + + // Since the `repairs` stack potentially contains other parameter numbers for + // `ebb`, avoid shifting and renumbering EBB parameters. It could invalidate other + // `repairs` entries. + // + // Replace the original `value` with the low part, and append the high part at the + // end of the argument list. + let lo = pos.func.dfg.replace_ebb_param(value, split_type); + let hi_num = pos.func.dfg.num_ebb_params(ebb); + let hi = pos.func.dfg.append_ebb_param(ebb, split_type); + + // Now the original value is dangling. Insert a concatenation instruction that can + // compute it from the two new parameters. This also serves as a record of what we + // did so a future call to this function doesn't have to redo the work. + // + // Note that it is safe to move `pos` here since `reuse` was set above, so we don't + // need to insert a split instruction before returning. + pos.goto_first_inst(ebb); + pos.ins() + .with_result(value) + .Binary(concat, split_type, lo, hi); + + // Finally, splitting the EBB parameter is not enough. We also have to repair all + // of the predecessor instructions that branch here. + add_repair(concat, split_type, ebb, param_num, hi_num, repairs); + + (lo, hi) +} + // Add a repair entry to the work list. fn add_repair( concat: Opcode, @@ -334,7 +382,7 @@ fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value { /// After legalizing the instructions computing the value that was split, it is likely that we can /// avoid depending on the split instruction. Its input probably comes from a concatenation. pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) { - let mut new_args = Vec::new(); + let mut new_args = SmallVec::<[Value; 32]>::new(); for &arg in dfg.inst_args(branch) { let new_arg = resolve_splits(dfg, arg); diff --git a/cranelift-codegen/src/lib.rs b/cranelift-codegen/src/lib.rs index 5b809460c..1e97dfd6e 100644 --- a/cranelift-codegen/src/lib.rs +++ b/cranelift-codegen/src/lib.rs @@ -41,7 +41,6 @@ ) )] #![no_std] -#![cfg_attr(not(feature = "std"), feature(alloc))] #[cfg(not(feature = "std"))] #[macro_use] @@ -95,6 +94,7 @@ mod nan_canonicalization; mod partition_slice; mod postopt; mod predicates; +mod redundant_reload_remover; mod ref_slice; mod regalloc; mod result; diff --git a/cranelift-codegen/src/licm.rs b/cranelift-codegen/src/licm.rs index 2af4ef324..2866da87d 100644 --- a/cranelift-codegen/src/licm.rs +++ b/cranelift-codegen/src/licm.rs @@ -88,7 +88,7 @@ fn create_pre_header( { // We only follow normal edges (not the back edges) if !domtree.dominates(header, last_inst, &func.layout) { - change_branch_jump_destination(last_inst, pre_header, func); + func.change_branch_destination(last_inst, pre_header); } } { @@ -136,15 +136,6 @@ fn has_pre_header( result } -// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump -// or non-branch instruction. -fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) { - match func.dfg[inst].branch_destination_mut() { - None => (), - Some(instruction_dest) => *instruction_dest = new_ebb, - } -} - /// Test whether the given opcode is unsafe to even consider for LICM. 
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
    opcode.can_store()
diff --git a/cranelift-codegen/src/predicates.rs b/cranelift-codegen/src/predicates.rs
index da01ad6b2..f90054611 100644
--- a/cranelift-codegen/src/predicates.rs
+++ b/cranelift-codegen/src/predicates.rs
@@ -1,7 +1,7 @@
//! Predicate functions for testing instruction fields.
//!
//! This module defines functions that are used by the instruction predicates defined by
-//! `cranelift-codegen/meta-python/cdsl/predicates.py` classes.
+//! `cranelift-codegen/meta/src/cdsl/instructions.rs`.
//!
//! The predicates that operate on integer fields use `Into<i64>` as a shared trait bound. This
//! bound is implemented by all the native integer types as well as `Imm64`.
@@ -11,6 +11,12 @@
use crate::ir;

+/// Check that an integer value is zero.
+#[allow(dead_code)]
+pub fn is_zero_int<T: Into<i64>>(x: T) -> bool {
+    x.into() == 0
+}
+
/// Check that a 64-bit floating point value is zero.
#[allow(dead_code)]
pub fn is_zero_64_bit_float<T: Into<Ieee64>>(x: T) -> bool {
diff --git a/cranelift-codegen/src/redundant_reload_remover.rs b/cranelift-codegen/src/redundant_reload_remover.rs
new file mode 100644
index 000000000..9bcc3fbc9
--- /dev/null
+++ b/cranelift-codegen/src/redundant_reload_remover.rs
@@ -0,0 +1,904 @@
+//! This module implements a late-stage redundant-reload remover, which runs after registers have
+//! been allocated and stack slots have been given specific offsets.
+
+use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor};
+use crate::entity::EntitySet;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::dfg::DataFlowGraph;
+use crate::ir::instructions::BranchInfo;
+use crate::ir::stackslot::{StackSlotKind, StackSlots};
+use crate::ir::{
+    Ebb, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value, ValueLoc,
+};
+use crate::isa::{RegInfo, RegUnit, TargetIsa};
+use crate::regalloc::RegDiversions;
+use core::convert::TryInto;
+use cranelift_entity::{PrimaryMap, SecondaryMap};
+use std::vec::Vec;
+
+// =============================================================================================
+// A description of the redundant-fill-removal algorithm
+//
+//
+// The algorithm works forwards through each Ebb. It carries along and updates a table,
+// AvailEnv, with which it tracks registers that are known to have the same value as some stack
+// slot. The actions on encountering an instruction depend on the instruction, as follows:
+//
+//    ss1 = spill r0:   update the AvailEnv so as to note that slot `ss1` and register `r0`
+//                      have the same value.
+//
+//    r1 = fill ss0:    look in the AvailEnv. If it tells us that register `r1` and slot `ss0`
+//                      have the same value, then delete the instruction by converting it to a
+//                      `fill_nop`.
+//
+//                      If it tells us that some other register `r2` has the same value as
+//                      slot `ss0`, convert the instruction into a copy from `r2` to `r1`.
+//
+//    any other insn:   remove from the AvailEnv any bindings associated with registers
+//                      written by this instruction, since they will be invalidated by it.
+//
+// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and
+// destination are registers does not cause any more fills to be removed or converted to copies.
+// It's not clear why.
+//
+// There are various other instruction-handling cases in `visit_inst`, which are documented
+// in-line, and do not change the core algorithm, so are not described here.
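+//
+// For example (with illustrative register and slot names): given the sequence
+//
+//    ss0 = spill r0
+//    ..
+//    r1 = fill ss0
+//    r0 = fill ss0
+//
+// and assuming nothing in ".." overwrites r0, the first fill becomes a copy from r0 to r1
+// (an inexact binding: ss0 is available, but in a different register), and the second fill
+// becomes a `fill_nop` (an exact binding: r0 already holds ss0's value).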
+// +// The registers tracked by AvailEnv are the post-diversion registers that are really used by the +// code; they are not the pre-diversion names associated with each SSA `Value`. The second +// `fill` case above opportunistically copies values from registers that may have been diversion +// targets in some predecessor block, and so are no longer associated with any specific SSA-level +// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done +// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa` +// instruction, which copies from an arbitrary register to a register-resident `Value` (that is, +// "back to" SSA-world). +// +// That completes the description of the core algorithm. +// +// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the +// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can +// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full +// advantage of this, this module partitions the function's CFG into tree-shaped groups of +// blocks, and processes each tree as described above. So the AvailEnv is only initialised to +// empty at the start of blocks that form the root of each tree; that is, for blocks which have +// two or more predecessors. + +// ============================================================================================= +// Top level algorithm structure +// +// The overall algorithm, for a function, starts like this: +// +// * (once per function): finds Ebbs that have two or more predecessors, since they will be the +// roots of Ebb trees. Also, the entry node for the function is considered to be a root. +// +// It then continues with a loop that first finds a tree of Ebbs ("discovery") and then removes +// redundant fills as described above ("processing"): +// +// * (discovery; once per tree): for each root, performs a depth first search to find all the Ebbs +// in the tree, guided by RedundantReloadRemover::discovery_stack. +// +// * (processing; once per tree): the just-discovered tree is then processed as described above, +// guided by RedundantReloadRemover::processing_stack. +// +// In this way, all Ebbs reachable from the function's entry point are eventually processed. Note +// that each tree is processed as soon as it has been discovered, so the algorithm never creates a +// list of trees for the function. +// +// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be +// reused for multiple functions so as to minimise heap turnover. The fields are, roughly: +// +// num_regunits -- constant for the whole function; used by the tree processing phase +// num_preds_per_ebb -- constant for the whole function; used by the tree discovery process +// +// discovery_stack -- used to guide the tree discovery process +// nodes_in_tree -- the discovered nodes are recorded here +// +// processing_stack -- used to guide the tree processing process +// nodes_already_visited -- used to ensure the tree processing logic terminates in the case +// where a tree has a branch back to its root node. +// +// There is further documentation in line below, as appropriate. + +// ============================================================================================= +// A side note on register choice heuristics + +// The core algorithm opportunistically replaces fill instructions when it knows of a register +// that already holds the required value. 
How effective this is largely depends on how long
+// reloaded values happen to stay alive before the relevant register is overwritten. And that
+// depends on the register allocator's register choice heuristics. The worst case is when the
+// register allocator reuses registers as soon as possible after they become free. Unfortunately,
+// that was indeed the selection scheme prior to the development of this pass.
+//
+// As part of this work, the register selection scheme has been changed as follows: for registers
+// written by any instruction other than a fill, use the lowest numbered available register. But
+// for registers written by a fill instruction, use the highest numbered available register. The
+// aim is to try to keep reload- and non-reload registers disjoint to the extent possible.
+// Several other schemes were tried, but this one is simple and can be worth an extra 2% of
+// performance in some cases.
+//
+// The relevant change is more or less a one-line change in the solver.
+
+// =============================================================================================
+// Data structures used for discovery of trees
+
+// `ZeroOneOrMany` is used to record the number of predecessors an Ebb block has. The `Zero` case
+// is included so as to cleanly handle the case where the incoming graph has unreachable Ebbs.
+
+#[derive(Clone, PartialEq)]
+enum ZeroOneOrMany {
+    Zero,
+    One,
+    Many,
+}
+
+// =============================================================================================
+// Data structures used for processing of trees
+
+// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which
+// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead.
+//
+// In the CL framework, stack slots are partitioned into disjoint sets, one for each
+// `StackSlotKind`. The offset and size only give a unique identity within any particular
+// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary.
+
+#[derive(Clone, Copy)]
+struct SlotInfo {
+    kind: StackSlotKind,
+    offset: i32,
+    size: u32,
+}
+
+// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index
+// space of `AvailEnv::map` is exactly the set of registers available on the current target. If
+// (as is mostly the case) a register is not known to have the same value as a stack slot, then
+// its entry is `None` rather than `Some(..)`.
+//
+// Invariants for AvailEnv:
+//
+// AvailEnv may have multiple different registers bound to the same stack slot -- that is, to the
+// same `(kind, offset, size)` triple. That's OK, and reflects the reality that those registers
+// contain the same value. This could happen, for example, in the case
+//
+//    ss1 = spill r0
+//    ..
+//    r2 = fill ss1
+//
+// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to
+// `r0`.
+//
+// To say that two different registers may be bound to the same stack slot is the same as saying
+// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset,
+// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos
+// have the same `kind` field and have `offset` and `size` fields that overlap, then their
+// `offset` and `size` fields must be identical.
+// This is so as to make the algorithm safe against situations where, for example, a 64 bit
+// register is spilled, but then only the bottom 32 bits are reloaded from the slot.
+//
+// In such a case, though, it seems likely that the Cranelift IR would be ill-typed, so this
+// case could probably not occur in practice.

+#[derive(Clone)]
+struct AvailEnv {
+    map: Vec<Option<SlotInfo>>,
+}
+
+// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within
+// an Ebb.
+//
+// A ProcessingStackElem conceptually has the lifetime of exactly one Ebb: once the current Ebb is
+// completed, the ProcessingStackElem will be abandoned. In practice the top level state,
+// RedundantReloadRemover, caches them, so as to avoid heap turnover.
+//
+// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which
+// indicates where we are in the current Ebb, cannot be implicitly maintained by looping over all
+// the instructions in an Ebb in turn, because we may choose to suspend processing the current Ebb
+// at a side exit, continue by processing the subtree reached via the side exit, and only later
+// resume the current Ebb.
+
+struct ProcessingStackElem {
+    /// Indicates the AvailEnv at the current point in the Ebb.
+    avail_env: AvailEnv,
+
+    /// Shows where we currently are inside the Ebb.
+    cursor: CursorPosition,
+
+    /// Indicates the currently active register diversions at the current point.
+    diversions: RegDiversions,
+}
+
+// =============================================================================================
+// The top level data structure
+
+// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped
+// regions, and processing of them. These are allocated once and stay alive for the entire
+// function, even though they are cleared out for each new tree shaped region. It also caches
+// `num_regunits` and `num_preds_per_ebb`, which are computed at the start of each function and
+// then remain constant.
+
+/// The redundant reload remover's state.
+pub struct RedundantReloadRemover {
+    /// The total number of RegUnits available on this architecture. This is unknown when the
+    /// RedundantReloadRemover is created. It becomes known at the beginning of processing of a
+    /// function.
+    num_regunits: Option<RegUnit>,
+
+    /// This stores, for each Ebb, a characterisation of the number of predecessors it has.
+    num_preds_per_ebb: PrimaryMap<Ebb, ZeroOneOrMany>,
+
+    /// The stack used for the first phase (discovery). There is one element on the discovery
+    /// stack for each currently unexplored Ebb in the tree being searched.
+    discovery_stack: Vec<Ebb>,
+
+    /// The nodes in the discovered tree are inserted here.
+    nodes_in_tree: EntitySet<Ebb>,
+
+    /// The stack used during the second phase (transformation). There is one element on the
+    /// processing stack for each currently-open node in the tree being transformed.
+    processing_stack: Vec<ProcessingStackElem>,
+
+    /// Used in the second phase to avoid visiting nodes more than once.
+    nodes_already_visited: EntitySet<Ebb>,
+}
+
+// =============================================================================================
+// Miscellaneous small helper functions
+
+// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly
+// conservative, but tracking only the SpillSlot and IncomingArg kinds catches almost all
+// available redundancy in practice.
+fn is_slot_kind_tracked(kind: StackSlotKind) -> bool {
+    match kind {
+        StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true,
+        _ => false,
+    }
+}
+
+// Find out if the range `[offset, +size)` overlaps with the range in `si`.
+fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool {
+    let a_offset = si.offset as i64;
+    let a_size = si.size as i64;
+    let b_offset = offset as i64;
+    let b_size = size as i64;
+    let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset;
+    !no_overlap
+}
+
+// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size
+// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that
+// `reg` lives in.
+fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) {
+    if let Some(bank) = reginfo.bank_containing_regunit(reg) {
+        return (bank.first_unit, bank.units);
+    }
+    // We should never get here, since `reg` must come from *some* RegBank.
+    panic!("find_bank_limits: reg not found");
+}
+
+// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a
+// register.
+fn reg_of_value(locations: &SecondaryMap<Value, ValueLoc>, v: Value) -> RegUnit {
+    match locations[v] {
+        ValueLoc::Reg(ru) => ru,
+        _ => panic!("reg_of_value: value isn't in a reg"),
+    }
+}
+
+// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack
+// slot.
+fn slot_of_value<'s>(
+    locations: &SecondaryMap<Value, ValueLoc>,
+    stack_slots: &'s StackSlots,
+    v: Value,
+) -> &'s StackSlotData {
+    match locations[v] {
+        ValueLoc::Stack(slot) => &stack_slots[slot],
+        _ => panic!("slot_of_value: value isn't in a stack slot"),
+    }
+}
+
+// =============================================================================================
+// Top level: discovery of tree shaped regions
+
+impl RedundantReloadRemover {
+    // A helper for `add_nodes_to_tree` below.
+    fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Ebb) {
+        for successor in cfg.succ_iter(node) {
+            self.discovery_stack.push(successor);
+        }
+    }
+
+    // Visit the tree of Ebbs rooted at `starting_point` and add them to `self.nodes_in_tree`.
+    // `self.num_preds_per_ebb` guides the process, ensuring we don't leave the tree-ish region
+    // and indirectly ensuring that the process will terminate in the presence of cycles in the
+    // graph. `self.discovery_stack` holds the search state in this function.
+    fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Ebb) {
+        // One might well ask why this doesn't loop forever when it encounters cycles in the
+        // control flow graph. The reason is that any cycle in the graph that is reachable from
+        // anywhere outside the cycle -- in particular, that is reachable from the function's
+        // entry node -- must have at least one node that has two or more predecessors. So the
+        // logic below won't follow into it, because it regards any such node as the root of some
+        // other tree.
+        debug_assert!(self.discovery_stack.is_empty());
+        debug_assert!(self.nodes_in_tree.is_empty());
+
+        self.nodes_in_tree.insert(starting_point);
+        self.discovery_stack_push_successors_of(cfg, starting_point);
+
+        while let Some(node) = self.discovery_stack.pop() {
+            match self.num_preds_per_ebb[node] {
+                // We arrived at a node with multiple predecessors, so it's a new root. Ignore it.
+                ZeroOneOrMany::Many => {}
+                // This node has just one predecessor, so we should incorporate it in the tree and
+                // immediately transition into searching from it instead.
+                ZeroOneOrMany::One => {
+                    self.nodes_in_tree.insert(node);
+                    self.discovery_stack_push_successors_of(cfg, node);
+                }
+                // This is meaningless. We arrived at a node that doesn't point back at where we
+                // came from.
+                ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"),
+            }
+        }
+    }
+}
+
+// =============================================================================================
+// Operations relating to `AvailEnv`
+
+impl AvailEnv {
+    // Create a new one.
+    fn new(size: usize) -> Self {
+        let mut env = AvailEnv {
+            map: Vec::<Option<SlotInfo>>::new(),
+        };
+        env.map.resize(size, None);
+        env
+    }
+
+    // Debug only: checks (some of) the required AvailEnv invariants.
+    #[cfg(debug_assertions)]
+    fn check_invariants(&self) -> bool {
+        // Check that any overlapping entries overlap exactly. This is super lame (quadratic),
+        // but it's only used in debug builds.
+        for i in 0..self.map.len() {
+            if let Some(si) = self.map[i] {
+                for j in i + 1..self.map.len() {
+                    if let Some(sj) = self.map[j] {
+                        // "si and sj overlap, but not exactly"
+                        if si.kind == sj.kind
+                            && overlaps(&si, sj.offset, sj.size)
+                            && !(si.offset == sj.offset && si.size == sj.size)
+                        {
+                            return false;
+                        }
+                    }
+                }
+            }
+        }
+        true
+    }
+
+    // Invalidates the binding associated with `reg`. Note that by construction of AvailEnv,
+    // `reg` can only be associated with one binding at once.
+    fn invalidate_by_reg(&mut self, reg: RegUnit) {
+        self.map[reg as usize] = None;
+    }
+
+    // Invalidates any binding that has any overlap with `(kind, offset, size)`.
+    fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) {
+        debug_assert!(is_slot_kind_tracked(kind));
+        for i in 0..self.map.len() {
+            if let Some(si) = &self.map[i] {
+                if si.kind == kind && overlaps(&si, offset, size) {
+                    self.map[i] = None;
+                }
+            }
+        }
+    }
+
+    // Invalidates all bindings.
+    fn invalidate_all(&mut self) {
+        for i in 0..self.map.len() {
+            self.map[i] = None;
+        }
+    }
+
+    // Updates AvailEnv to track the effect of a `regmove` instruction.
+    fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) {
+        self.map[dst as usize] = self.map[src as usize];
+    }
+
+    // Does `env` have the exact binding characterised by `(reg, kind, offset, size)` ?
+    fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool {
+        debug_assert!(is_slot_kind_tracked(kind));
+        if let Some(si) = &self.map[reg as usize] {
+            return si.kind == kind && si.offset == offset && si.size == size;
+        }
+        // No such binding.
+        false
+    }
+
+    // Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's
+    // call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg`
+    // will have the same bank as `reg`. It is a checked error to call this function with a
+    // binding matching all four of `(reg, kind, offset, size)`.
+    fn has_inexact_binding(
+        &self,
+        reginfo: &RegInfo,
+        reg: RegUnit,
+        kind: StackSlotKind,
+        offset: i32,
+        size: u32,
+    ) -> Option<RegUnit> {
+        debug_assert!(is_slot_kind_tracked(kind));
+        // Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our
+        // search space. This is so as to guarantee that any match is restricted to the same bank
+        // as `reg`.
+        let (first_unit, num_units) = find_bank_limits(reginfo, reg);
+        for other_reg in first_unit..first_unit + num_units {
+            if let Some(si) = &self.map[other_reg as usize] {
+                if si.kind == kind && si.offset == offset && si.size == size {
+                    if other_reg == reg {
+                        panic!("has_inexact_binding: binding *is* exact!");
+                    }
+                    return Some(other_reg);
+                }
+            }
+        }
+        // No such binding.
+        None
+    }
+
+    // Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous
+    // binding associated with either `reg` or the `(kind, offset, size)` triple.
+    fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) {
+        debug_assert!(is_slot_kind_tracked(kind));
+        self.invalidate_by_offset(kind, offset, size);
+        self.map[reg as usize] = Some(SlotInfo { kind, offset, size });
+    }
+}
+
+// Invalidates in `avail_env` any binding associated with a regunit that is written by `inst`.
+fn invalidate_regs_written_by_inst(
+    locations: &SecondaryMap<Value, ValueLoc>,
+    diversions: &RegDiversions,
+    dfg: &DataFlowGraph,
+    avail_env: &mut AvailEnv,
+    inst: Inst,
+) {
+    for v in dfg.inst_results(inst).iter() {
+        if let ValueLoc::Reg(ru) = locations[*v] {
+            // This must be true. It would be meaningless for an SSA value to be diverted before
+            // the point where it is defined.
+            debug_assert!(diversions.reg(*v, locations) == ru);
+            avail_env.invalidate_by_reg(ru);
+        }
+    }
+}
+
+// =============================================================================================
+// Processing of individual instructions
+
+impl RedundantReloadRemover {
+    // Process `inst`, possibly changing it into a different instruction, and possibly changing
+    // `self.avail_env` and `func.dfg`.
+    fn visit_inst(
+        &mut self,
+        func: &mut Function,
+        reginfo: &RegInfo,
+        isa: &dyn TargetIsa,
+        inst: Inst,
+    ) {
+        // Get hold of the top-of-stack work item. This is the state that we will mutate during
+        // processing of this instruction.
+        debug_assert!(!self.processing_stack.is_empty());
+        let ProcessingStackElem {
+            avail_env,
+            cursor: _,
+            diversions,
+        } = &mut self.processing_stack.last_mut().unwrap();
+
+        #[cfg(debug_assertions)]
+        debug_assert!(
+            avail_env.check_invariants(),
+            "visit_inst: env invariants not ok"
+        );
+
+        let dfg = &mut func.dfg;
+        let locations = &func.locations;
+        let stack_slots = &func.stack_slots;
+
+        // To avoid difficulties with the borrow checker, do this in two stages. First, examine
+        // the instruction to see if it can be deleted or modified, and park the relevant
+        // information in `transform`. Update `self.avail_env` too. Later, use `transform` to
+        // actually do the transformation if necessary.
+        enum Transform {
+            NoChange,
+            ChangeToNopFill(Value),           // delete this insn entirely
+            ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg
+        }
+        let mut transform = Transform::NoChange;
+
+        // In this match { .. } statement, either we must treat the instruction specially, or we
+        // must call `invalidate_regs_written_by_inst` on it.
+        match &dfg[inst] {
+            InstructionData::Unary {
+                opcode: Opcode::Spill,
+                arg: src_value,
+            } => {
+                // Extract: (src_reg, kind, offset, size)
+                // Invalidate: (kind, offset, size)
+                // Add new binding: {src_reg -> (kind, offset, size)}
+                // Don't forget that src_value might be diverted, so we have to deref it.
+ let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]); + let src_reg = diversions.reg(*src_value, locations); + let kind = slot.kind; + if is_slot_kind_tracked(kind) { + let offset = slot.offset.expect("visit_inst: spill with no offset"); + let size = slot.size; + avail_env.bind(src_reg, kind, offset, size); + } else { + // We don't expect this insn to write any regs. But to be consistent with the + // rule above, do this anyway. + invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); + } + } + InstructionData::Unary { + opcode: Opcode::Fill, + arg: src_value, + } => { + // Extract: (dst_reg, kind, offset, size) + // Invalidate: (kind, offset, size) + // Add new: {dst_reg -> (dst_value, kind, offset, size)} + let slot = slot_of_value(locations, stack_slots, *src_value); + let dst_value = dfg.inst_results(inst)[0]; + let dst_reg = reg_of_value(locations, dst_value); + // This must be true. It would be meaningless for an SSA value to be diverted + // before it was defined. + debug_assert!(dst_reg == diversions.reg(dst_value, locations)); + let kind = slot.kind; + if is_slot_kind_tracked(kind) { + let offset = slot.offset.expect("visit_inst: fill with no offset"); + let size = slot.size; + if avail_env.has_exact_binding(dst_reg, kind, offset, size) { + // This instruction is an exact copy of a fill we saw earlier, and the + // loaded value is still valid. So we'll schedule this instruction for + // deletion (below). No need to make any changes to `avail_env`. + transform = Transform::ChangeToNopFill(*src_value); + } else if let Some(other_reg) = + avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size) + { + // This fill is from the required slot, but into a different register + // `other_reg`. So replace it with a copy from `other_reg` to `dst_reg` + // and update `dst_reg`s binding to make it the same as `other_reg`'s, so + // as to maximise the chances of future matches after this instruction. + debug_assert!(other_reg != dst_reg); + transform = + Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg); + avail_env.copy_reg(other_reg, dst_reg); + } else { + // This fill creates some new binding we don't know about. Update + // `avail_env` to track it. + avail_env.bind(dst_reg, kind, offset, size); + } + } else { + // Else it's "just another instruction that writes a reg", so we'd better + // treat it as such, just as we do below for instructions that we don't handle + // specially. + invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); + } + } + InstructionData::RegMove { + opcode: _, + arg: _, + src, + dst, + } => { + // These happen relatively rarely, but just frequently enough that it's worth + // tracking the copy (at the machine level, it's really a copy) in `avail_env`. + avail_env.copy_reg(*src, *dst); + } + InstructionData::RegSpill { .. } + | InstructionData::RegFill { .. } + | InstructionData::Call { .. } + | InstructionData::CallIndirect { .. } + | InstructionData::StackLoad { .. } + | InstructionData::StackStore { .. } + | InstructionData::Unary { + opcode: Opcode::AdjustSpDown, + .. + } + | InstructionData::UnaryImm { + opcode: Opcode::AdjustSpUpImm, + .. + } + | InstructionData::UnaryImm { + opcode: Opcode::AdjustSpDownImm, + .. + } => { + // All of these change, or might change, the memory-register bindings tracked in + // `avail_env` in some way we don't know about, or at least, we might be able to + // track, but for which the effort-to-benefit ratio seems too low to bother. 
So + // play safe: forget everything we know. + // + // For Call/CallIndirect, we could do better when compiling for calling + // conventions that have callee-saved registers, since bindings for them would + // remain valid across the call. + avail_env.invalidate_all(); + } + _ => { + // Invalidate: any `avail_env` entry associated with a reg written by `inst`. + invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); + } + } + + // Actually do the transformation. + match transform { + Transform::NoChange => {} + Transform::ChangeToNopFill(arg) => { + // Load is completely redundant. Convert it to a no-op. + dfg.replace(inst).fill_nop(arg); + let ok = func.update_encoding(inst, isa).is_ok(); + debug_assert!(ok, "fill_nop encoding missing for this type"); + } + Transform::ChangeToCopyToSSA(ty, reg) => { + // We already have the relevant value in some other register. Convert the + // load into a reg-reg copy. + dfg.replace(inst).copy_to_ssa(ty, reg); + let ok = func.update_encoding(inst, isa).is_ok(); + debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty); + } + } + } +} + +// ============================================================================================= +// Top level: processing of tree shaped regions + +impl RedundantReloadRemover { + // Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly + // one Ebb. The diversions are created new, rather than cloned, to reflect the fact + // that diversions are local to each Ebb. + fn processing_stack_push(&mut self, cursor: CursorPosition) { + let avail_env = if let Some(stack_top) = self.processing_stack.last() { + stack_top.avail_env.clone() + } else { + AvailEnv::new( + self.num_regunits + .expect("processing_stack_push: num_regunits unknown!") + as usize, + ) + }; + self.processing_stack.push(ProcessingStackElem { + avail_env, + cursor, + diversions: RegDiversions::new(), + }); + } + + // This pushes the node `dst` onto the processing stack, and sets up the new + // ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current + // tree *and* we haven't yet visited it. + fn processing_stack_maybe_push(&mut self, dst: Ebb) { + if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) { + if !self.processing_stack.is_empty() { + // If this isn't the outermost node in the tree (that is, the root), then it must + // have exactly one predecessor. Nodes with no predecessors are dead and not + // incorporated in any tree. Nodes with two or more predecessors are the root of + // some other tree, and visiting them as if they were part of the current tree + // would be a serious error. + debug_assert!(self.num_preds_per_ebb[dst] == ZeroOneOrMany::One); + } + self.processing_stack_push(CursorPosition::Before(dst)); + self.nodes_already_visited.insert(dst); + } + } + + // Perform redundant-reload removal on the tree shaped region of graph defined by `root` and + // `self.nodes_in_tree`. The following state is modified: `self.processing_stack`, + // `self.nodes_already_visited`, and `func.dfg`. 
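+    // For example (illustrative): in a diamond CFG where A branches to B and C, and both B
+    // and C jump to D, the trees are {A, B, C} rooted at A (B and C each have exactly one
+    // predecessor) and {D} rooted at D, since D has two predecessors.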
+    fn process_tree(
+        &mut self,
+        func: &mut Function,
+        reginfo: &RegInfo,
+        isa: &dyn TargetIsa,
+        root: Ebb,
+    ) {
+        debug_assert!(self.nodes_in_tree.contains(root));
+        debug_assert!(self.processing_stack.is_empty());
+        debug_assert!(self.nodes_already_visited.is_empty());
+
+        // Create the initial work item.
+        self.processing_stack_maybe_push(root);
+
+        while !self.processing_stack.is_empty() {
+            // It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do
+            // next_inst() on it once, and then copy the resulting position back out. But use of
+            // a function-global FuncCursor, or of the EncCursor in struct Context, leads to
+            // borrow checker problems, as does including FuncCursor directly in
+            // ProcessingStackElem. In any case this is not as bad as it looks, since profiling
+            // shows that the build-insert-step-extract work is reduced to just 8 machine
+            // instructions in an optimised x86_64 build, presumably because rustc can inline and
+            // then optimise out almost all the work.
+            let tos = self.processing_stack.len() - 1;
+            let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor);
+            let maybe_inst = pos.next_inst();
+            self.processing_stack[tos].cursor = pos.position();
+
+            if let Some(inst) = maybe_inst {
+                // Deal with this insn, possibly changing it, possibly updating the top item of
+                // `self.processing_stack`.
+                self.visit_inst(func, reginfo, isa, inst);
+
+                // Update diversions after the insn.
+                self.processing_stack[tos].diversions.apply(&func.dfg[inst]);
+
+                // If the insn can branch outside this Ebb, push work items on the stack for all
+                // target Ebbs that are part of the same tree and that we haven't yet visited.
+                // The next iteration of this instruction-processing loop will immediately start
+                // work on the most recently pushed Ebb, and will eventually continue in this Ebb
+                // when those new items have been removed from the stack.
+                match func.dfg.analyze_branch(inst) {
+                    BranchInfo::NotABranch => (),
+                    BranchInfo::SingleDest(dst, _) => {
+                        self.processing_stack_maybe_push(dst);
+                    }
+                    BranchInfo::Table(jt, default) => {
+                        func.jump_tables[jt]
+                            .iter()
+                            .for_each(|dst| self.processing_stack_maybe_push(*dst));
+                        if let Some(dst) = default {
+                            self.processing_stack_maybe_push(dst);
+                        }
+                    }
+                }
+            } else {
+                // We've come to the end of the current work-item (Ebb). We'll already have
+                // processed the fallthrough/continuation/whatever for it using the logic above.
+                // Pop it off the stack and resume work on its parent.
+                self.processing_stack.pop();
+            }
+        }
+    }
+}
+
+// =============================================================================================
+// Top level: perform redundant fill removal for a complete function
+
+impl RedundantReloadRemover {
+    /// Create a new remover state.
+    pub fn new() -> Self {
+        Self {
+            num_regunits: None,
+            num_preds_per_ebb: PrimaryMap::<Ebb, ZeroOneOrMany>::with_capacity(8),
+            discovery_stack: Vec::<Ebb>::with_capacity(16),
+            nodes_in_tree: EntitySet::<Ebb>::new(),
+            processing_stack: Vec::<ProcessingStackElem>::with_capacity(8),
+            nodes_already_visited: EntitySet::<Ebb>::new(),
+        }
+    }
+
+    /// Clear the state of the remover.
+ pub fn clear(&mut self) { + self.clear_for_new_function(); + } + + fn clear_for_new_function(&mut self) { + self.num_preds_per_ebb.clear(); + self.clear_for_new_tree(); + } + + fn clear_for_new_tree(&mut self) { + self.discovery_stack.clear(); + self.nodes_in_tree.clear(); + self.processing_stack.clear(); + self.nodes_already_visited.clear(); + } + + #[inline(never)] + fn do_redundant_fill_removal_on_function( + &mut self, + func: &mut Function, + reginfo: &RegInfo, + isa: &dyn TargetIsa, + cfg: &ControlFlowGraph, + ) { + // Fail in an obvious way if there are more than (2^32)-1 Ebbs in this function. + let num_ebbs: u32 = func.dfg.num_ebbs().try_into().unwrap(); + + // Clear out per-tree state. + self.clear_for_new_function(); + + // Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1 + // or "many", and that also claims the entry block as having "many" predecessors. + self.num_preds_per_ebb.clear(); + self.num_preds_per_ebb.reserve(num_ebbs as usize); + + for i in 0..num_ebbs { + let mut pi = cfg.pred_iter(Ebb::from_u32(i)); + let mut n_pi = ZeroOneOrMany::Zero; + if let Some(_) = pi.next() { + n_pi = ZeroOneOrMany::One; + if let Some(_) = pi.next() { + n_pi = ZeroOneOrMany::Many; + // We don't care if there are more than two preds, so stop counting now. + } + } + self.num_preds_per_ebb.push(n_pi); + } + debug_assert!(self.num_preds_per_ebb.len() == num_ebbs as usize); + + // The entry block must be the root of some tree, so set up the state to reflect that. + let entry_ebb = func + .layout + .entry_block() + .expect("do_redundant_fill_removal_on_function: entry ebb unknown"); + debug_assert!(self.num_preds_per_ebb[entry_ebb] == ZeroOneOrMany::Zero); + self.num_preds_per_ebb[entry_ebb] = ZeroOneOrMany::Many; + + // Now build and process trees. + for root_ix in 0..self.num_preds_per_ebb.len() { + let root = Ebb::from_u32(root_ix as u32); + + // Build a tree for each node that has two or more preds, and ignore all other nodes. + if self.num_preds_per_ebb[root] != ZeroOneOrMany::Many { + continue; + } + + // Clear out per-tree state. + self.clear_for_new_tree(); + + // Discovery phase: build the tree, as `root` and `self.nodes_in_tree`. + self.add_nodes_to_tree(cfg, root); + debug_assert!(self.nodes_in_tree.cardinality() > 0); + debug_assert!(self.num_preds_per_ebb[root] == ZeroOneOrMany::Many); + + // Processing phase: do redundant-reload-removal. + self.process_tree(func, reginfo, isa, root); + debug_assert!( + self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality() + ); + } + } +} + +// ============================================================================================= +// Top level: the external interface + +struct Context<'a> { + // Current instruction as well as reference to function and ISA. + cur: EncCursor<'a>, + + // Cached ISA information. We save it here to avoid frequent virtual function calls on the + // `TargetIsa` trait object. + reginfo: RegInfo, + + // References to contextual data structures we need. + cfg: &'a ControlFlowGraph, + + // The running state. + state: &'a mut RedundantReloadRemover, +} + +impl RedundantReloadRemover { + /// Run the remover. 
+    pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
+        let ctx = Context {
+            cur: EncCursor::new(func, isa),
+            reginfo: isa.register_info(),
+            cfg,
+            state: self,
+        };
+        let mut total_regunits = 0;
+        for rb in isa.register_info().banks {
+            total_regunits += rb.units;
+        }
+        ctx.state.num_regunits = Some(total_regunits);
+        ctx.state.do_redundant_fill_removal_on_function(
+            ctx.cur.func,
+            &ctx.reginfo,
+            ctx.cur.isa,
+            &ctx.cfg,
+        );
+    }
+}
diff --git a/cranelift-codegen/src/regalloc/branch_splitting.rs b/cranelift-codegen/src/regalloc/branch_splitting.rs
new file mode 100644
index 000000000..e44f45229
--- /dev/null
+++ b/cranelift-codegen/src/regalloc/branch_splitting.rs
@@ -0,0 +1,197 @@
+//! Split the outgoing edges of conditional branches that pass parameters.
+//!
+//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
+//! between a conditional branch and the following terminator.
+#![cfg(feature = "basic-blocks")]
+
+use std::vec::Vec;
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
+use crate::isa::TargetIsa;
+use crate::topo_order::TopoOrder;
+
+pub fn run(
+    isa: &dyn TargetIsa,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    topo: &mut TopoOrder,
+) {
+    let mut ctx = Context {
+        has_new_blocks: false,
+        has_fallthrough_return: None,
+        cur: EncCursor::new(func, isa),
+        domtree,
+        topo,
+        cfg,
+    };
+    ctx.run()
+}
+
+struct Context<'a> {
+    /// True if new blocks were inserted.
+    has_new_blocks: bool,
+
+    /// Record whether newly inserted empty blocks should be inserted last, or before the last, to
+    /// avoid disturbing the expected control flow of `fallthrough_return` statements.
+    ///
+    /// This value is computed when needed. The Option wraps the computed value if any.
+    has_fallthrough_return: Option<bool>,
+
+    /// Current instruction as well as reference to function and ISA.
+    cur: EncCursor<'a>,
+
+    /// References to contextual data structures we need.
+    domtree: &'a mut DominatorTree,
+    topo: &'a mut TopoOrder,
+    cfg: &'a mut ControlFlowGraph,
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self) {
+        // Any ebb order will do.
+        self.topo.reset(self.cur.func.layout.ebbs());
+        while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            // Branches can only be at the last or second to last position in an extended basic
+            // block.
+            self.cur.goto_last_inst(ebb);
+            let terminator_inst = self.cur.current_inst().expect("terminator");
+            if let Some(inst) = self.cur.prev_inst() {
+                let opcode = self.cur.func.dfg[inst].opcode();
+                if opcode.is_branch() {
+                    self.visit_conditional_branch(inst, opcode);
+                    self.cur.goto_inst(terminator_inst);
+                    self.visit_terminator_branch(terminator_inst);
+                }
+            }
+        }
+
+        // If blocks were added the cfg and domtree are inconsistent and must be recomputed.
+        if self.has_new_blocks {
+            self.cfg.compute(&self.cur.func);
+            self.domtree.compute(&self.cur.func, self.cfg);
+        }
+    }
+
+    fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
+        // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
+        let target = match self.cur.func.dfg[branch] {
+            InstructionData::Branch { destination, .. }
+            | InstructionData::BranchIcmp { destination, .. }
+            | InstructionData::BranchInt { destination, ..
} + | InstructionData::BranchFloat { destination, .. } => destination, + _ => panic!("Unexpected instruction in visit_conditional_branch"), + }; + + // If there are any parameters, split the edge. + if self.should_split_edge(target) { + // Create the block the branch will jump to. + let new_ebb = self.make_empty_ebb(); + + // Extract the arguments of the branch instruction, split the Ebb parameters and the + // branch arguments + let num_fixed = opcode.constraints().num_fixed_value_arguments(); + let dfg = &mut self.cur.func.dfg; + let old_args: Vec<_> = { + let args = dfg[branch].take_value_list().expect("ebb parameters"); + args.as_slice(&dfg.value_lists).iter().map(|x| *x).collect() + }; + let (branch_args, ebb_params) = old_args.split_at(num_fixed); + + // Replace the branch destination by the new Ebb created with no parameters, and restore + // the branch arguments, without the original Ebb parameters. + { + let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists); + let data = &mut dfg[branch]; + *data.branch_destination_mut().expect("branch") = new_ebb; + data.put_value_list(branch_args); + } + let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok(); + debug_assert!(ok); + + // Insert a jump to the original target with its arguments into the new block. + self.cur.goto_first_insertion_point(new_ebb); + self.cur.ins().jump(target, ebb_params); + + // Reset the cursor to point to the branch. + self.cur.goto_inst(branch); + } + } + + fn visit_terminator_branch(&mut self, inst: Inst) { + let inst_data = &self.cur.func.dfg[inst]; + let opcode = inst_data.opcode(); + if opcode != Opcode::Jump && opcode != Opcode::Fallthrough { + // This opcode is ignored as it does not have any EBB parameters. + if opcode != Opcode::IndirectJumpTableBr { + debug_assert!(!opcode.is_branch()) + } + return; + } + + let target = match inst_data { + InstructionData::Jump { destination, .. } => destination, + _ => panic!( + "Unexpected instruction {} in visit_terminator_branch", + self.cur.display_inst(inst) + ), + }; + debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator()); + + // If there are any parameters, split the edge. + if self.should_split_edge(*target) { + // Create the block the branch will jump to. + let new_ebb = self.cur.func.dfg.make_ebb(); + self.has_new_blocks = true; + + // Split the current block before its terminator, and insert a new jump instruction to + // jump to it. + let jump = self.cur.ins().jump(new_ebb, &[]); + self.cur.insert_ebb(new_ebb); + + // Reset the cursor to point to new terminator of the old ebb. + self.cur.goto_inst(jump); + } + } + + // A new ebb must be inserted before the last ebb because the last ebb may have a + // fallthrough_return and can't have anything after it. + fn make_empty_ebb(&mut self) -> Ebb { + let last_ebb = self.cur.layout().last_ebb().unwrap(); + if self.has_fallthrough_return == None { + let last_inst = self.cur.layout().last_inst(last_ebb).unwrap(); + self.has_fallthrough_return = + Some(self.cur.func.dfg[last_inst].opcode() == Opcode::FallthroughReturn); + } + let new_ebb = self.cur.func.dfg.make_ebb(); + if self.has_fallthrough_return == Some(true) { + // Insert before the last block which has a fallthrough_return + // instruction. + self.cur.layout_mut().insert_ebb(new_ebb, last_ebb); + } else { + // Insert after the last block. + self.cur.layout_mut().insert_ebb_after(new_ebb, last_ebb); + } + self.has_new_blocks = true; + new_ebb + } + + /// Returns whether we should introduce a new branch. 
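+    ///
+    /// An edge needs splitting when the branch passes EBB parameters to `target`, or when
+    /// `target` has more than one predecessor, since copies and regmoves could otherwise not
+    /// be inserted for that edge alone.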
+ fn should_split_edge(&self, target: Ebb) -> bool { + // We should split the edge if the target has any parameters. + if self.cur.func.dfg.ebb_params(target).len() > 0 { + return true; + }; + + // Or, if the target has more than one block reaching it. + debug_assert!(self.cfg.pred_iter(target).next() != None); + if let Some(_) = self.cfg.pred_iter(target).skip(1).next() { + return true; + }; + + false + } +} diff --git a/cranelift-codegen/src/regalloc/coloring.rs b/cranelift-codegen/src/regalloc/coloring.rs index 4e584b5a6..1e69c342a 100644 --- a/cranelift-codegen/src/regalloc/coloring.rs +++ b/cranelift-codegen/src/regalloc/coloring.rs @@ -44,18 +44,19 @@ use crate::cursor::{Cursor, EncCursor}; use crate::dominator_tree::DominatorTree; -use crate::ir::{AbiParam, ArgumentLoc, InstBuilder, ValueDef}; -use crate::ir::{Ebb, Function, Inst, Layout, SigRef, Value, ValueLoc}; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{ArgumentLoc, InstBuilder, ValueDef}; +use crate::ir::{Ebb, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc}; use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit}; use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa}; use crate::packed_option::PackedOption; use crate::regalloc::affinity::Affinity; +use crate::regalloc::diversion::RegDiversions; use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; use crate::regalloc::liveness::Liveness; use crate::regalloc::liverange::{LiveRange, LiveRangeContext}; use crate::regalloc::register_set::RegisterSet; use crate::regalloc::solver::{Solver, SolverError}; -use crate::regalloc::RegDiversions; use crate::timing; use core::mem; use log::debug; @@ -68,6 +69,12 @@ pub struct Coloring { solver: Solver, } +/// Kinds of ABI parameters. +enum AbiParams { + Parameters(SigRef), + Returns, +} + /// Bundle of references that the coloring algorithm needs. /// /// Some of the needed mutable references are passed around as explicit function arguments so we @@ -86,6 +93,7 @@ struct Context<'a> { encinfo: EncInfo, // References to contextual data structures we need. + cfg: &'a ControlFlowGraph, domtree: &'a DominatorTree, liveness: &'a mut Liveness, @@ -98,6 +106,8 @@ struct Context<'a> { // Pristine set of registers that the allocator can use. // This set remains immutable, we make clones. usable_regs: RegisterSet, + + uses_pinned_reg: bool, } impl Coloring { @@ -120,6 +130,7 @@ impl Coloring { &mut self, isa: &dyn TargetIsa, func: &mut Function, + cfg: &ControlFlowGraph, domtree: &DominatorTree, liveness: &mut Liveness, tracker: &mut LiveValueTracker, @@ -128,9 +139,11 @@ impl Coloring { debug!("Coloring for:\n{}", func.display(isa)); let mut ctx = Context { usable_regs: isa.allocatable_registers(func), + uses_pinned_reg: isa.flags().enable_pinned_reg(), cur: EncCursor::new(func, isa), reginfo: isa.register_info(), encinfo: isa.encoding_info(), + cfg, domtree, liveness, divert: &mut self.divert, @@ -141,6 +154,12 @@ impl Coloring { } impl<'a> Context<'a> { + /// Is the pinned register usage enabled, and is this register the pinned register? + #[inline] + fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool { + rc.is_pinned_reg(self.uses_pinned_reg, reg) + } + /// Run the coloring algorithm. 
fn run(&mut self, tracker: &mut LiveValueTracker) { self.cur @@ -160,13 +179,13 @@ impl<'a> Context<'a> { debug!("Coloring {}:", ebb); let mut regs = self.visit_ebb_header(ebb, tracker); tracker.drop_dead_params(); - self.divert.clear(); // Now go through the instructions in `ebb` and color the values they define. self.cur.goto_top(ebb); while let Some(inst) = self.cur.next_inst() { self.cur.use_srcloc(inst); - if !self.cur.func.dfg[inst].opcode().is_ghost() { + let opcode = self.cur.func.dfg[inst].opcode(); + if !opcode.is_ghost() { // This is an instruction which either has an encoding or carries ABI-related // register allocation constraints. let enc = self.cur.func.encodings[inst]; @@ -183,6 +202,54 @@ impl<'a> Context<'a> { self.process_ghost_kills(kills, &mut regs); } tracker.drop_dead(inst); + + // We are not able to insert any regmove for diversion or un-diversion after the first + // branch. Instead, we record the diversion to be restored at the entry of the next EBB, + // which should have a single predecessor. + if opcode.is_branch() && cfg!(feature = "basic-blocks") { + // The next instruction is necessarily an unconditional branch. + if let Some(branch) = self.cur.next_inst() { + debug!( + "Skip coloring {}\n from {}\n with diversions {}", + self.cur.display_inst(branch), + regs.input.display(&self.reginfo), + self.divert.display(&self.reginfo) + ); + use crate::ir::instructions::BranchInfo::*; + let target = match self.cur.func.dfg.analyze_branch(branch) { + NotABranch | Table(_, _) => panic!( + "unexpected instruction {} after a conditional branch", + self.cur.display_inst(branch) + ), + SingleDest(ebb, _) => ebb, + }; + + // We have a single branch with a single target, and an EBB with a single + // predecessor. Thus we can forward the diversion set to the next EBB. + if self.cfg.pred_iter(target).count() == 1 { + // Transfer the diversion to the next EBB. + self.divert + .save_for_ebb(&mut self.cur.func.entry_diversions, target); + debug!( + "Set entry-diversion for {} to\n {}", + target, + self.divert.display(&self.reginfo) + ); + } else { + debug_assert!( + self.divert.is_empty(), + "Divert set is non-empty after the terminator." + ); + } + assert_eq!( + self.cur.next_inst(), + None, + "Unexpected instruction after a branch group." + ); + } else { + assert!(opcode.is_terminator()); + } + } } } @@ -199,6 +266,15 @@ impl<'a> Context<'a> { self.domtree, ); + // Copy the content of the registered diversions to be reused at the + // entry of this basic block. + self.divert.at_ebb(&self.cur.func.entry_diversions, ebb); + debug!( + "Start {} with entry-diversion set to\n {}", + ebb, + self.divert.display(&self.reginfo) + ); + if self.cur.func.layout.entry_block() == Some(ebb) { // Parameters on the entry block have ABI constraints. 
             self.color_entry_params(tracker.live())
@@ -224,17 +300,35 @@ impl<'a> Context<'a> {
                 "Live-in: {}:{} in {}",
                 lv.value,
                 lv.affinity.display(&self.reginfo),
-                self.cur.func.locations[lv.value].display(&self.reginfo)
+                self.divert
+                    .get(lv.value, &self.cur.func.locations)
+                    .display(&self.reginfo)
             );
             if let Affinity::Reg(rci) = lv.affinity {
                 let rc = self.reginfo.rc(rci);
                 let loc = self.cur.func.locations[lv.value];
-                match loc {
-                    ValueLoc::Reg(reg) => regs.take(rc, reg, lv.is_local),
+                let reg = match loc {
+                    ValueLoc::Reg(reg) => reg,
                     ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value),
                     ValueLoc::Stack(ss) => {
                         panic!("Live-in {} is in {}, should be register", lv.value, ss)
                     }
+                };
+                if lv.is_local {
+                    regs.take(rc, reg, lv.is_local);
+                } else {
+                    let loc = self.divert.get(lv.value, &self.cur.func.locations);
+                    let reg_divert = match loc {
+                        ValueLoc::Reg(reg) => reg,
+                        ValueLoc::Unassigned => {
+                            panic!("Diversion: Live-in {} wasn't assigned", lv.value)
+                        }
+                        ValueLoc::Stack(ss) => panic!(
+                            "Diversion: Live-in {} is in {}, should be register",
+                            lv.value, ss
+                        ),
+                    };
+                    regs.take_divert(rc, reg, reg_divert);
                 }
             }
         }
@@ -284,6 +378,36 @@ impl<'a> Context<'a> {
         regs
     }
 
+    /// Program the input-side ABI constraints for `inst` into the constraint solver.
+    ///
+    /// ABI constraints are the fixed register assignments used for calls and returns.
+    fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) {
+        let abi_types = match abi_params {
+            AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params,
+            AbiParams::Returns => &self.cur.func.signature.returns,
+        };
+
+        for (abi, &value) in abi_types
+            .iter()
+            .zip(self.cur.func.dfg.inst_variable_args(inst))
+        {
+            if let ArgumentLoc::Reg(reg) = abi.location {
+                if let Affinity::Reg(rci) = self
+                    .liveness
+                    .get(value)
+                    .expect("ABI register must have live range")
+                    .affinity
+                {
+                    let rc = self.reginfo.rc(rci);
+                    let cur_reg = self.divert.reg(value, &self.cur.func.locations);
+                    self.solver.reassign_in(value, rc, cur_reg, reg);
+                } else {
+                    panic!("ABI argument {} should be in a register", value);
+                }
+            }
+        }
+    }
+
     /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy
     /// instruction constraints.
     ///
@@ -310,30 +434,16 @@ impl<'a> Context<'a> {
 
         // Program the solver with register constraints for the input side.
         self.solver.reset(&regs.input);
+
         if let Some(constraints) = constraints {
             self.program_input_constraints(inst, constraints.ins);
         }
+
         let call_sig = self.cur.func.dfg.call_signature(inst);
         if let Some(sig) = call_sig {
-            program_input_abi(
-                &mut self.solver,
-                inst,
-                &self.cur.func.dfg.signatures[sig].params,
-                &self.cur.func,
-                &self.liveness,
-                &self.reginfo,
-                &self.divert,
-            );
+            self.program_input_abi(inst, AbiParams::Parameters(sig));
         } else if self.cur.func.dfg[inst].opcode().is_return() {
-            program_input_abi(
-                &mut self.solver,
-                inst,
-                &self.cur.func.signature.returns,
-                &self.cur.func,
-                &self.liveness,
-                &self.reginfo,
-                &self.divert,
-            );
+            self.program_input_abi(inst, AbiParams::Returns);
         } else if self.cur.func.dfg[inst].opcode().is_branch() {
             // This is a branch, so we need to make sure that globally live values are in their
             // global registers. For EBBs that take arguments, we also need to place the argument
@@ -358,6 +468,7 @@ impl<'a> Context<'a> {
         if self.solver.has_fixed_input_conflicts() {
             self.divert_fixed_input_conflicts(tracker.live());
         }
+
         self.solver.inputs_done();
 
         // Update the live value tracker with this instruction.
@@ -368,6 +479,13 @@ impl<'a> Context<'a> { if let Affinity::Reg(rci) = lv.affinity { let rc = self.reginfo.rc(rci); let reg = self.divert.reg(lv.value, &self.cur.func.locations); + + if self.is_pinned_reg(rc, reg) { + // Don't kill the pinned reg, either in the local or global register sets. + debug_assert!(lv.is_local, "pinned register SSA value can't be global"); + continue; + } + debug!( " kill {} in {} ({} {})", lv.value, @@ -407,6 +525,7 @@ impl<'a> Context<'a> { ); } } + if let Some(sig) = call_sig { self.program_output_abi( sig, @@ -416,6 +535,7 @@ impl<'a> Context<'a> { ®s.global, ); } + if let Some(constraints) = constraints { self.program_output_constraints( inst, @@ -428,10 +548,26 @@ impl<'a> Context<'a> { // Finally, we've fully programmed the constraint solver. // We expect a quick solution in most cases. - let output_regs = self.solver.quick_solve(®s.global).unwrap_or_else(|_| { - debug!("quick_solve failed for {}", self.solver); - self.iterate_solution(throughs, ®s.global, &mut replace_global_defines) - }); + let is_reload = match &self.cur.func.dfg[inst] { + InstructionData::Unary { + opcode: Opcode::Fill, + arg: _, + } => true, + _ => false, + }; + + let output_regs = self + .solver + .quick_solve(®s.global, is_reload) + .unwrap_or_else(|_| { + debug!("quick_solve failed for {}", self.solver); + self.iterate_solution( + throughs, + ®s.global, + &mut replace_global_defines, + is_reload, + ) + }); // The solution and/or fixed input constraints may require us to shuffle the set of live // registers around. @@ -481,16 +617,28 @@ impl<'a> Context<'a> { if let Affinity::Reg(rci) = lv.affinity { let rc = self.reginfo.rc(rci); + let reg = loc.unwrap_reg(); + + debug_assert!( + !self.is_pinned_reg(rc, reg) + || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg, + "pinned register may not be part of outputs for '{}'.", + self.cur.func.dfg[inst].opcode() + ); + + if self.is_pinned_reg(rc, reg) { + continue; + } // Remove the dead defs. if lv.endpoint == inst { - regs.input.free(rc, loc.unwrap_reg()); + regs.input.free(rc, reg); debug_assert!(lv.is_local); } // Track globals in their undiverted locations. if !lv.is_local && !replace_global_defines { - regs.global.take(rc, loc.unwrap_reg()); + regs.global.take(rc, reg); } } } @@ -511,14 +659,35 @@ impl<'a> Context<'a> { // already in a register. let cur_reg = self.divert.reg(value, &self.cur.func.locations); match op.kind { - ConstraintKind::FixedReg(regunit) | ConstraintKind::FixedTied(regunit) => { + ConstraintKind::FixedReg(regunit) => { // Add the fixed constraint even if `cur_reg == regunit`. // It is possible that we will want to convert the value to a variable later, // and this identity assignment prevents that from happening. self.solver .reassign_in(value, op.regclass, cur_reg, regunit); } - ConstraintKind::Reg | ConstraintKind::Tied(_) => { + ConstraintKind::FixedTied(regunit) => { + // The pinned register may not be part of a fixed tied requirement. If this + // becomes the case, then it must be changed to a different register. + debug_assert!( + !self.is_pinned_reg(op.regclass, regunit), + "see comment above" + ); + // See comment right above. + self.solver + .reassign_in(value, op.regclass, cur_reg, regunit); + } + ConstraintKind::Tied(_) => { + if self.is_pinned_reg(op.regclass, cur_reg) { + // Divert the pinned register; it shouldn't be reused for a tied input. 
+ if self.solver.can_add_var(op.regclass, cur_reg) { + self.solver.add_var(value, op.regclass, cur_reg); + } + } else if !op.regclass.contains(cur_reg) { + self.solver.add_var(value, op.regclass, cur_reg); + } + } + ConstraintKind::Reg => { if !op.regclass.contains(cur_reg) { self.solver.add_var(value, op.regclass, cur_reg); } @@ -549,10 +718,13 @@ impl<'a> Context<'a> { match op.kind { ConstraintKind::Reg | ConstraintKind::Tied(_) => { let cur_reg = self.divert.reg(value, &self.cur.func.locations); - // This is the opposite condition of `program_input_constraints()`. - if op.regclass.contains(cur_reg) { + + // This is the opposite condition of `program_input_constraints()`. The pinned + // register mustn't be added back as a variable. + if op.regclass.contains(cur_reg) && !self.is_pinned_reg(op.regclass, cur_reg) { // This code runs after calling `solver.inputs_done()` so we must identify - // the new variable as killed or live-through. + // the new variable as killed or live-through. Always special-case the + // pinned register as a through variable. let ctx = self.liveness.context(&self.cur.func.layout); if self.liveness[value].killed_at(inst, ctx.order.pp_ebb(inst), ctx) { self.solver.add_killed_var(value, op.regclass, cur_reg); @@ -663,7 +835,7 @@ impl<'a> Context<'a> { if pred(lr, self.liveness.context(&self.cur.func.layout)) { if let Affinity::Reg(rci) = lr.affinity { let rc = self.reginfo.rc(rci); - // Stack diversions should not be possible here. The only live transiently + // Stack diversions should not be possible here. They only live transiently // during `shuffle_inputs()`. self.solver.reassign_in( value, @@ -682,8 +854,8 @@ impl<'a> Context<'a> { } } - // Find existing live values that conflict with the fixed input register constraints programmed - // into the constraint solver. Convert them to solver variables so they can be diverted. + /// Find existing live values that conflict with the fixed input register constraints programmed + /// into the constraint solver. Convert them to solver variables so they can be diverted. fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) { for lv in live { if let Affinity::Reg(rci) = lv.affinity { @@ -771,7 +943,9 @@ impl<'a> Context<'a> { reg: RegUnit, throughs: &[LiveValue], ) { - if !self.solver.add_fixed_output(rc, reg) { + // Pinned register is already unavailable in the solver, since it is copied in the + // available registers on entry. + if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) { // The fixed output conflicts with some of the live-through registers. for lv in throughs { if let Affinity::Reg(rci) = lv.affinity { @@ -814,12 +988,12 @@ impl<'a> Context<'a> { // Find the input operand we're tied to. // The solver doesn't care about the output value. let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; - if let Some(reg) = self.solver.add_tied_input( - arg, - op.regclass, - self.divert.reg(arg, &self.cur.func.locations), - !lv.is_local, - ) { + let reg = self.divert.reg(arg, &self.cur.func.locations); + + if let Some(reg) = + self.solver + .add_tied_input(arg, op.regclass, reg, !lv.is_local) + { // The value we're tied to has been assigned to a fixed register. // We need to make sure that fixed output register is compatible with the // global register set. 
@@ -847,12 +1021,13 @@ impl<'a> Context<'a> { throughs: &[LiveValue], global_regs: &RegisterSet, replace_global_defines: &mut bool, + is_reload: bool, ) -> RegisterSet { // Make sure `try_add_var()` below doesn't create a variable with too loose constraints. self.program_complete_input_constraints(); loop { - match self.solver.real_solve(global_regs) { + match self.solver.real_solve(global_regs, is_reload) { Ok(regs) => return regs, Err(SolverError::Divert(rc)) => { // Do we have any live-through `rc` registers that are not already variables? @@ -884,7 +1059,7 @@ impl<'a> Context<'a> { let toprc2 = self.reginfo.toprc(rci); let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); if rc.contains(reg2) - && self.solver.can_add_var(lv.value, toprc2, reg2) + && self.solver.can_add_var(toprc2, reg2) && !self.is_live_on_outgoing_edge(lv.value) { self.solver.add_through_var(lv.value, toprc2, reg2); @@ -945,8 +1120,15 @@ impl<'a> Context<'a> { for m in self.solver.moves() { match *m { Reg { - value, from, to, .. + value, + from, + to, + rc, } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regmove" + ); self.divert.regmove(value, from, to); self.cur.ins().regmove(value, from, to); } @@ -970,8 +1152,12 @@ impl<'a> Context<'a> { value, from_slot, to, - .. + rc, } => { + debug_assert!( + !self.is_pinned_reg(rc, to), + "pinned register used in a regfill" + ); // These slots are single use, so mark `ss` as available again. let ss = slot[from_slot].take().expect("Using unallocated slot"); self.divert.regfill(value, ss, to); @@ -1086,35 +1272,6 @@ impl<'a> Context<'a> { } } -/// Program the input-side ABI constraints for `inst` into the constraint solver. -/// -/// ABI constraints are the fixed register assignments used for calls and returns. -fn program_input_abi( - solver: &mut Solver, - inst: Inst, - abi_types: &[AbiParam], - func: &Function, - liveness: &Liveness, - reginfo: &RegInfo, - divert: &RegDiversions, -) { - for (abi, &value) in abi_types.iter().zip(func.dfg.inst_variable_args(inst)) { - if let ArgumentLoc::Reg(reg) = abi.location { - if let Affinity::Reg(rci) = liveness - .get(value) - .expect("ABI register must have live range") - .affinity - { - let rc = reginfo.rc(rci); - let cur_reg = divert.reg(value, &func.locations); - solver.reassign_in(value, rc, cur_reg, reg); - } else { - panic!("ABI argument {} should be in a register", value); - } - } - } -} - /// Keep track of the set of available registers in two interference domains: all registers /// considering diversions and global registers not considering diversions. struct AvailableRegs { @@ -1146,4 +1303,10 @@ impl AvailableRegs { self.global.take(rc, reg); } } + + /// Take a diverted register from both sets for a non-local allocation. 
+    pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) {
+        self.input.take(rc, reg_divert);
+        self.global.take(rc, reg);
+    }
 }
diff --git a/cranelift-codegen/src/regalloc/context.rs b/cranelift-codegen/src/regalloc/context.rs
index cb4b4245f..c5f5da4ec 100644
--- a/cranelift-codegen/src/regalloc/context.rs
+++ b/cranelift-codegen/src/regalloc/context.rs
@@ -8,11 +8,14 @@ use crate::dominator_tree::DominatorTree;
 use crate::flowgraph::ControlFlowGraph;
 use crate::ir::Function;
 use crate::isa::TargetIsa;
+#[cfg(feature = "basic-blocks")]
+use crate::regalloc::branch_splitting;
 use crate::regalloc::coalescing::Coalescing;
 use crate::regalloc::coloring::Coloring;
 use crate::regalloc::live_value_tracker::LiveValueTracker;
 use crate::regalloc::liveness::Liveness;
 use crate::regalloc::reload::Reload;
+use crate::regalloc::safepoint::emit_stackmaps;
 use crate::regalloc::spilling::Spilling;
 use crate::regalloc::virtregs::VirtRegs;
 use crate::result::CodegenResult;
@@ -77,7 +80,7 @@ impl Context {
         &mut self,
         isa: &dyn TargetIsa,
         func: &mut Function,
-        cfg: &ControlFlowGraph,
+        cfg: &mut ControlFlowGraph,
         domtree: &mut DominatorTree,
     ) -> CodegenResult<()> {
         let _tt = timing::regalloc();
@@ -92,6 +95,12 @@ impl Context {
         // phases.
         self.tracker.clear();
 
+        // Pass: Split branches, making room to insert copy & regmove instructions.
+        #[cfg(feature = "basic-blocks")]
+        {
+            branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
+        }
+
         // Pass: Liveness analysis.
         self.liveness.compute(isa, func, cfg);
 
@@ -189,13 +198,33 @@ impl Context {
         }
 
         // Pass: Coloring.
-        self.coloring
-            .run(isa, func, domtree, &mut self.liveness, &mut self.tracker);
+        self.coloring.run(
+            isa,
+            func,
+            cfg,
+            domtree,
+            &mut self.liveness,
+            &mut self.tracker,
+        );
+
+        // This function runs after register allocation has taken
+        // place, meaning values have locations assigned already.
+        if isa.flags().enable_safepoints() {
+            emit_stackmaps(func, domtree, &self.liveness, &mut self.tracker, isa);
+        } else {
+            // Make sure no references are used.
+            for val in func.dfg.values() {
+                let ty = func.dfg.value_type(val);
+                if ty.lane_type().is_ref() {
+                    panic!("reference types were found but safepoints were not enabled.");
+                }
+            }
+        }
 
         if isa.flags().enable_verifier() {
             let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
                 && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
-                && verify_locations(isa, func, Some(&self.liveness), &mut errors).is_ok()
+                && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
                 && verify_cssa(
                     func,
                     cfg,
diff --git a/cranelift-codegen/src/regalloc/diversion.rs b/cranelift-codegen/src/regalloc/diversion.rs
index 6e9b1f23c..12461c5ce 100644
--- a/cranelift-codegen/src/regalloc/diversion.rs
+++ b/cranelift-codegen/src/regalloc/diversion.rs
@@ -9,10 +9,11 @@
 use crate::fx::FxHashMap;
 use crate::hash_map::{Entry, Iter};
+use crate::ir::{Ebb, StackSlot, Value, ValueLoc, ValueLocations};
 use crate::ir::{InstructionData, Opcode};
-use crate::ir::{StackSlot, Value, ValueLoc, ValueLocations};
 use crate::isa::{RegInfo, RegUnit};
 use core::fmt;
+use cranelift_entity::{SparseMap, SparseMapValue};
 
 /// A diversion of a value from its original location to a new register or stack location.
 ///
@@ -38,10 +39,23 @@ impl Diversion {
 }
 
 /// Keep track of diversions in an EBB.
+#[derive(Clone)]
 pub struct RegDiversions {
     current: FxHashMap<Value, Diversion>,
 }
 
+/// Keep track of diversions at the entry of an EBB.
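+///
+/// Each entry pairs an `Ebb` with the diversion set that must be in effect when control
+/// enters it (see `EntryRegDiversions` below).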
+#[derive(Clone)]
+struct EntryRegDiversionsValue {
+    key: Ebb,
+    divert: RegDiversions,
+}
+
+/// Map each EBB to its matching RegDiversions at basic-block entry.
+pub struct EntryRegDiversions {
+    map: SparseMap<Ebb, EntryRegDiversionsValue>,
+}
+
 impl RegDiversions {
     /// Create a new empty diversion tracker.
     pub fn new() -> Self {
@@ -50,7 +64,7 @@ impl RegDiversions {
         }
     }
 
-    /// Clear the tracker, preparing for a new EBB.
+    /// Clear the content of the diversions, to reset the state of the compiler.
     pub fn clear(&mut self) {
         self.current.clear()
     }
@@ -92,7 +106,7 @@ impl RegDiversions {
     /// Record any kind of move.
     ///
     /// The `from` location must match an existing `to` location, if any.
-    pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
+    fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
         debug_assert!(from.is_assigned() && to.is_assigned());
         match self.current.entry(value) {
             Entry::Occupied(mut e) => {
@@ -163,9 +177,92 @@ impl RegDiversions {
         self.current.remove(&value).map(|d| d.to)
     }
 
+    /// Resets the state of the current diversions to the recorded diversions at the entry of the
+    /// given `ebb`. The recorded diversions are available after coloring in the
+    /// `func.entry_diversions` field.
+    pub fn at_ebb(&mut self, entry_diversions: &EntryRegDiversions, ebb: Ebb) {
+        self.clear();
+        if let Some(entry_divert) = entry_diversions.map.get(ebb) {
+            let iter = entry_divert.divert.current.iter();
+            self.current.extend(iter);
+        }
+    }
+
+    /// Copy the current state of the diversions, and save it for the entry of the `ebb` given as
+    /// argument.
+    ///
+    /// Note: This function can only be called once on an `ebb` with a given `entry_diversions`
+    /// argument; otherwise it will panic.
+    pub fn save_for_ebb(&mut self, entry_diversions: &mut EntryRegDiversions, target: Ebb) {
+        // No need to save anything if there are no diversions to be recorded.
+        if self.is_empty() {
+            return;
+        }
+        debug_assert!(!entry_diversions.map.contains_key(target));
+        let iter = self.current.iter();
+        let mut entry_divert = RegDiversions::new();
+        entry_divert.current.extend(iter);
+        entry_diversions.map.insert(EntryRegDiversionsValue {
+            key: target,
+            divert: entry_divert,
+        });
+    }
+
+    /// Check that the current diversions match what was recorded for the entry of the given
+    /// `ebb` in `entry_diversions`.
+    pub fn check_ebb_entry(&self, entry_diversions: &EntryRegDiversions, target: Ebb) -> bool {
+        let entry_divert = match entry_diversions.map.get(target) {
+            Some(entry_divert) => entry_divert,
+            None => return self.is_empty(),
+        };
+
+        if entry_divert.divert.current.len() != self.current.len() {
+            return false;
+        }
+
+        for (val, _) in entry_divert.divert.current.iter() {
+            if !self.current.contains_key(val) {
+                return false;
+            }
+        }
+        true
+    }
+
     /// Return an object that can display the diversions.
     pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
-        DisplayDiversions(self, regs.into())
+        DisplayDiversions(&self, regs.into())
    }
 }
 
+impl EntryRegDiversions {
+    /// Create a new empty entry diversion, to associate diversions to each EBB entry.
+    pub fn new() -> Self {
+        EntryRegDiversions {
+            map: SparseMap::new(),
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.map.clear();
+    }
+}
+
+impl Clone for EntryRegDiversions {
+    /// The Clone trait is required by `ir::Function`.
+    fn clone(&self) -> Self {
+        let mut tmp = Self::new();
+        for v in self.map.values() {
+            tmp.map.insert(v.clone());
+        }
+        tmp
+    }
+}
+
+/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
+/// diversions for each EBB.
+impl SparseMapValue<Ebb> for EntryRegDiversionsValue {
+    fn key(&self) -> Ebb {
+        self.key
     }
 }
 
@@ -175,7 +272,7 @@ pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
 impl<'a> fmt::Display for DisplayDiversions<'a> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{{")?;
-        for (value, div) in self.0.iter() {
+        for (value, div) in self.0.current.iter() {
             write!(
                 f,
                 " {}: {} -> {}",
diff --git a/cranelift-codegen/src/regalloc/mod.rs b/cranelift-codegen/src/regalloc/mod.rs
index dbec69160..37fcccb3b 100644
--- a/cranelift-codegen/src/regalloc/mod.rs
+++ b/cranelift-codegen/src/regalloc/mod.rs
@@ -10,14 +10,17 @@ pub mod register_set;
 pub mod virtregs;
 
 mod affinity;
+mod branch_splitting;
 mod coalescing;
 mod context;
 mod diversion;
 mod pressure;
 mod reload;
+mod safepoint;
 mod solver;
 mod spilling;
 
 pub use self::context::Context;
-pub use self::diversion::RegDiversions;
+pub use self::diversion::{EntryRegDiversions, RegDiversions};
 pub use self::register_set::RegisterSet;
+pub use self::safepoint::emit_stackmaps;
diff --git a/cranelift-codegen/src/regalloc/register_set.rs b/cranelift-codegen/src/regalloc/register_set.rs
index 29b6df762..e5edaa96d 100644
--- a/cranelift-codegen/src/regalloc/register_set.rs
+++ b/cranelift-codegen/src/regalloc/register_set.rs
@@ -89,9 +89,9 @@ impl RegisterSet {
 
         // Mask out the unavailable units.
         for idx in 0..self.avail.len() {
-            // If a single unit in a register is unavailable, the whole register can't be used.
-            // If a register straddles a word boundary, it will be marked as unavailable.
-            // There's an assertion in `cdsl/registers.py` to check for that.
+            // If a single unit in a register is unavailable, the whole register can't be used. If
+            // a register straddles a word boundary, it will be marked as unavailable. There's an
+            // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that.
             for i in 0..rc.width {
                 rsi.regs[idx] &= self.avail[idx] >> i;
             }
@@ -126,6 +126,7 @@ impl RegisterSet {
 }
 
 /// Iterator over available registers in a register class.
+#[derive(Clone)]
 pub struct RegSetIter {
     regs: RegUnitMask,
 }
@@ -161,6 +162,31 @@ impl Iterator for RegSetIter {
     }
 }
 
+impl RegSetIter {
+    pub fn rnext(&mut self) -> Option<RegUnit> {
+        let num_words = self.regs.len();
+        let bits_per_word = 8 * size_of_val(&self.regs[0]);
+
+        // Find the last set bit in `self.regs`.
+        for i in 0..num_words {
+            let word_ix = num_words - 1 - i;
+
+            let word = &mut self.regs[word_ix];
+            if *word != 0 {
+                let lzeroes = word.leading_zeros() as usize;
+
+                // Clear that highest bit so we won't find it again.
+                *word &= !(1 << (bits_per_word - 1 - lzeroes));
+
+                return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
+            }
+        }
+
+        // All of `self.regs` is 0.
+        None
+    }
+}
+
 impl ExactSizeIterator for RegSetIter {}
 
 /// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
@@ -242,6 +268,7 @@ mod tests {
         subclasses: 0,
         mask: [0xf0000000, 0x0000000f, 0],
         info: &INFO,
+        pinned_reg: None,
     };
 
     const DPR: RegClass = &RegClassData {
@@ -254,6 +281,7 @@ mod tests {
         subclasses: 0,
         mask: [0x50000000, 0x0000000a, 0],
         info: &INFO,
+        pinned_reg: None,
     };
 
     const INFO: RegInfo = RegInfo {
@@ -261,6 +289,45 @@ mod tests {
         classes: &[],
     };
 
+    const RSI_1: RegSetIter = RegSetIter {
+        regs: [0x31415927, 0x27182818, 0x14141356],
+    };
+
+    const RSI_2: RegSetIter = RegSetIter {
+        regs: [0x00000000, 0x00000000, 0x00000000],
+    };
+
+    const RSI_3: RegSetIter = RegSetIter {
+        regs: [0xffffffff, 0xffffffff, 0xffffffff],
+    };
+
+    fn reverse_regset_iteration_work(rsi: &RegSetIter) {
+        // Check the reverse iterator by comparing its output with the forward iterator.
+        let rsi_f = (*rsi).clone();
+        let results_f = rsi_f.collect::<Vec<RegUnit>>();
+
+        let mut rsi_r = (*rsi).clone();
+        let mut results_r = Vec::<RegUnit>::new();
+        while let Some(r) = rsi_r.rnext() {
+            results_r.push(r);
+        }
+
+        let len_f = results_f.len();
+        let len_r = results_r.len();
+        assert_eq!(len_f, len_r);
+
+        for i in 0..len_f {
+            assert_eq!(results_f[i], results_r[len_f - 1 - i]);
+        }
+    }
+
+    #[test]
+    fn reverse_regset_iteration() {
+        reverse_regset_iteration_work(&RSI_1);
+        reverse_regset_iteration_work(&RSI_2);
+        reverse_regset_iteration_work(&RSI_3);
+    }
+
     #[test]
     fn put_and_take() {
         let mut regs = RegisterSet::new();
diff --git a/cranelift-codegen/src/regalloc/reload.rs b/cranelift-codegen/src/regalloc/reload.rs
index fb6b61ec6..bbc198c45 100644
--- a/cranelift-codegen/src/regalloc/reload.rs
+++ b/cranelift-codegen/src/regalloc/reload.rs
@@ -233,7 +233,7 @@ impl<'a> Context<'a> {
                 let dst_ty = self.cur.func.dfg.value_type(dst_val);
                 debug_assert!(src_ty == dst_ty);
                 // This limits the transformation to copies of the
-                // types: I64 I32 I16 I8 F64 and F32, since that's
+                // types: I128 I64 I32 I16 I8 F64 and F32, since that's
                 // the set of `copy_nop` encodings available.
                 src_ty.is_int() || src_ty.is_float()
             }
diff --git a/cranelift-codegen/src/regalloc/safepoint.rs b/cranelift-codegen/src/regalloc/safepoint.rs
new file mode 100644
index 000000000..9b27b6227
--- /dev/null
+++ b/cranelift-codegen/src/regalloc/safepoint.rs
@@ -0,0 +1,72 @@
+use crate::cursor::{Cursor, FuncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::ir::{Function, InstBuilder, InstructionData, Opcode, TrapCode};
+use crate::isa::TargetIsa;
+use crate::regalloc::live_value_tracker::LiveValueTracker;
+use crate::regalloc::liveness::Liveness;
+use std::vec::Vec;
+
+fn insert_and_encode_safepoint<'f>(
+    pos: &mut FuncCursor<'f>,
+    tracker: &LiveValueTracker,
+    isa: &dyn TargetIsa,
+) {
+    // Iterate through all live values, collect only the references.
+    let live_ref_values = tracker
+        .live()
+        .iter()
+        .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
+        .map(|live_val| live_val.value)
+        .collect::<Vec<Value>>();
+
+    if !live_ref_values.is_empty() {
+        pos.ins().safepoint(&live_ref_values);
+        // Move cursor to the new safepoint instruction to encode it.
+        if let Some(inst) = pos.prev_inst() {
+            let ok = pos.func.update_encoding(inst, isa).is_ok();
+            debug_assert!(ok);
+        }
+        // Restore cursor position.
+        pos.next_inst();
+    }
+}
+
+// The emit_stackmaps() function analyzes each instruction to retrieve the liveness of
+// the defs and operands by traversing a function's ebbs in layout order.
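+// Safepoints are inserted before `trap` instructions carrying the `Interrupt` code and
+// before call instructions, recording every live reference-typed value at those points.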
+pub fn emit_stackmaps(
+    func: &mut Function,
+    domtree: &DominatorTree,
+    liveness: &Liveness,
+    tracker: &mut LiveValueTracker,
+    isa: &dyn TargetIsa,
+) {
+    let mut curr = func.layout.entry_block();
+
+    while let Some(ebb) = curr {
+        tracker.ebb_top(ebb, &func.dfg, liveness, &func.layout, domtree);
+        tracker.drop_dead_params();
+        let mut pos = FuncCursor::new(func);
+
+        // From the top of the ebb, step through the instructions.
+        pos.goto_top(ebb);
+
+        while let Some(inst) = pos.next_inst() {
+            if let InstructionData::Trap {
+                code: TrapCode::Interrupt,
+                ..
+            } = &pos.func.dfg[inst]
+            {
+                insert_and_encode_safepoint(&mut pos, tracker, isa);
+            } else if pos.func.dfg[inst].opcode().is_call() {
+                insert_and_encode_safepoint(&mut pos, tracker, isa);
+            } else if pos.func.dfg[inst].opcode() == Opcode::Safepoint {
+                panic!("safepoint instruction can only be used by the compiler!");
+            }
+
+            // Process the instruction and get rid of dead values.
+            tracker.process_inst(inst, &pos.func.dfg, liveness);
+            tracker.drop_dead(inst);
+        }
+        curr = func.layout.next_ebb(ebb);
+    }
+}
diff --git a/cranelift-codegen/src/regalloc/solver.rs b/cranelift-codegen/src/regalloc/solver.rs
index 0d6a816dc..35e17b050 100644
--- a/cranelift-codegen/src/regalloc/solver.rs
+++ b/cranelift-codegen/src/regalloc/solver.rs
@@ -852,8 +852,12 @@ impl Solver {
     /// always trivial.
     ///
     /// Returns `Ok(regs)` if a solution was found.
-    pub fn quick_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
-        self.find_solution(global_regs)
+    pub fn quick_solve(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
+        self.find_solution(global_regs, is_reload)
     }
 
     /// Try harder to find a solution.
@@ -863,12 +867,17 @@ impl Solver {
     /// This may return an error with a register class that has run out of registers. If registers
     /// can be freed up in the starving class, this method can be called again after adding
     /// variables for the freed registers.
-    pub fn real_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
+    pub fn real_solve(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
         // Compute domain sizes for all the variables given the current register sets.
         for v in &mut self.vars {
             let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
             v.domain = cmp::min(d, u16::MAX as usize) as u16;
         }
+
         // Solve for vars with small domains first to increase the chance of finding a solution.
         //
         // Also consider this case:
@@ -901,7 +910,7 @@ impl Solver {
         });
 
         debug!("real_solve for {}", self);
-        self.find_solution(global_regs)
+        self.find_solution(global_regs, is_reload)
     }
 
     /// Search for a solution with the current list of variables.
@@ -909,7 +918,11 @@ impl Solver {
     /// If a solution was found, returns `Ok(regs)` with the set of available registers on the
     /// output side after the solution. If no solution could be found, returns `Err(rc)` with the
     /// constraint register class that needs more available registers.
-    fn find_solution(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
+    fn find_solution(
+        &mut self,
+        global_regs: &RegisterSet,
+        is_reload: bool,
+    ) -> Result<RegisterSet, SolverError> {
         // Available registers on the input and output sides respectively.
         let mut iregs = self.regs_in.clone();
         let mut oregs = self.regs_out.clone();
@@ -917,16 +930,28 @@
 
         for v in &mut self.vars {
             let rc = v.constraint;
-            let reg = match v.iter(&iregs, &oregs, &gregs).next() {
+
+            // Decide which register to assign.
In order to try and keep registers holding + // reloaded values separate from all other registers to the extent possible, we choose + // the first available register in the normal case, but the last available one in the + // case of a reload. See "A side note on register choice heuristics" in + // src/redundant_reload_remover.rs for further details. + let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs); + let maybe_reg = if is_reload { + reg_set_iter.rnext() + } else { + reg_set_iter.next() + }; + + let reg = match maybe_reg { Some(reg) => reg, None => { // If `v` must avoid global interference, there is not point in requesting // live registers be diverted. We need to make it a non-global value. if v.is_global && gregs.iter(rc).next().is_none() { return Err(SolverError::Global(v.value)); - } else { - return Err(SolverError::Divert(rc)); } + return Err(SolverError::Divert(rc)); } }; @@ -951,7 +976,7 @@ impl Solver { } /// Check if `value` can be added as a variable to help find a solution. - pub fn can_add_var(&mut self, _value: Value, constraint: RegClass, from: RegUnit) -> bool { + pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool { !self.regs_in.is_avail(constraint, from) } } @@ -1005,7 +1030,7 @@ impl Solver { let mut avail = regs.clone(); let mut i = 0; while i < self.moves.len() + self.fills.len() { - // Don't even look at the fills until we've spent all the moves. Deferring these let's + // Don't even look at the fills until we've spent all the moves. Deferring these lets // us potentially reuse the claimed registers to resolve multiple cycles. if i >= self.moves.len() { self.moves.append(&mut self.fills); @@ -1207,7 +1232,7 @@ mod tests { solver.reset(®s); solver.reassign_in(v10, gpr, r1, r0); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 0); assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]); @@ -1217,7 +1242,7 @@ mod tests { solver.reassign_in(v10, gpr, r0, r1); solver.reassign_in(v11, gpr, r1, r2); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 0); assert_eq!( solver.moves(), @@ -1229,7 +1254,7 @@ mod tests { solver.reassign_in(v10, gpr, r0, r1); solver.reassign_in(v11, gpr, r1, r0); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 0); assert_eq!( solver.moves(), @@ -1269,7 +1294,7 @@ mod tests { solver.reassign_in(v11, s, s2, s0); solver.reassign_in(v12, s, s3, s1); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 0); assert_eq!( solver.moves(), @@ -1290,7 +1315,7 @@ mod tests { solver.reassign_in(v12, s, s1, s3); solver.reassign_in(v10, d, d1, d0); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 0); assert_eq!( solver.moves(), @@ -1335,7 +1360,7 @@ mod tests { solver.reassign_in(v11, gpr, r1, r2); solver.reassign_in(v12, gpr, r2, r0); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); assert_eq!(solver.schedule_moves(®s), 1); assert_eq!( solver.moves(), @@ -1359,7 +1384,7 @@ mod tests { solver.reassign_in(v15, gpr, r5, 
r3); solver.inputs_done(); - assert!(solver.quick_solve(&gregs).is_ok()); + assert!(solver.quick_solve(&gregs, false).is_ok()); // We resolve two cycles with one spill. assert_eq!(solver.schedule_moves(®s), 1); assert_eq!( diff --git a/cranelift-codegen/src/regalloc/virtregs.rs b/cranelift-codegen/src/regalloc/virtregs.rs index 584ad9c53..fc267c3f6 100644 --- a/cranelift-codegen/src/regalloc/virtregs.rs +++ b/cranelift-codegen/src/regalloc/virtregs.rs @@ -21,6 +21,7 @@ use crate::packed_option::PackedOption; use crate::ref_slice::ref_slice; use core::cmp::Ordering; use core::fmt; +use smallvec::SmallVec; use std::vec::Vec; /// A virtual register reference. @@ -292,7 +293,7 @@ impl VirtRegs { /// Find the leader value and rank of the set containing `v`. /// Compress the path if needed. fn find(&mut self, mut val: Value) -> (Value, u32) { - let mut val_stack = vec![]; + let mut val_stack = SmallVec::<[Value; 8]>::new(); let found = loop { match UFEntry::decode(self.union_find[val]) { UFEntry::Rank(rank) => break (val, rank), diff --git a/cranelift-codegen/src/settings.rs b/cranelift-codegen/src/settings.rs index 748484b10..c31097217 100644 --- a/cranelift-codegen/src/settings.rs +++ b/cranelift-codegen/src/settings.rs @@ -14,10 +14,10 @@ //! use cranelift_codegen::settings::{self, Configurable}; //! //! let mut b = settings::builder(); -//! b.set("opt_level", "fastest"); +//! b.set("opt_level", "speed_and_size"); //! //! let f = settings::Flags::new(b); -//! assert_eq!(f.opt_level(), settings::OptLevel::Fastest); +//! assert_eq!(f.opt_level(), settings::OptLevel::SpeedAndSize); //! ``` use crate::constant_hash::{probe, simple_hash}; @@ -331,9 +331,9 @@ pub mod detail { } } -// Include code generated by `meta-python/gen_settings.py`. This file contains a public `Flags` -// struct with an impl for all of the settings defined in -// `cranelift-codegen/meta-python/base/settings.py`. +// Include code generated by `meta/gen_settings.rs`. This file contains a public `Flags` struct +// with an implementation for all of the settings defined in +// `cranelift-codegen/meta/src/shared/settings.rs`. include!(concat!(env!("OUT_DIR"), "/settings.rs")); /// Wrapper containing flags and optionally a `TargetIsa` trait object. 
@@ -378,7 +378,8 @@ mod tests {
         assert_eq!(
             f.to_string(),
             "[shared]\n\
-             opt_level = \"default\"\n\
+             opt_level = \"none\"\n\
+             libcall_call_conv = \"isa_default\"\n\
              baldrdash_prologue_words = 0\n\
              probestack_size_log2 = 12\n\
              enable_verifier = true\n\
@@ -387,15 +388,18 @@ mod tests {
              avoid_div_traps = false\n\
              enable_float = true\n\
              enable_nan_canonicalization = false\n\
-             enable_simd = true\n\
+             enable_pinned_reg = false\n\
+             use_pinned_reg_as_heap_base = false\n\
+             enable_simd = false\n\
              enable_atomics = true\n\
+             enable_safepoints = false\n\
              allones_funcaddrs = false\n\
              probestack_enabled = true\n\
              probestack_func_adjusts_sp = false\n\
              jump_tables_enabled = true\n"
         );
-        assert_eq!(f.opt_level(), super::OptLevel::Default);
-        assert_eq!(f.enable_simd(), true);
+        assert_eq!(f.opt_level(), super::OptLevel::None);
+        assert_eq!(f.enable_simd(), false);
         assert_eq!(f.baldrdash_prologue_words(), 0);
     }
 
@@ -424,13 +428,15 @@ mod tests {
         );
         assert_eq!(
             b.set("opt_level", "true"),
-            Err(BadValue("any among default, best, fastest".to_string()))
+            Err(BadValue(
+                "any among none, speed, speed_and_size".to_string()
+            ))
         );
-        assert_eq!(b.set("opt_level", "best"), Ok(()));
+        assert_eq!(b.set("opt_level", "speed"), Ok(()));
         assert_eq!(b.set("enable_simd", "0"), Ok(()));
         let f = Flags::new(b);
         assert_eq!(f.enable_simd(), false);
-        assert_eq!(f.opt_level(), super::OptLevel::Best);
+        assert_eq!(f.opt_level(), super::OptLevel::Speed);
     }
 }
diff --git a/cranelift-codegen/src/simple_preopt.rs b/cranelift-codegen/src/simple_preopt.rs
index fabad6d3b..221e8a57b 100644
--- a/cranelift-codegen/src/simple_preopt.rs
+++ b/cranelift-codegen/src/simple_preopt.rs
@@ -8,14 +8,32 @@
 use crate::cursor::{Cursor, FuncCursor};
 use crate::divconst_magic_numbers::{magic_s32, magic_s64, magic_u32, magic_u64};
 use crate::divconst_magic_numbers::{MS32, MS64, MU32, MU64};
 use crate::flowgraph::ControlFlowGraph;
-use crate::ir::condcodes::{CondCode, IntCC};
-use crate::ir::dfg::ValueDef;
-use crate::ir::instructions::{Opcode, ValueList};
-use crate::ir::types::{I32, I64};
-use crate::ir::Inst;
-use crate::ir::{DataFlowGraph, Ebb, Function, InstBuilder, InstructionData, Type, Value};
+use crate::ir::{
+    condcodes::{CondCode, IntCC},
+    dfg::ValueDef,
+    immediates,
+    instructions::{Opcode, ValueList},
+    types::{I16, I32, I64, I8},
+    DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Type, Value,
+};
+use crate::isa::TargetIsa;
 use crate::timing;
 
+/// Replaces the unique result of the instruction `inst` with an alias of the given value, and
+/// replaces the instruction with a nop. Can only be used on instructions producing a single
+/// result; otherwise it will assert.
+#[inline]
+fn replace_single_result_with_alias(dfg: &mut DataFlowGraph, inst: Inst, value: Value) {
+    // Replace the result value by an alias.
+    let results = dfg.detach_results(inst);
+    debug_assert!(results.len(&dfg.value_lists) == 1);
+    let result = results.get(0, &dfg.value_lists).unwrap();
+    dfg.change_to_alias(result, value);
+
+    // Replace instruction by a nop.
+    dfg.replace(inst).nop();
+}
+
 //----------------------------------------------------------------------
 //
 // Pattern-match helpers and transformation for div and rem by constants.
@@ -169,7 +187,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
             if is_rem {
                 pos.func.dfg.replace(inst).iconst(I32, 0);
             } else {
-                pos.func.dfg.replace(inst).copy(n1);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
             }
         }
 
@@ -224,7 +242,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 let tt = pos.ins().imul_imm(qf, d as i64);
                 pos.func.dfg.replace(inst).isub(n1, tt);
             } else {
-                pos.func.dfg.replace(inst).copy(qf);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
             }
         }
 
@@ -239,7 +257,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
             if is_rem {
                 pos.func.dfg.replace(inst).iconst(I64, 0);
             } else {
-                pos.func.dfg.replace(inst).copy(n1);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
             }
         }
 
@@ -294,7 +312,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 let tt = pos.ins().imul_imm(qf, d as i64);
                 pos.func.dfg.replace(inst).isub(n1, tt);
             } else {
-                pos.func.dfg.replace(inst).copy(qf);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
             }
         }
 
@@ -312,7 +330,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
             if is_rem {
                 pos.func.dfg.replace(inst).iconst(I32, 0);
             } else {
-                pos.func.dfg.replace(inst).copy(n1);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
             }
         }
 
@@ -338,7 +356,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 if is_negative {
                     pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                 } else {
-                    pos.func.dfg.replace(inst).copy(t4);
+                    replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                 }
             }
         } else {
@@ -368,7 +386,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 let tt = pos.ins().imul_imm(qf, d as i64);
                 pos.func.dfg.replace(inst).isub(n1, tt);
             } else {
-                pos.func.dfg.replace(inst).copy(qf);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
            }
         }
     }
@@ -387,7 +405,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
             if is_rem {
                 pos.func.dfg.replace(inst).iconst(I64, 0);
             } else {
-                pos.func.dfg.replace(inst).copy(n1);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, n1);
             }
         }
 
@@ -413,7 +431,7 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 if is_negative {
                     pos.func.dfg.replace(inst).irsub_imm(t4, 0);
                 } else {
-                    pos.func.dfg.replace(inst).copy(t4);
+                    replace_single_result_with_alias(&mut pos.func.dfg, inst, t4);
                 }
             }
         } else {
@@ -443,64 +461,134 @@ fn do_divrem_transformation(divrem_info: &DivRemByConstInfo, pos: &mut FuncCurso
                 let tt = pos.ins().imul_imm(qf, d);
                 pos.func.dfg.replace(inst).isub(n1, tt);
             } else {
-                pos.func.dfg.replace(inst).copy(qf);
+                replace_single_result_with_alias(&mut pos.func.dfg, inst, qf);
                 }
             }
         }
     }
 }
 
+#[inline]
+fn resolve_imm64_value(dfg: &DataFlowGraph, value: Value) -> Option<immediates::Imm64> {
+    if let ValueDef::Result(candidate_inst, _) = dfg.value_def(value) {
+        if let InstructionData::UnaryImm {
+            opcode: Opcode::Iconst,
+            imm,
+        } = dfg[candidate_inst]
+        {
+            return Some(imm);
+        }
+    }
+    None
+}
+
+/// Try to transform [(x << N) >> N] into a (un)signed-extending move.
+/// Returns true if the final instruction has been converted to such a move.
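+///
+/// For example (illustrative IR, value numbers invented): on an `i32` value,
+/// `v1 = ishl_imm v0, 24; v2 = sshr_imm v1, 24` becomes
+/// `v3 = ireduce.i8 v0; v2 = sextend.i32 v3`.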
+fn try_fold_extended_move( + pos: &mut FuncCursor, + inst: Inst, + opcode: Opcode, + arg: Value, + imm: immediates::Imm64, +) -> bool { + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: Opcode::IshlImm, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if imm != *prev_imm { + return false; + } + + let dest_ty = pos.func.dfg.ctrl_typevar(inst); + if dest_ty != pos.func.dfg.ctrl_typevar(arg_inst) || !dest_ty.is_int() { + return false; + } + + let imm_bits: i64 = imm.into(); + let ireduce_ty = match dest_ty.lane_bits() as i64 - imm_bits { + 8 => I8, + 16 => I16, + 32 => I32, + _ => return false, + }; + let ireduce_ty = ireduce_ty.by(dest_ty.lane_count()).unwrap(); + + // This becomes a no-op, since ireduce_ty has a smaller lane width than + // the argument type (also the destination type). + let arg = *prev_arg; + let narrower_arg = pos.ins().ireduce(ireduce_ty, arg); + + if opcode == Opcode::UshrImm { + pos.func.dfg.replace(inst).uextend(dest_ty, narrower_arg); + } else { + pos.func.dfg.replace(inst).sextend(dest_ty, narrower_arg); + } + return true; + } + } + false +} + /// Apply basic simplifications. /// -/// This folds constants with arithmetic to form `_imm` instructions, and other -/// minor simplifications. -fn simplify(pos: &mut FuncCursor, inst: Inst) { +/// This folds constants with arithmetic to form `_imm` instructions, and other minor +/// simplifications. +/// +/// Doesn't apply some simplifications if the native word width (in bytes) is smaller than the +/// controlling type's width of the instruction. This would result in an illegal instruction that +/// would likely be expanded back into an instruction on smaller types with the same initial +/// opcode, creating unnecessary churn. 
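+///
+/// For example (illustrative, assuming a 64-bit target): `v2 = iadd v0, v1` where
+/// `v1 = iconst 7` is rewritten to `v2 = iadd_imm v0, 7`, while the same pattern on an
+/// `i128` type is left alone because `i128` exceeds the native word width.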
+fn simplify(pos: &mut FuncCursor, inst: Inst, native_word_width: u32) { match pos.func.dfg[inst] { InstructionData::Binary { opcode, args } => { - if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) { - if let InstructionData::UnaryImm { - opcode: Opcode::Iconst, - mut imm, - } = pos.func.dfg[iconst_inst] - { - let new_opcode = match opcode { - Opcode::Iadd => Opcode::IaddImm, - Opcode::Imul => Opcode::ImulImm, - Opcode::Sdiv => Opcode::SdivImm, - Opcode::Udiv => Opcode::UdivImm, - Opcode::Srem => Opcode::SremImm, - Opcode::Urem => Opcode::UremImm, - Opcode::Band => Opcode::BandImm, - Opcode::Bor => Opcode::BorImm, - Opcode::Bxor => Opcode::BxorImm, - Opcode::Rotl => Opcode::RotlImm, - Opcode::Rotr => Opcode::RotrImm, - Opcode::Ishl => Opcode::IshlImm, - Opcode::Ushr => Opcode::UshrImm, - Opcode::Sshr => Opcode::SshrImm, - Opcode::Isub => { - imm = imm.wrapping_neg(); - Opcode::IaddImm - } - _ => return, - }; - let ty = pos.func.dfg.ctrl_typevar(inst); + if let Some(mut imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Sdiv => Opcode::SdivImm, + Opcode::Udiv => Opcode::UdivImm, + Opcode::Srem => Opcode::SremImm, + Opcode::Urem => Opcode::UremImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Rotl => Opcode::RotlImm, + Opcode::Rotr => Opcode::RotrImm, + Opcode::Ishl => Opcode::IshlImm, + Opcode::Ushr => Opcode::UshrImm, + Opcode::Sshr => Opcode::SshrImm, + Opcode::Isub => { + imm = imm.wrapping_neg(); + Opcode::IaddImm + } + Opcode::Ifcmp => Opcode::IfcmpImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { pos.func .dfg .replace(inst) .BinaryImm(new_opcode, ty, imm, args[0]); + + // Repeat for BinaryImm simplification. + simplify(pos, inst, native_word_width); } - } else if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[0]) { - if let InstructionData::UnaryImm { - opcode: Opcode::Iconst, - imm, - } = pos.func.dfg[iconst_inst] - { - let new_opcode = match opcode { - Opcode::Isub => Opcode::IrsubImm, - _ => return, - }; - let ty = pos.func.dfg.ctrl_typevar(inst); + } else if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[0]) { + let new_opcode = match opcode { + Opcode::Iadd => Opcode::IaddImm, + Opcode::Imul => Opcode::ImulImm, + Opcode::Band => Opcode::BandImm, + Opcode::Bor => Opcode::BorImm, + Opcode::Bxor => Opcode::BxorImm, + Opcode::Isub => Opcode::IrsubImm, + _ => return, + }; + let ty = pos.func.dfg.ctrl_typevar(inst); + if ty.bytes() <= native_word_width { pos.func .dfg .replace(inst) @@ -508,18 +596,114 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) { } } } + + InstructionData::Unary { opcode, arg } => match opcode { + Opcode::AdjustSpDown => { + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, arg) { + // Note this works for both positive and negative immediate values. 
+ pos.func.dfg.replace(inst).adjust_sp_down_imm(imm); + } + } + _ => {} + }, + + InstructionData::BinaryImm { opcode, arg, imm } => { + let ty = pos.func.dfg.ctrl_typevar(inst); + + let mut arg = arg; + let mut imm = imm; + match opcode { + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::BorImm + | Opcode::BandImm + | Opcode::BxorImm => { + // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x) + if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) { + if let InstructionData::BinaryImm { + opcode: prev_opcode, + arg: prev_arg, + imm: prev_imm, + } = &pos.func.dfg[arg_inst] + { + if opcode == *prev_opcode { + if ty == pos.func.dfg.ctrl_typevar(arg_inst) { + let lhs: i64 = imm.into(); + let rhs: i64 = (*prev_imm).into(); + let new_imm = match opcode { + Opcode::BorImm => lhs | rhs, + Opcode::BandImm => lhs & rhs, + Opcode::BxorImm => lhs ^ rhs, + Opcode::IaddImm => lhs.wrapping_add(rhs), + Opcode::ImulImm => lhs.wrapping_mul(rhs), + _ => panic!("can't happen"), + }; + let new_imm = immediates::Imm64::from(new_imm); + let new_arg = *prev_arg; + pos.func + .dfg + .replace(inst) + .BinaryImm(opcode, ty, new_imm, new_arg); + imm = new_imm; + arg = new_arg; + } + } + } + } + } + + Opcode::UshrImm | Opcode::SshrImm => { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width + && try_fold_extended_move(pos, inst, opcode, arg, imm) + { + return; + } + } + + _ => {} + }; + + // Replace operations that are no-ops. + match (opcode, imm.into()) { + (Opcode::IaddImm, 0) + | (Opcode::ImulImm, 1) + | (Opcode::SdivImm, 1) + | (Opcode::UdivImm, 1) + | (Opcode::BorImm, 0) + | (Opcode::BandImm, -1) + | (Opcode::BxorImm, 0) + | (Opcode::RotlImm, 0) + | (Opcode::RotrImm, 0) + | (Opcode::IshlImm, 0) + | (Opcode::UshrImm, 0) + | (Opcode::SshrImm, 0) => { + // Alias the result value with the original argument. + replace_single_result_with_alias(&mut pos.func.dfg, inst, arg); + return; + } + (Opcode::ImulImm, 0) | (Opcode::BandImm, 0) => { + // Replace by zero. + pos.func.dfg.replace(inst).iconst(ty, 0); + return; + } + (Opcode::BorImm, -1) => { + // Replace by minus one. + pos.func.dfg.replace(inst).iconst(ty, -1); + return; + } + _ => {} + } + } + InstructionData::IntCompare { opcode, cond, args } => { debug_assert_eq!(opcode, Opcode::Icmp); - if let ValueDef::Result(iconst_inst, _) = pos.func.dfg.value_def(args[1]) { - if let InstructionData::UnaryImm { - opcode: Opcode::Iconst, - imm, - } = pos.func.dfg[iconst_inst] - { + if let Some(imm) = resolve_imm64_value(&pos.func.dfg, args[1]) { + if pos.func.dfg.ctrl_typevar(inst).bytes() <= native_word_width { pos.func.dfg.replace(inst).icmp_imm(cond, args[0], imm); } } } + InstructionData::CondTrap { .. } | InstructionData::Branch { .. } | InstructionData::Ternary { @@ -542,6 +726,7 @@ fn simplify(pos: &mut FuncCursor, inst: Inst) { } } } + _ => {} } } @@ -766,13 +951,14 @@ fn branch_order(pos: &mut FuncCursor, cfg: &mut ControlFlowGraph, ebb: Ebb, inst } /// The main pre-opt pass. -pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph) { +pub fn do_preopt(func: &mut Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { let _tt = timing::preopt(); let mut pos = FuncCursor::new(func); + let native_word_width = isa.pointer_bytes(); while let Some(ebb) = pos.next_ebb() { while let Some(inst) = pos.next_inst() { // Apply basic simplifications. - simplify(&mut pos, inst); + simplify(&mut pos, inst, native_word_width as u32); // Try to transform divide-by-constant into simpler operations. 
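The `BinaryImm` arm above does two things: it folds `binary_op(C2, binary_op(C1, x))` into `binary_op(C1 op C2, x)`, and it strips operations that are no-ops or constants outright. The `ty.bytes() <= native_word_width` guard keeps, for example, an `iadd.i64` on a 32-bit target (where `isa.pointer_bytes()` is 4) in two-operand form, since an `iadd_imm.i64` there would just be legalized back apart. The identities the folding relies on, demonstrated in plain Rust with arbitrary values:

```rust
fn main() {
    let x: i64 = 0x1234;
    let (c1, c2) = (10i64, 32i64);

    // iadd_imm c2, (iadd_imm c1, x)  ==>  iadd_imm (c1 + c2), x
    assert_eq!(
        x.wrapping_add(c1).wrapping_add(c2),
        x.wrapping_add(c1.wrapping_add(c2))
    );

    // Identities that let the result simply alias the argument:
    assert_eq!(x | 0, x); // bor_imm 0
    assert_eq!(x & -1, x); // band_imm -1
    assert_eq!(x ^ 0, x); // bxor_imm 0

    // Identities that fold to a constant:
    assert_eq!(x & 0, 0); // band_imm 0
    assert_eq!(x | -1, -1); // bor_imm -1
    assert_eq!(x.wrapping_mul(0), 0); // imul_imm 0
}
```

Note the recursion after `Binary` is rewritten to `BinaryImm`: `simplify` is called again on the same instruction so the freshly created `_imm` form immediately benefits from the chain folding and no-op elimination above.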
if let Some(divrem_info) = get_div_info(inst, &pos.func.dfg) { diff --git a/cranelift-codegen/src/value_label.rs b/cranelift-codegen/src/value_label.rs index 8dab92b89..2bd3bdc13 100644 --- a/cranelift-codegen/src/value_label.rs +++ b/cranelift-codegen/src/value_label.rs @@ -8,8 +8,12 @@ use std::ops::Bound::*; use std::ops::Deref; use std::vec::Vec; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + /// Value location range. -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct ValueLocRange { /// The ValueLoc containing a ValueLabel during this range. pub loc: ValueLoc, @@ -114,7 +118,7 @@ where let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new(); let mut divert = RegDiversions::new(); for ebb in ebbs { - divert.clear(); + divert.at_ebb(&func.entry_diversions, ebb); let mut last_srcloc: Option = None; for (offset, inst, size) in func.inst_offsets(ebb, &encinfo) { divert.apply(&func.dfg[inst]); diff --git a/cranelift-codegen/src/verifier/locations.rs b/cranelift-codegen/src/verifier/locations.rs index bcf006e7f..cf17ae13d 100644 --- a/cranelift-codegen/src/verifier/locations.rs +++ b/cranelift-codegen/src/verifier/locations.rs @@ -1,5 +1,6 @@ //! Verify value locations. +use crate::flowgraph::ControlFlowGraph; use crate::ir; use crate::isa; use crate::regalloc::liveness::Liveness; @@ -21,6 +22,7 @@ use crate::verifier::{VerifierErrors, VerifierStepResult}; pub fn verify_locations( isa: &dyn isa::TargetIsa, func: &ir::Function, + cfg: &ControlFlowGraph, liveness: Option<&Liveness>, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { @@ -30,6 +32,7 @@ pub fn verify_locations( func, reginfo: isa.register_info(), encinfo: isa.encoding_info(), + cfg, liveness, }; verifier.check_constraints(errors)?; @@ -41,6 +44,7 @@ struct LocationVerifier<'a> { func: &'a ir::Function, reginfo: isa::RegInfo, encinfo: isa::EncInfo, + cfg: &'a ControlFlowGraph, liveness: Option<&'a Liveness>, } @@ -51,9 +55,9 @@ impl<'a> LocationVerifier<'a> { let mut divert = RegDiversions::new(); for ebb in self.func.layout.ebbs() { - // Diversions are reset at the top of each EBB. No diversions can exist across control - // flow edges. 
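The `value_label.rs` change below this point replaces `divert.clear()` with `divert.at_ebb(...)`: register diversions may now survive into a block via its entry, so each EBB's tracking state is seeded from the recorded `entry_diversions` rather than reset to empty. A sketch of the traversal pattern the affected files now share; `entry_diversions` and `at_ebb` are the APIs introduced by this patch, shown here outside their original context and assuming they are reachable as in the surrounding code:

```rust
use cranelift_codegen::ir::Function;
use cranelift_codegen::regalloc::RegDiversions;

// Walk a function's EBBs while tracking diversions, seeding each EBB from
// the diversions recorded at its entry instead of starting from scratch.
fn walk_diversions(func: &Function) {
    let mut divert = RegDiversions::new();
    for ebb in func.layout.ebbs() {
        divert.at_ebb(&func.entry_diversions, ebb);
        for inst in func.layout.ebb_insts(ebb) {
            divert.apply(&func.dfg[inst]);
        }
    }
}
```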
- divert.clear(); + divert.at_ebb(&self.func.entry_diversions, ebb); + + let mut is_after_branch = false; for inst in self.func.layout.ebb_insts(ebb) { let enc = self.func.encodings[inst]; @@ -71,10 +75,11 @@ impl<'a> LocationVerifier<'a> { if opcode.is_return() { self.check_return_abi(inst, &divert, errors)?; } else if opcode.is_branch() && !divert.is_empty() { - self.check_cfg_edges(inst, &divert, errors)?; + self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?; } self.update_diversions(inst, &mut divert, errors)?; + is_after_branch = opcode.is_branch(); } } @@ -102,8 +107,10 @@ impl<'a> LocationVerifier<'a> { fatal!( errors, inst, - "{} constraints not satisfied", - self.encinfo.display(enc) + "{} constraints not satisfied in: {}\n{}", + self.encinfo.display(enc), + self.func.dfg.display_inst(inst, self.isa), + self.func.display(self.isa) ) } @@ -285,8 +292,9 @@ impl<'a> LocationVerifier<'a> { return fatal!( errors, inst, - "inconsistent with global location {}", - self.func.locations[arg].display(&self.reginfo) + "inconsistent with global location {} ({})", + self.func.locations[arg].display(&self.reginfo), + self.func.dfg.display_inst(inst, None) ); } @@ -300,37 +308,52 @@ impl<'a> LocationVerifier<'a> { fn check_cfg_edges( &self, inst: ir::Inst, - divert: &RegDiversions, + divert: &mut RegDiversions, + is_after_branch: bool, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { use crate::ir::instructions::BranchInfo::*; + let dfg = &self.func.dfg; + let branch_kind = dfg.analyze_branch(inst); // We can only check CFG edges if we have a liveness analysis. let liveness = match self.liveness { Some(l) => l, None => return Ok(()), }; - let dfg = &self.func.dfg; - match dfg.analyze_branch(inst) { + match branch_kind { NotABranch => panic!( "No branch information for {}", dfg.display_inst(inst, self.isa) ), SingleDest(ebb, _) => { + let unique_predecessor = self.cfg.pred_iter(ebb).count() == 1; + let mut val_to_remove = vec![]; for (&value, d) in divert.iter() { let lr = &liveness[value]; - if lr.is_livein(ebb, liveness.context(&self.func.layout)) { + if is_after_branch && unique_predecessor { + // Forward diversions based on the targeted branch. 
+ if !lr.is_livein(ebb, liveness.context(&self.func.layout)) { + val_to_remove.push(value) + } + } else if lr.is_livein(ebb, liveness.context(&self.func.layout)) { return fatal!( errors, inst, - "{} is diverted to {} and live in to {}", + "SingleDest: {} is diverted to {} and live in to {}", value, d.to.display(&self.reginfo), ebb ); } } + if is_after_branch && unique_predecessor { + for val in val_to_remove.into_iter() { + divert.remove(val); + } + debug_assert!(divert.check_ebb_entry(&self.func.entry_diversions, ebb)); + } } Table(jt, ebb) => { for (&value, d) in divert.iter() { @@ -340,7 +363,7 @@ impl<'a> LocationVerifier<'a> { return fatal!( errors, inst, - "{} is diverted to {} and live in to {}", + "Table.default: {} is diverted to {} and live in to {}", value, d.to.display(&self.reginfo), ebb @@ -352,7 +375,7 @@ impl<'a> LocationVerifier<'a> { return fatal!( errors, inst, - "{} is diverted to {} and live in to {}", + "Table.case: {} is diverted to {} and live in to {}", value, d.to.display(&self.reginfo), ebb diff --git a/cranelift-codegen/src/verifier/mod.rs b/cranelift-codegen/src/verifier/mod.rs index 7e8d7909c..02cc3f0c5 100644 --- a/cranelift-codegen/src/verifier/mod.rs +++ b/cranelift-codegen/src/verifier/mod.rs @@ -468,6 +468,16 @@ impl<'a> Verifier<'a> { Ok(()) } + /// Check that the given EBB can be encoded as a BB, by checking that only + /// branching instructions are ending the EBB. + #[cfg(feature = "basic-blocks")] + fn encodable_as_bb(&self, ebb: Ebb, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + match self.func.is_ebb_basic(ebb) { + Ok(()) => Ok(()), + Err((inst, message)) => fatal!(errors, inst, message), + } + } + fn ebb_integrity( &self, ebb: Ebb, @@ -666,6 +676,33 @@ impl<'a> Verifier<'a> { self.verify_value_list(inst, args, errors)?; } + NullAry { + opcode: Opcode::GetPinnedReg, + } + | Unary { + opcode: Opcode::SetPinnedReg, + .. + } => { + if let Some(isa) = &self.isa { + if !isa.flags().enable_pinned_reg() { + return fatal!( + errors, + inst, + "GetPinnedReg/SetPinnedReg cannot be used without enable_pinned_reg" + ); + } + } else { + return fatal!(errors, inst, "GetPinnedReg/SetPinnedReg need an ISA!"); + } + } + + Unary { + opcode: Opcode::Bitcast, + arg, + } => { + self.verify_bitcast(inst, arg, errors)?; + } + // Exhaustive list so we can't forget to add new formats Unary { .. } | UnaryImm { .. } @@ -677,6 +714,8 @@ impl<'a> Verifier<'a> { | Ternary { .. } | InsertLane { .. } | ExtractLane { .. } + | UnaryConst { .. } + | Shuffle { .. } | IntCompare { .. } | IntCompareImm { .. } | IntCond { .. } @@ -687,6 +726,7 @@ impl<'a> Verifier<'a> { | Store { .. } | RegMove { .. } | CopySpecial { .. } + | CopyToSsa { .. } | Trap { .. } | CondTrap { .. } | IntCondTrap { .. } @@ -949,6 +989,28 @@ impl<'a> Verifier<'a> { } } + fn verify_bitcast( + &self, + inst: Inst, + arg: Value, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + let typ = self.func.dfg.ctrl_typevar(inst); + let value_type = self.func.dfg.value_type(arg); + + if typ.lane_bits() < value_type.lane_bits() { + fatal!( + errors, + inst, + "The bitcast argument {} doesn't fit in a type of {} bits", + arg, + typ.lane_bits() + ) + } else { + Ok(()) + } + } + fn domtree_integrity( &self, domtree: &DominatorTree, @@ -1662,9 +1724,12 @@ impl<'a> Verifier<'a> { // Instructions with side effects are not allowed to be ghost instructions. 
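The verifier check added above rejects `get_pinned_reg`/`set_pinned_reg` unless the ISA was built with the `enable_pinned_reg` flag. A small sketch of turning that flag on through the standard settings API (the flag name comes from this patch; the builder calls are the usual `cranelift_codegen::settings` interface):

```rust
use cranelift_codegen::settings::{self, Configurable};

fn main() {
    // `enable_pinned_reg` is the shared setting the verifier consults.
    let mut builder = settings::builder();
    builder.enable("enable_pinned_reg").expect("known flag");
    let flags = settings::Flags::new(builder);
    assert!(flags.enable_pinned_reg());
}
```

Without it, any function using the pinned register fails verification with the error text shown above, rather than miscompiling later.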
let opcode = self.func.dfg[inst].opcode(); - // The `fallthrough` and `fallthrough_return` instructions are marked as terminators and - // branches, but they are not required to have an encoding. - if opcode == Opcode::Fallthrough || opcode == Opcode::FallthroughReturn { + // The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required + // to have an encoding. + if opcode == Opcode::Fallthrough + || opcode == Opcode::FallthroughReturn + || opcode == Opcode::Safepoint + { return Ok(()); } @@ -1711,19 +1776,101 @@ impl<'a> Verifier<'a> { ) -> VerifierStepResult<()> { let inst_data = &self.func.dfg[inst]; - // If this is some sort of a store instruction, get the memflags, else, just return. - let memflags = match *inst_data { + match *inst_data { ir::InstructionData::Store { flags, .. } - | ir::InstructionData::StoreComplex { flags, .. } => flags, - _ => return Ok(()), - }; + | ir::InstructionData::StoreComplex { flags, .. } => { + if flags.readonly() { + fatal!( + errors, + inst, + "A store instruction cannot have the `readonly` MemFlag" + ) + } else { + Ok(()) + } + } + ir::InstructionData::ExtractLane { + opcode: ir::instructions::Opcode::Extractlane, + lane, + arg, + .. + } + | ir::InstructionData::InsertLane { + opcode: ir::instructions::Opcode::Insertlane, + lane, + args: [arg, _], + .. + } => { + // We must be specific about the opcodes above because other instructions are using + // the ExtractLane/InsertLane formats. + let ty = self.func.dfg.value_type(arg); + if u16::from(lane) >= ty.lane_count() { + fatal!( + errors, + inst, + "The lane {} does not index into the type {}", + lane, + ty + ) + } else { + Ok(()) + } + } + _ => Ok(()), + } + } - if memflags.readonly() { - fatal!( - errors, - inst, - "A store instruction cannot have the `readonly` MemFlag" - ) + fn verify_safepoint_unused( + &self, + inst: Inst, + errors: &mut VerifierErrors, + ) -> VerifierStepResult<()> { + if let Some(isa) = self.isa { + if !isa.flags().enable_safepoints() && self.func.dfg[inst].opcode() == Opcode::Safepoint + { + return fatal!( + errors, + inst, + "safepoint instruction cannot be used when it is not enabled." 
+ ); + } + } + Ok(()) + } + + fn typecheck_function_signature(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + self.func + .signature + .params + .iter() + .enumerate() + .filter(|(_, &param)| param.value_type == types::INVALID) + .for_each(|(i, _)| { + report!( + errors, + AnyEntity::Function, + "Parameter at position {} has an invalid type", + i + ); + }); + + self.func + .signature + .returns + .iter() + .enumerate() + .filter(|(_, &ret)| ret.value_type == types::INVALID) + .for_each(|(i, _)| { + report!( + errors, + AnyEntity::Function, + "Return value at position {} has an invalid type", + i + ) + }); + + if errors.has_error() { + Err(()) } else { Ok(()) } @@ -1735,15 +1882,20 @@ self.verify_tables(errors)?; self.verify_jump_tables(errors)?; self.typecheck_entry_block_params(errors)?; + self.typecheck_function_signature(errors)?; for ebb in self.func.layout.ebbs() { for inst in self.func.layout.ebb_insts(ebb) { self.ebb_integrity(ebb, inst, errors)?; self.instruction_integrity(inst, errors)?; + self.verify_safepoint_unused(inst, errors)?; self.typecheck(inst, errors)?; self.verify_encoding(inst, errors)?; self.immediate_constraints(inst, errors)?; } + + #[cfg(feature = "basic-blocks")] + self.encodable_as_bb(ebb, errors)?; } verify_flags(self.func, &self.expected_cfg, self.isa, errors)?; @@ -1757,7 +1909,7 @@ mod tests { use super::{Verifier, VerifierError, VerifierErrors}; use crate::entity::EntityList; use crate::ir::instructions::{InstructionData, Opcode}; - use crate::ir::Function; + use crate::ir::{types, AbiParam, Function}; use crate::settings; macro_rules! assert_err_with_msg { @@ -1816,4 +1968,30 @@ assert_err_with_msg!(errors, "instruction format"); } + + #[test] + fn test_function_invalid_param() { + let mut func = Function::new(); + func.signature.params.push(AbiParam::new(types::INVALID)); + + let mut errors = VerifierErrors::default(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + + let _ = verifier.typecheck_function_signature(&mut errors); + assert_err_with_msg!(errors, "Parameter at position 0 has an invalid type"); + } + + #[test] + fn test_function_invalid_return_value() { + let mut func = Function::new(); + func.signature.returns.push(AbiParam::new(types::INVALID)); + + let mut errors = VerifierErrors::default(); + let flags = &settings::Flags::new(settings::builder()); + let verifier = Verifier::new(&func, flags.into()); + + let _ = verifier.typecheck_function_signature(&mut errors); + assert_err_with_msg!(errors, "Return value at position 0 has an invalid type"); + } } diff --git a/cranelift-codegen/src/write.rs b/cranelift-codegen/src/write.rs index cf44fa8ac..8d974c1ec 100644 --- a/cranelift-codegen/src/write.rs +++ b/cranelift-codegen/src/write.rs @@ -5,6 +5,7 @@ use crate::entity::SecondaryMap; use crate::ir::entities::AnyEntity; +use crate::ir::immediates::V128Imm; use crate::ir::{ DataFlowGraph, DisplayFunctionAnnotations, Ebb, Function, Inst, SigRef, Type, Value, ValueDef, ValueLoc, }; @@ -504,6 +505,20 @@ pub fn write_operands( NullAry { .. } => write!(w, " "), InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]), ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane), + UnaryConst { + constant_handle, .. + } => { + let data = dfg.constants.get(constant_handle); + let v128 = V128Imm::from(&data[..]); + write!(w, " {}", v128) + } + Shuffle { mask, args, ..
} => { + let data = dfg.immediates.get(mask).expect( + "Expected the shuffle mask to already be inserted into the immediates table", + ); + let v128 = V128Imm::from(&data[..]); + write!(w, " {}, {}, {}", args[0], args[1], v128) + } IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]), IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm), IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), @@ -664,6 +679,14 @@ pub fn write_operands( write!(w, " %{} -> %{}", src, dst) } } + CopyToSsa { src, .. } => { + if let Some(isa) = isa { + let regs = isa.register_info(); + write!(w, " {}", regs.display_regunit(src)) + } else { + write!(w, " %{}", src) + } + } RegSpill { arg, src, dst, .. } => { if let Some(isa) = isa { let regs = isa.register_info(); diff --git a/cranelift-entity/Cargo.toml b/cranelift-entity/Cargo.toml index ccc8ca9bd..d71e63ec5 100644 --- a/cranelift-entity/Cargo.toml +++ b/cranelift-entity/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-entity" -version = "0.31.0" +version = "0.43.1" description = "Data structures using entity references as mapping keys" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -11,10 +11,14 @@ readme = "README.md" keywords = ["entity", "set", "map"] edition = "2018" +[dependencies] +serde = { version = "1.0.94", features = ["derive"], optional = true } + [features] default = ["std"] std = [] core = [] +enable-serde = ["serde"] [badges] maintenance = { status = "experimental" } diff --git a/cranelift-entity/src/lib.rs b/cranelift-entity/src/lib.rs index 57287d6c7..aa10264ab 100644 --- a/cranelift-entity/src/lib.rs +++ b/cranelift-entity/src/lib.rs @@ -51,7 +51,6 @@ ) )] #![no_std] -#![cfg_attr(not(feature = "std"), feature(alloc))] #[cfg(not(feature = "std"))] #[macro_use] diff --git a/cranelift-entity/src/map.rs b/cranelift-entity/src/map.rs index 9410cc2f9..77fe38ce9 100644 --- a/cranelift-entity/src/map.rs +++ b/cranelift-entity/src/map.rs @@ -6,6 +6,13 @@ use crate::EntityRef; use core::marker::PhantomData; use core::ops::{Index, IndexMut}; use core::slice; +#[cfg(feature = "enable-serde")] +use serde::{ + de::{Deserializer, SeqAccess, Visitor}, + ser::{SerializeSeq, Serializer}, + Deserialize, Serialize, +}; +use std::cmp::min; use std::vec::Vec; /// A mapping `K -> V` for densely indexed entity references. @@ -45,6 +52,20 @@ where } } + /// Create a new, empty map with the specified capacity. + /// + /// The map will be able to hold exactly `capacity` elements without reallocating. + pub fn with_capacity(capacity: usize) -> Self + where + V: Default, + { + Self { + elems: Vec::with_capacity(capacity), + default: Default::default(), + unused: PhantomData, + } + } + /// Create a new empty map with a specified default value. /// /// This constructor does not require V to implement Default. @@ -56,17 +77,25 @@ where } } + /// Returns the number of elements the map can hold without reallocating. + pub fn capacity(&self) -> usize { + self.elems.capacity() + } + /// Get the element at `k` if it exists. + #[inline(always)] pub fn get(&self, k: K) -> Option<&V> { self.elems.get(k.index()) } /// Is this map completely empty? + #[inline(always)] pub fn is_empty(&self) -> bool { self.elems.is_empty() } /// Remove all entries from this map. 
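The `SecondaryMap` additions above (`with_capacity`, `capacity`, and the always-inlined accessors) are easiest to see in use. A small usage sketch; the `entity_impl!` macro and `Block` key type here are illustrative, following the usual cranelift-entity pattern:

```rust
use cranelift_entity::{entity_impl, SecondaryMap};

#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct Block(u32);
entity_impl!(Block);

fn main() {
    // Preallocate; reads through `Index` fall back to the default value.
    let mut colors: SecondaryMap<Block, u32> = SecondaryMap::with_capacity(16);
    assert!(colors.capacity() >= 16);
    assert_eq!(colors[Block(7)], 0); // default; no per-key entry exists yet
    colors[Block(7)] = 3; // `IndexMut` grows the map on demand
    assert_eq!(colors[Block(7)], 3);
}
```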
+ #[inline(always)] pub fn clear(&mut self) { self.elems.clear() } @@ -97,7 +126,6 @@ where } /// Resize the map to have `n` entries by adding default entries as needed. - #[inline] pub fn resize(&mut self, n: usize) { self.elems.resize(n, self.default.clone()); } @@ -113,8 +141,9 @@ { type Output = V; + #[inline(always)] fn index(&self, k: K) -> &V { - self.get(k).unwrap_or(&self.default) + self.elems.get(k.index()).unwrap_or(&self.default) } } @@ -126,16 +155,116 @@ where K: EntityRef, V: Clone, { - #[inline] + #[inline(always)] fn index_mut(&mut self, k: K) -> &mut V { let i = k.index(); if i >= self.elems.len() { - self.resize(i + 1); + self.elems.resize(i + 1, self.default.clone()); } &mut self.elems[i] } } +impl<K, V> PartialEq for SecondaryMap<K, V> +where + K: EntityRef, + V: Clone + PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + let min_size = min(self.elems.len(), other.elems.len()); + self.default == other.default + && self.elems[..min_size] == other.elems[..min_size] + && self.elems[min_size..].iter().all(|e| *e == self.default) + && other.elems[min_size..].iter().all(|e| *e == other.default) + } +} + +impl<K, V> Eq for SecondaryMap<K, V> +where + K: EntityRef, + V: Clone + PartialEq + Eq, +{ +} + +#[cfg(feature = "enable-serde")] +impl<K, V> Serialize for SecondaryMap<K, V> +where + K: EntityRef, + V: Clone + PartialEq + Serialize, +{ + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + // TODO: bincode encodes option as "byte for Some/None" and then optionally the content + // TODO: we can actually optimize it by encoding manually bitmask, then elements + let mut elems_cnt = self.elems.len(); + while elems_cnt > 0 && self.elems[elems_cnt - 1] == self.default { + elems_cnt -= 1; + } + let mut seq = serializer.serialize_seq(Some(1 + elems_cnt))?; + seq.serialize_element(&Some(self.default.clone()))?; + for e in self.elems.iter().take(elems_cnt) { + let some_e = Some(e); + seq.serialize_element(if *e == self.default { &None } else { &some_e })?; + } + seq.end() + } +} + +#[cfg(feature = "enable-serde")] +impl<'de, K, V> Deserialize<'de> for SecondaryMap<K, V> +where + K: EntityRef, + V: Clone + Deserialize<'de>, +{ + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + use std::fmt; + struct SecondaryMapVisitor<K, V> { + unused: PhantomData<fn(K) -> V>, + } + + impl<'de, K, V> Visitor<'de> for SecondaryMapVisitor<K, V> + where + K: EntityRef, + V: Clone + Deserialize<'de>, + { + type Value = SecondaryMap<K, V>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("struct SecondaryMap") + } + + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: SeqAccess<'de>, + { + match seq.next_element()? { + Some(Some(default_val)) => { + let default_val: V = default_val; // compiler can't infer the type + let mut m = SecondaryMap::with_default(default_val.clone()); + let mut idx = 0; + while let Some(val) = seq.next_element()?
{ + let val: Option<_> = val; // compiler can't infer the type + m[K::new(idx)] = val.unwrap_or_else(|| default_val.clone()); + idx += 1; + } + Ok(m) + } + _ => Err(serde::de::Error::custom("Default value required")), + } + } + } + + deserializer.deserialize_seq(SecondaryMapVisitor { + unused: PhantomData {}, + }) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/cranelift-entity/src/primary.rs b/cranelift-entity/src/primary.rs index d3fc7470c..9cde5e779 100644 --- a/cranelift-entity/src/primary.rs +++ b/cranelift-entity/src/primary.rs @@ -7,6 +7,8 @@ use core::iter::FromIterator; use core::marker::PhantomData; use core::ops::{Index, IndexMut}; use core::slice; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; use std::boxed::Box; use std::vec::Vec; @@ -25,7 +27,8 @@ use std::vec::Vec; /// that it only allows indexing with the distinct `EntityRef` key type, so converting to a /// plain slice would make it easier to use incorrectly. To make a slice of a `PrimaryMap`, use /// `into_boxed_slice`. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct PrimaryMap where K: EntityRef, diff --git a/cranelift-entity/src/set.rs b/cranelift-entity/src/set.rs index f89e96538..a4759a171 100644 --- a/cranelift-entity/src/set.rs +++ b/cranelift-entity/src/set.rs @@ -45,9 +45,27 @@ where /// Is this set completely empty? pub fn is_empty(&self) -> bool { + // Note that this implementation will become incorrect should it ever become possible + // to remove elements from an `EntitySet`. self.len == 0 } + /// Returns the cardinality of the set. More precisely, it returns the number of calls to + /// `insert` with different key values, that have happened since the the set was most recently + /// `clear`ed or created with `new`. + pub fn cardinality(&self) -> usize { + let mut n: usize = 0; + for byte_ix in 0..self.len / 8 { + n += self.elems[byte_ix].count_ones() as usize; + } + for bit_ix in (self.len / 8) * 8..self.len { + if (self.elems[bit_ix / 8] & (1 << (bit_ix % 8))) != 0 { + n += 1; + } + } + n + } + /// Remove all entries from this set. 
pub fn clear(&mut self) { self.len = 0; diff --git a/cranelift-faerie/Cargo.toml b/cranelift-faerie/Cargo.toml index 41774e185..98dccdcf4 100644 --- a/cranelift-faerie/Cargo.toml +++ b/cranelift-faerie/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-faerie" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "Emit Cranelift output to native object files with Faerie" repository = "https://github.com/CraneStation/cranelift" @@ -10,12 +10,12 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0" } -cranelift-module = { path = "../cranelift-module", version = "0.31.0" } -faerie = "0.10.0" -goblin = "0.0.22" +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1" } +cranelift-module = { path = "../cranelift-module", version = "0.43.1" } +faerie = "0.11.0" +goblin = "0.0.24" failure = "0.1.2" -target-lexicon = "0.4.0" +target-lexicon = "0.8.1" [badges] maintenance = { status = "experimental" } diff --git a/cranelift-faerie/src/backend.rs b/cranelift-faerie/src/backend.rs index 57dc21376..585866ca9 100644 --- a/cranelift-faerie/src/backend.rs +++ b/cranelift-faerie/src/backend.rs @@ -2,12 +2,14 @@ use crate::container; use crate::traps::{FaerieTrapManifest, FaerieTrapSink}; -use cranelift_codegen::binemit::{Addend, CodeOffset, NullTrapSink, Reloc, RelocSink}; +use cranelift_codegen::binemit::{ + Addend, CodeOffset, NullStackmapSink, NullTrapSink, Reloc, RelocSink, Stackmap, StackmapSink, +}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::{self, binemit, ir}; use cranelift_module::{ - Backend, DataContext, DataDescription, Init, Linkage, ModuleError, ModuleNamespace, - ModuleResult, + Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleError, + ModuleNamespace, ModuleResult, }; use faerie; use failure::Error; @@ -36,7 +38,7 @@ pub struct FaerieBuilder { impl FaerieBuilder { /// Create a new `FaerieBuilder` using the given Cranelift target, that /// can be passed to - /// [`Module::new`](cranelift_module/struct.Module.html#method.new]. + /// [`Module::new`](cranelift_module::Module::new) /// /// Faerie output requires that TargetIsa have PIC (Position Independent Code) enabled. /// @@ -121,13 +123,20 @@ impl Backend for FaerieBackend { &*self.isa } - fn declare_function(&mut self, name: &str, linkage: Linkage) { + fn declare_function(&mut self, _id: FuncId, name: &str, linkage: Linkage) { self.artifact .declare(name, translate_function_linkage(linkage)) .expect("inconsistent declarations"); } - fn declare_data(&mut self, name: &str, linkage: Linkage, writable: bool, align: Option) { + fn declare_data( + &mut self, + _id: DataId, + name: &str, + linkage: Linkage, + writable: bool, + align: Option, + ) { self.artifact .declare(name, translate_data_linkage(linkage, writable, align)) .expect("inconsistent declarations"); @@ -135,12 +144,15 @@ impl Backend for FaerieBackend { fn define_function( &mut self, + _id: FuncId, name: &str, ctx: &cranelift_codegen::Context, namespace: &ModuleNamespace, total_size: u32, ) -> ModuleResult { let mut code: Vec = vec![0; total_size as usize]; + // TODO: Replace this with FaerieStackmapSink once it is implemented. + let mut stackmap_sink = NullStackmapSink {}; // Non-lexical lifetimes would obviate the braces here. 
{ @@ -160,6 +172,7 @@ impl Backend for FaerieBackend { code.as_mut_ptr(), &mut reloc_sink, &mut trap_sink, + &mut stackmap_sink, ) }; trap_manifest.add_sink(trap_sink); @@ -171,6 +184,7 @@ impl Backend for FaerieBackend { code.as_mut_ptr(), &mut reloc_sink, &mut trap_sink, + &mut stackmap_sink, ) }; } @@ -188,6 +202,7 @@ impl Backend for FaerieBackend { fn define_data( &mut self, + _id: DataId, name: &str, _writable: bool, _align: Option, @@ -268,6 +283,7 @@ impl Backend for FaerieBackend { fn finalize_function( &mut self, + _id: FuncId, _func: &FaerieCompiledFunction, _namespace: &ModuleNamespace, ) { @@ -278,7 +294,12 @@ impl Backend for FaerieBackend { // Nothing to do. } - fn finalize_data(&mut self, _data: &FaerieCompiledData, _namespace: &ModuleNamespace) { + fn finalize_data( + &mut self, + _id: DataId, + _data: &FaerieCompiledData, + _namespace: &ModuleNamespace, + ) { // Nothing to do. } @@ -336,7 +357,7 @@ fn translate_function_linkage(linkage: Linkage) -> faerie::Decl { } fn translate_data_linkage(linkage: Linkage, writable: bool, align: Option) -> faerie::Decl { - let align = align.map(|align| usize::from(align)); + let align = align.map(|align| u64::from(align)); match linkage { Linkage::Import => faerie::Decl::data_import().into(), Linkage::Local => faerie::Decl::data() @@ -424,4 +445,29 @@ impl<'a> RelocSink for FaerieRelocSink<'a> { } } } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } +} + +#[allow(dead_code)] +struct FaerieStackmapSink<'a> { + artifact: &'a mut faerie::Artifact, + namespace: &'a ModuleNamespace<'a, FaerieBackend>, +} + +/// Faerie is currently not used in SpiderMonkey. Methods are unimplemented. 
+impl<'a> StackmapSink for FaerieStackmapSink<'a> { + fn add_stackmap(&mut self, _: CodeOffset, _: Stackmap) { + unimplemented!("faerie support for stackmaps"); + } } diff --git a/cranelift-filetests/Cargo.toml b/cranelift-filetests/Cargo.toml index 2cf4bb305..5a7d2aab1 100644 --- a/cranelift-filetests/Cargo.toml +++ b/cranelift-filetests/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cranelift-filetests" authors = ["The Cranelift Project Developers"] -version = "0.31.0" +version = "0.43.1" description = "Test driver and implementations of the filetest commands" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/en/latest/testing.html#file-tests" @@ -10,10 +10,16 @@ publish = false edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", features = ["testing_hooks"] } -cranelift-reader = { path = "../cranelift-reader", version = "0.31.0" } -cranelift-preopt = { path = "../cranelift-preopt", version = "0.31.0" } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", features = ["testing_hooks"] } +cranelift-native = { path = "../cranelift-native", version = "0.43.1" } +cranelift-reader = { path = "../cranelift-reader", version = "0.43.1" } +cranelift-preopt = { path = "../cranelift-preopt", version = "0.43.1" } file-per-thread-logger = "0.1.2" filecheck = "0.4.0" -num_cpus = "1.8.0" log = "0.4.6" +memmap = "0.7.0" +num_cpus = "1.8.0" +region = "2.1.2" + +[features] +basic-blocks = [] diff --git a/cranelift-filetests/src/function_runner.rs b/cranelift-filetests/src/function_runner.rs new file mode 100644 index 000000000..6f2cd4a83 --- /dev/null +++ b/cranelift-filetests/src/function_runner.rs @@ -0,0 +1,116 @@ +use core::mem; +use cranelift_codegen::binemit::{NullRelocSink, NullStackmapSink, NullTrapSink}; +use cranelift_codegen::ir::Function; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::{settings, Context}; +use cranelift_native::builder as host_isa_builder; +use memmap::MmapMut; + +/// Run a function on a host +pub struct FunctionRunner { + function: Function, + isa: Box, +} + +impl FunctionRunner { + /// Build a function runner from a function and the ISA to run on (must be the host machine's ISA) + pub fn new(function: Function, isa: Box) -> Self { + FunctionRunner { function, isa } + } + + /// Build a function runner using the host machine's ISA and the passed flags + pub fn with_host_isa(function: Function, flags: settings::Flags) -> Self { + let builder = host_isa_builder().expect("Unable to build a TargetIsa for the current host"); + let isa = builder.finish(flags); + FunctionRunner::new(function, isa) + } + + /// Build a function runner using the host machine's ISA and the default flags for this ISA + pub fn with_default_host_isa(function: Function) -> Self { + let flags = settings::Flags::new(settings::builder()); + FunctionRunner::with_host_isa(function, flags) + } + + /// Compile and execute a single function, expecting a boolean to be returned; a 'true' value is + /// interpreted as a successful test execution and mapped to Ok whereas a 'false' value is + /// interpreted as a failed test and mapped to Err. 
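The new `FunctionRunner` compiles a `() -> b*` CLIF function to executable memory and calls it. A usage sketch mirroring the crate's own `nop` test; the function name and body here are illustrative:

```rust
use cranelift_filetests::FunctionRunner;
use cranelift_reader::{parse_test, ParseOptions};

fn main() -> Result<(), String> {
    let clif = "
        test run
        function %always_true() -> b8 {
        ebb0:
            v0 = bconst.b8 true
            return v0
        }";

    // Extract the single parsed function and execute it on the host ISA.
    let test_file = parse_test(clif, ParseOptions::default()).map_err(|e| e.to_string())?;
    let function = test_file.functions[0].0.clone();
    FunctionRunner::with_default_host_isa(function).run()
}
```

The `run()` body that follows shows the mechanics: compile, emit into an anonymous mmap, flip it to executable, transmute the pointer to `fn() -> bool`, and map `true`/`false` to test success or failure.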
+ pub fn run(&self) -> Result<(), String> { + let func = self.function.clone(); + if !(func.signature.params.is_empty() + && func.signature.returns.len() == 1 + && func.signature.returns.first().unwrap().value_type.is_bool()) + { + return Err(String::from( + "Functions must have a signature like: () -> boolean", + )); + } + + if func.signature.call_conv != self.isa.default_call_conv() { + return Err(String::from( + "Functions only run on the host's default calling convention; remove the specified calling convention in the function signature to use the host's default.", + )); + } + + // set up the context + let mut context = Context::new(); + context.func = func; + + // compile and encode the result to machine code + let relocs = &mut NullRelocSink {}; + let traps = &mut NullTrapSink {}; + let stackmaps = &mut NullStackmapSink {}; + let code_info = context + .compile(self.isa.as_ref()) + .map_err(|e| e.to_string())?; + let mut code_page = + MmapMut::map_anon(code_info.total_size as usize).map_err(|e| e.to_string())?; + + unsafe { + context.emit_to_memory( + self.isa.as_ref(), + code_page.as_mut_ptr(), + relocs, + traps, + stackmaps, + ); + }; + + let code_page = code_page.make_exec().map_err(|e| e.to_string())?; + let callable_fn: fn() -> bool = unsafe { mem::transmute(code_page.as_ptr()) }; + + // execute + match callable_fn() { + true => Ok(()), + false => Err(format!("Failed: {}", context.func.name.to_string())), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use cranelift_reader::{parse_test, ParseOptions}; + + #[test] + fn nop() { + let code = String::from( + " + test run + function %test() -> b8 { + ebb0: + nop + v1 = bconst.b8 true + return v1 + }", + ); + + // extract function + let test_file = parse_test(code.as_str(), ParseOptions::default()).unwrap(); + assert_eq!(1, test_file.functions.len()); + let function = test_file.functions[0].0.clone(); + + // execute function + let runner = FunctionRunner::with_default_host_isa(function); + runner.run().unwrap() // will panic if execution fails + } +} diff --git a/cranelift-filetests/src/lib.rs b/cranelift-filetests/src/lib.rs index fbd069bcb..073fc2449 100644 --- a/cranelift-filetests/src/lib.rs +++ b/cranelift-filetests/src/lib.rs @@ -23,6 +23,7 @@ ) )] +pub use crate::function_runner::FunctionRunner; use crate::runner::TestRunner; use cranelift_codegen::timing; use cranelift_reader::TestCommand; @@ -30,6 +31,7 @@ use std::path::Path; use std::time; mod concurrent; +mod function_runner; mod match_directive; mod runner; mod runone; @@ -46,6 +48,9 @@ mod test_postopt; mod test_preopt; mod test_print_cfg; mod test_regalloc; +mod test_rodata; +mod test_run; +mod test_safepoint; mod test_shrink; mod test_simple_gvn; mod test_simple_preopt; @@ -115,6 +120,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult test_binemit::subtest(parsed), "cat" => test_cat::subtest(parsed), "compile" => test_compile::subtest(parsed), + "rodata" => test_rodata::subtest(parsed), "dce" => test_dce::subtest(parsed), "domtree" => test_domtree::subtest(parsed), "legalizer" => test_legalizer::subtest(parsed), @@ -123,10 +129,12 @@ fn new_subtest(parsed: &TestCommand) -> subtest::SubtestResult test_simple_preopt::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), "regalloc" => test_regalloc::subtest(parsed), + "run" => test_run::subtest(parsed), "shrink" => test_shrink::subtest(parsed), "simple-gvn" => test_simple_gvn::subtest(parsed), "verifier" => test_verifier::subtest(parsed), "preopt" => test_preopt::subtest(parsed), + 
"safepoint" => test_safepoint::subtest(parsed), _ => Err(format!("unknown test command '{}'", parsed.command)), } } diff --git a/cranelift-filetests/src/runone.rs b/cranelift-filetests/src/runone.rs index 3a69c37c4..1bb7d1c7f 100644 --- a/cranelift-filetests/src/runone.rs +++ b/cranelift-filetests/src/runone.rs @@ -8,8 +8,7 @@ use cranelift_codegen::print_errors::pretty_verifier_error; use cranelift_codegen::settings::Flags; use cranelift_codegen::timing; use cranelift_codegen::verify_function; -use cranelift_reader::parse_test; -use cranelift_reader::IsaSpec; +use cranelift_reader::{parse_test, Feature, IsaSpec, ParseOptions}; use log::info; use std::borrow::Cow; use std::fs; @@ -33,8 +32,13 @@ pub fn run(path: &Path, passes: Option<&[String]>, target: Option<&str>) -> Test info!("---\nFile: {}", path.to_string_lossy()); let started = time::Instant::now(); let buffer = read_to_string(path).map_err(|e| e.to_string())?; + let options = ParseOptions { + target, + passes, + ..ParseOptions::default() + }; - let testfile = match parse_test(&buffer, passes, target) { + let testfile = match parse_test(&buffer, options) { Ok(testfile) => testfile, Err(e) => { if e.is_warning { @@ -48,6 +52,31 @@ pub fn run(path: &Path, passes: Option<&[String]>, target: Option<&str>) -> Test } }; + for feature in testfile.features.iter() { + let (flag, test_expect) = match feature { + Feature::With(name) => (name, true), + Feature::Without(name) => (name, false), + }; + let cranelift_has = match flag { + // Add any cranelift feature flag here, and make sure that it is forwarded to the + // cranelift-filetest crate in the top-level Cargo.toml. + &"basic-blocks" => cfg!(feature = "basic-blocks"), + _ => { + return Err(format!( + r#"{:?}: Unknown feature flag named "{}""#, + path, flag + )) + } + }; + if cranelift_has != test_expect { + println!( + r#"skipping test {:?}: non-matching feature flag "{}""#, + path, flag + ); + return Ok(started.elapsed()); + } + } + if testfile.functions.is_empty() { return Err("no functions found".to_string()); } diff --git a/cranelift-filetests/src/test_binemit.rs b/cranelift-filetests/src/test_binemit.rs index 0b1e83879..b291f6144 100644 --- a/cranelift-filetests/src/test_binemit.rs +++ b/cranelift-filetests/src/test_binemit.rs @@ -7,8 +7,11 @@ use crate::match_directive::match_directive; use crate::subtest::{Context, SubTest, SubtestResult}; use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions}; use cranelift_codegen::dbg::DisplayList; +use cranelift_codegen::dominator_tree::DominatorTree; +use cranelift_codegen::flowgraph::ControlFlowGraph; use cranelift_codegen::ir; use cranelift_codegen::ir::entities::AnyEntity; +use cranelift_codegen::isa; use cranelift_codegen::print_errors::pretty_error; use cranelift_codegen::settings::OptLevel; use cranelift_reader::TestCommand; @@ -87,6 +90,10 @@ impl binemit::CodeSink for TextSink { write!(self.text, ") ").unwrap(); } + fn reloc_constant(&mut self, reloc: binemit::Reloc, constant: ir::ConstantOffset) { + write!(self.text, "{}({}) ", reloc, constant).unwrap(); + } + fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) { write!(self.text, "{}({}) ", reloc, jt).unwrap(); } @@ -98,9 +105,15 @@ impl binemit::CodeSink for TextSink { fn begin_jumptables(&mut self) { self.code_size = self.offset } - fn begin_rodata(&mut self) {} fn end_codegen(&mut self) {} + fn add_stackmap( + &mut self, + _: &[ir::entities::Value], + _: &ir::Function, + _: &dyn isa::TargetIsa, + ) { + } } impl SubTest for TestBinEmit { @@ -149,7 
+162,7 @@ impl SubTest for TestBinEmit { recipe_constraints.satisfied(inst, &divert, &func) }); - if opt_level == OptLevel::Best { + if opt_level == OptLevel::SpeedAndSize { // Get the smallest legal encoding legal_encodings .min_by_key(|&e| encinfo.byte_size(e, inst, &divert, &func)) @@ -166,8 +179,11 @@ impl SubTest for TestBinEmit { } // Relax branches and compute EBB offsets based on the encodings. - let CodeInfo { total_size, .. } = binemit::relax_branches(&mut func, isa) - .map_err(|e| pretty_error(&func, context.isa, e))?; + let mut cfg = ControlFlowGraph::with_function(&func); + let mut domtree = DominatorTree::with_function(&func, &cfg); + let CodeInfo { total_size, .. } = + binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa) + .map_err(|e| pretty_error(&func, context.isa, e))?; // Collect all of the 'bin:' directives on instructions. let mut bins = HashMap::new(); @@ -301,7 +317,13 @@ impl SubTest for TestBinEmit { } sink.begin_rodata(); - // TODO: Read-only (constant pool) data. + + // output constants + for (_, constant_data) in func.dfg.constants.iter() { + for byte in constant_data.iter() { + sink.put1(*byte) + } + } sink.end_codegen(); diff --git a/cranelift-filetests/src/test_compile.rs b/cranelift-filetests/src/test_compile.rs index e9150a73e..10e07440e 100644 --- a/cranelift-filetests/src/test_compile.rs +++ b/cranelift-filetests/src/test_compile.rs @@ -6,6 +6,7 @@ use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult}; use cranelift_codegen; use cranelift_codegen::binemit::{self, CodeInfo}; use cranelift_codegen::ir; +use cranelift_codegen::isa; use cranelift_codegen::print_errors::pretty_error; use cranelift_reader::TestCommand; use log::info; @@ -53,8 +54,9 @@ impl SubTest for TestCompile { let mut sink = SizeSink { offset: 0 }; binemit::emit_function( &comp_ctx.func, - |func, inst, div, sink| isa.emit_inst(func, inst, div, sink), + |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), &mut sink, + isa, ); if sink.offset != total_size { @@ -104,9 +106,17 @@ impl binemit::CodeSink for SizeSink { _addend: binemit::Addend, ) { } + fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} fn begin_jumptables(&mut self) {} fn begin_rodata(&mut self) {} fn end_codegen(&mut self) {} + fn add_stackmap( + &mut self, + _: &[ir::entities::Value], + _: &ir::Function, + _: &dyn isa::TargetIsa, + ) { + } } diff --git a/cranelift-filetests/src/test_rodata.rs b/cranelift-filetests/src/test_rodata.rs new file mode 100644 index 000000000..412fa55ff --- /dev/null +++ b/cranelift-filetests/src/test_rodata.rs @@ -0,0 +1,123 @@ +//! Test command for verifying the rodata emitted after each function +//! +//! 
The `rodata` test command runs each function through the full code generator pipeline + +use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult}; +use cranelift_codegen; +use cranelift_codegen::binemit::{self, CodeInfo}; +use cranelift_codegen::ir; +use cranelift_codegen::ir::{Function, Value}; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::print_errors::pretty_error; +use cranelift_reader::TestCommand; +use log::info; +use std::borrow::Cow; + +struct TestRodata; + +pub fn subtest(parsed: &TestCommand) -> SubtestResult> { + assert_eq!(parsed.command, "rodata"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestRodata)) + } +} + +impl SubTest for TestRodata { + fn name(&self) -> &'static str { + "rodata" + } + + fn is_mutating(&self) -> bool { + true + } + + fn needs_isa(&self) -> bool { + true + } + + fn run(&self, func: Cow, context: &Context) -> SubtestResult<()> { + let isa = context.isa.expect("rodata needs an ISA"); + let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); + + let CodeInfo { total_size, .. } = comp_ctx + .compile(isa) + .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?; + + info!( + "Generated {} bytes of code:\n{}", + total_size, + comp_ctx.func.display(isa) + ); + + // Verify that the returned code size matches the emitted bytes. + let mut sink = RodataSink { + rodata: Vec::new(), + in_rodata: false, + }; + binemit::emit_function( + &comp_ctx.func, + |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), + &mut sink, + isa, + ); + + // Run final code through filecheck. + let text = format!("{:X?}", sink.rodata); + info!("Found rodata: {}", text); + run_filecheck(&text, context) + } +} + +/// Code sink that only captures emitted rodata +struct RodataSink { + in_rodata: bool, + rodata: Vec, +} + +impl binemit::CodeSink for RodataSink { + fn offset(&self) -> binemit::CodeOffset { + 0 + } + + fn put1(&mut self, byte: u8) { + if self.in_rodata { + self.rodata.push(byte); + } + } + + fn put2(&mut self, bytes: u16) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn put4(&mut self, bytes: u32) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn put8(&mut self, bytes: u64) { + if self.in_rodata { + self.rodata.extend_from_slice(&bytes.to_be_bytes()); + } + } + + fn reloc_ebb(&mut self, _reloc: binemit::Reloc, _ebb_offset: binemit::CodeOffset) {} + fn reloc_external(&mut self, _: binemit::Reloc, _: &ir::ExternalName, _: binemit::Addend) {} + fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} + fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} + fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} + fn begin_jumptables(&mut self) { + assert!(!self.in_rodata); + } + fn begin_rodata(&mut self) { + self.in_rodata = true; + } + fn end_codegen(&mut self) { + assert!(self.in_rodata); + } + fn add_stackmap(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {} +} diff --git a/cranelift-filetests/src/test_run.rs b/cranelift-filetests/src/test_run.rs new file mode 100644 index 000000000..6e34bfebf --- /dev/null +++ b/cranelift-filetests/src/test_run.rs @@ -0,0 +1,46 @@ +//! Test command for running CLIF files and verifying their results +//! +//! 
The `run` test command compiles each function on the host machine and executes it + +use crate::function_runner::FunctionRunner; +use crate::subtest::{Context, SubTest, SubtestResult}; +use cranelift_codegen; +use cranelift_codegen::ir; +use cranelift_reader::TestCommand; +use std::borrow::Cow; + +struct TestRun; + +pub fn subtest(parsed: &TestCommand) -> SubtestResult> { + assert_eq!(parsed.command, "run"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestRun)) + } +} + +impl SubTest for TestRun { + fn name(&self) -> &'static str { + "run" + } + + fn is_mutating(&self) -> bool { + false + } + + fn needs_isa(&self) -> bool { + false + } + + fn run(&self, func: Cow, context: &Context) -> SubtestResult<()> { + for comment in context.details.comments.iter() { + if comment.text.contains("run") { + let runner = + FunctionRunner::with_host_isa(func.clone().into_owned(), context.flags.clone()); + runner.run()? + } + } + Ok(()) + } +} diff --git a/cranelift-filetests/src/test_safepoint.rs b/cranelift-filetests/src/test_safepoint.rs new file mode 100644 index 000000000..b213fb274 --- /dev/null +++ b/cranelift-filetests/src/test_safepoint.rs @@ -0,0 +1,39 @@ +use crate::subtest::{run_filecheck, Context, SubTest, SubtestResult}; +use cranelift_codegen::ir::Function; +use cranelift_codegen::print_errors::pretty_error; +use cranelift_reader::TestCommand; +use std::borrow::Cow; + +struct TestSafepoint; + +pub fn subtest(parsed: &TestCommand) -> SubtestResult> { + assert_eq!(parsed.command, "safepoint"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestSafepoint)) + } +} + +impl SubTest for TestSafepoint { + fn name(&self) -> &'static str { + "safepoint" + } + + fn run(&self, func: Cow, context: &Context) -> SubtestResult<()> { + let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); + + let isa = context.isa.expect("register allocator needs an ISA"); + comp_ctx.compute_cfg(); + comp_ctx + .legalize(isa) + .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?; + comp_ctx.compute_domtree(); + comp_ctx + .regalloc(isa) + .map_err(|e| pretty_error(&comp_ctx.func, context.isa, e))?; + + let text = comp_ctx.func.display(context.isa).to_string(); + run_filecheck(&text, context) + } +} diff --git a/cranelift-frontend/Cargo.toml b/cranelift-frontend/Cargo.toml index f1634bdd9..919b9b4bd 100644 --- a/cranelift-frontend/Cargo.toml +++ b/cranelift-frontend/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-frontend" -version = "0.31.0" +version = "0.43.1" description = "Cranelift IR builder helper" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -11,16 +11,20 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -target-lexicon = { version = "0.4.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +target-lexicon = "0.8.1" log = { version = "0.4.6", default-features = false } hashmap_core = { version = "0.1.9", optional = true } +smallvec = { version = "0.6.10" } [features] default = ["std"] std = ["cranelift-codegen/std"] core = ["hashmap_core", "cranelift-codegen/core"] +# Temporary feature that enforces basic block semantics. 
+basic-blocks = ["cranelift-codegen/basic-blocks"] + [badges] maintenance = { status = "experimental" } travis-ci = { repository = "CraneStation/cranelift" } diff --git a/cranelift-frontend/src/frontend.rs b/cranelift-frontend/src/frontend.rs index a55c0d124..b0c3c6b9f 100644 --- a/cranelift-frontend/src/frontend.rs +++ b/cranelift-frontend/src/frontend.rs @@ -41,12 +41,22 @@ pub struct FunctionBuilder<'a> { #[derive(Clone, Default)] struct EbbData { - filled: bool, + /// An Ebb is "pristine" iff no instructions have been added since the last + /// call to `switch_to_block()`. pristine: bool, + + /// An Ebb is "filled" iff a terminator instruction has been inserted since + /// the last call to `switch_to_block()`. + /// + /// A filled block cannot be pristine. + filled: bool, + + /// Count of parameters not supplied implicitly by the SSABuilder. user_param_count: usize, } /// Position +#[derive(Default)] pub struct Position { ebb: PackedOption, basic_block: PackedOption, @@ -60,14 +70,6 @@ impl Position { } } - /// Default position - pub fn default() -> Self { - Self { - ebb: PackedOption::default(), - basic_block: PackedOption::default(), - } - } - fn is_default(&self) -> bool { self.ebb.is_none() && self.basic_block.is_none() } @@ -95,7 +97,7 @@ impl FunctionBuilderContext { } } -/// Implementation of the [`InstBuilder`](../codegen/ir/builder/trait.InstBuilder.html) that has +/// Implementation of the [`InstBuilder`](cranelift_codegen::ir::InstBuilder) that has /// one convenience method per Cranelift IR instruction. pub struct FuncInstBuilder<'short, 'long: 'short> { builder: &'short mut FunctionBuilder<'long>, @@ -206,7 +208,7 @@ impl<'short, 'long> InstBuilderBase<'short> for FuncInstBuilder<'short, 'long> { /// modifies with the information stored in the mutable borrowed /// [`FunctionBuilderContext`](struct.FunctionBuilderContext.html). The function passed in /// argument should be newly created with -/// [`Function::with_name_signature()`](../function/struct.Function.html), whereas the +/// [`Function::with_name_signature()`](Function::with_name_signature), whereas the /// `FunctionBuilderContext` can be kept as is between two function translations. /// /// # Errors @@ -394,7 +396,7 @@ impl<'a> FunctionBuilder<'a> { self.func.create_heap(data) } - /// Returns an object with the [`InstBuilder`](../codegen/ir/builder/trait.InstBuilder.html) + /// Returns an object with the [`InstBuilder`](cranelift_codegen::ir::InstBuilder) /// trait that allows to conveniently append an instruction to the current `Ebb` being built. pub fn ins<'short>(&'short mut self) -> FuncInstBuilder<'short, 'a> { let ebb = self @@ -482,6 +484,19 @@ impl<'a> FunctionBuilder<'a> { "all blocks should be filled before dropping a FunctionBuilder" ); + // In debug mode, check that all blocks are valid basic blocks. + #[cfg(feature = "basic-blocks")] + #[cfg(debug_assertions)] + { + // Iterate manually to provide more helpful error messages. + for ebb in self.func_ctx.ebbs.keys() { + if let Err((inst, _msg)) = self.func.is_ebb_basic(ebb) { + let inst_str = self.func.dfg.display_inst(inst, None); + panic!("{} failed basic block invariants on {}", ebb, inst_str); + } + } + } + // Clear the state (but preserve the allocated buffers) in preparation // for translation another function. self.func_ctx.clear(); @@ -515,7 +530,10 @@ impl<'a> FunctionBuilder<'a> { /// **Note:** this function has to be called at the creation of the `Ebb` before adding /// instructions to it, otherwise this could interfere with SSA construction. 
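The improved assertion message above documents the "pristine" invariant: EBB parameters may only be appended before any instruction is inserted into the block, because SSA construction keys off the parameter list at block entry. A minimal builder sequence that respects the ordering (the signature and names are illustrative, using the standard cranelift-frontend API):

```rust
use cranelift_codegen::ir::{types, AbiParam, ExternalName, Function, InstBuilder, Signature};
use cranelift_codegen::isa::CallConv;
use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};

fn main() {
    let mut sig = Signature::new(CallConv::SystemV);
    sig.returns.push(AbiParam::new(types::I32));
    let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
    let mut ctx = FunctionBuilderContext::new();
    let mut builder = FunctionBuilder::new(&mut func, &mut ctx);

    let ebb = builder.create_ebb();
    // Parameters are added while the block is still pristine...
    let p = builder.append_ebb_param(ebb, types::I32);
    builder.switch_to_block(ebb);
    // ...because inserting any instruction clears the pristine flag,
    // and a later append_ebb_param would hit the debug assertion.
    builder.ins().return_(&[p]);
    builder.seal_block(ebb);
    builder.finalize();
}
```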
pub fn append_ebb_param(&mut self, ebb: Ebb, ty: Type) -> Value { - debug_assert!(self.func_ctx.ebbs[ebb].pristine); + debug_assert!( + self.func_ctx.ebbs[ebb].pristine, + "You can't add EBB parameters after adding any instruction" + ); debug_assert_eq!( self.func_ctx.ebbs[ebb].user_param_count, self.func.dfg.num_ebb_params(ebb) @@ -852,6 +870,7 @@ impl<'a> FunctionBuilder<'a> { ); } + /// An Ebb is 'filled' when a terminator instruction is present. fn fill_current_block(&mut self) { self.func_ctx.ebbs[self.position.ebb.unwrap()].filled = true; } @@ -901,6 +920,7 @@ mod tests { let block0 = builder.create_ebb(); let block1 = builder.create_ebb(); let block2 = builder.create_ebb(); + let block3 = builder.create_ebb(); let x = Variable::new(0); let y = Variable::new(1); let z = Variable::new(2); @@ -938,7 +958,13 @@ mod tests { } { let arg = builder.use_var(y); - builder.ins().brnz(arg, block2, &[]); + builder.ins().brnz(arg, block3, &[]); + } + builder.ins().jump(block2, &[]); + + builder.switch_to_block(block2); + if !lazy_seal { + builder.seal_block(block2); } { let arg1 = builder.use_var(z); @@ -951,9 +977,9 @@ mod tests { builder.ins().return_(&[arg]); } - builder.switch_to_block(block2); + builder.switch_to_block(block3); if !lazy_seal { - builder.seal_block(block2); + builder.seal_block(block3); } { diff --git a/cranelift-frontend/src/lib.rs b/cranelift-frontend/src/lib.rs index 908458636..a5cc750b5 100644 --- a/cranelift-frontend/src/lib.rs +++ b/cranelift-frontend/src/lib.rs @@ -50,10 +50,12 @@ //! jump block1 //! block1: //! z = z + y; -//! brnz y, block2; +//! brnz y, block3; +//! jump block2 +//! block2: //! z = z - x; //! return y -//! block2: +//! block3: //! y = y - x //! jump block1 //! } @@ -86,6 +88,7 @@ //! let block0 = builder.create_ebb(); //! let block1 = builder.create_ebb(); //! let block2 = builder.create_ebb(); +//! let block3 = builder.create_ebb(); //! let x = Variable::new(0); //! let y = Variable::new(1); //! let z = Variable::new(2); @@ -121,8 +124,12 @@ //! } //! { //! let arg = builder.use_var(y); -//! builder.ins().brnz(arg, block2, &[]); +//! builder.ins().brnz(arg, block3, &[]); //! } +//! builder.ins().jump(block2, &[]); +//! +//! builder.switch_to_block(block2); +//! builder.seal_block(block2); //! { //! let arg1 = builder.use_var(z); //! let arg2 = builder.use_var(x); @@ -134,8 +141,8 @@ //! builder.ins().return_(&[arg]); //! } //! -//! builder.switch_to_block(block2); -//! builder.seal_block(block2); +//! builder.switch_to_block(block3); +//! builder.seal_block(block3); //! //! { //! let arg1 = builder.use_var(y); @@ -176,7 +183,6 @@ ) )] #![no_std] -#![cfg_attr(not(feature = "std"), feature(alloc))] #[cfg(not(feature = "std"))] #[macro_use] diff --git a/cranelift-frontend/src/ssa.rs b/cranelift-frontend/src/ssa.rs index 0d48c314a..b3b74fac9 100644 --- a/cranelift-frontend/src/ssa.rs +++ b/cranelift-frontend/src/ssa.rs @@ -16,6 +16,7 @@ use cranelift_codegen::ir::types::{F32, F64}; use cranelift_codegen::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Type, Value}; use cranelift_codegen::packed_option::PackedOption; use cranelift_codegen::packed_option::ReservedValue; +use smallvec::SmallVec; use std::vec::Vec; /// Structure containing the data relevant to the construction of SSA for a given function. @@ -123,9 +124,11 @@ impl PredBlock { } } +type PredBlockSmallVec = SmallVec<[PredBlock; 4]>; + struct EbbHeaderBlockData { // The predecessors of the Ebb header block, with the block and branch instruction.
- predecessors: Vec<PredBlock>, + predecessors: PredBlockSmallVec, // An ebb header block is sealed if all of its predecessors have been declared. sealed: bool, // The ebb which this block is part of. @@ -221,6 +224,8 @@ fn emit_zero(ty: Type, mut cur: FuncCursor) -> Value { cur.ins().f32const(Ieee32::with_bits(0)) } else if ty == F64 { cur.ins().f64const(Ieee64::with_bits(0)) + } else if ty.is_ref() { + cur.ins().null(ty) } else if ty.is_vector() { let scalar_ty = ty.lane_type(); if scalar_ty.is_int() { @@ -366,7 +371,7 @@ impl SSABuilder { /// Predecessors have to be added with `declare_ebb_predecessor`. pub fn declare_ebb_header_block(&mut self, ebb: Ebb) -> Block { let block = self.blocks.push(BlockData::EbbHeader(EbbHeaderBlockData { - predecessors: Vec::new(), + predecessors: PredBlockSmallVec::new(), sealed: false, ebb, undef_variables: Vec::new(), @@ -587,7 +592,8 @@ impl SSABuilder { // There is disagreement in the predecessors on which value to use so we have // to keep the ebb argument. To avoid borrowing `self` for the whole loop, // temporarily detach the predecessors list and replace it with an empty list. - let mut preds = mem::replace(self.predecessors_mut(dest_ebb), Vec::new()); + let mut preds = + mem::replace(self.predecessors_mut(dest_ebb), PredBlockSmallVec::new()); for &mut PredBlock { block: ref mut pred_block, branch: ref mut last_inst, @@ -699,7 +705,7 @@ impl SSABuilder { } /// Same as predecessors, but for &mut. - fn predecessors_mut(&mut self, ebb: Ebb) -> &mut Vec<PredBlock> { + fn predecessors_mut(&mut self, ebb: Ebb) -> &mut PredBlockSmallVec { let block = self.header_block(ebb); match self.blocks[block] { BlockData::EbbBody { .. } => panic!("should not happen"), diff --git a/cranelift-frontend/src/switch.rs b/cranelift-frontend/src/switch.rs index 30e7687c7..0664d0060 100644 --- a/cranelift-frontend/src/switch.rs +++ b/cranelift-frontend/src/switch.rs @@ -168,12 +168,22 @@ impl Switch { contiguous_case_ranges: Vec<ContiguousCaseRange>, cases_and_jt_ebbs: &mut Vec<(EntryIndex, Ebb, Vec<Ebb>)>, ) { + let mut was_branch = false; + let ins_fallthrough_jump = |was_branch: bool, bx: &mut FunctionBuilder| { + if was_branch { + let ebb = bx.create_ebb(); + bx.ins().jump(ebb, &[]); + bx.switch_to_block(ebb); + } + }; for ContiguousCaseRange { first_index, ebbs } in contiguous_case_ranges.into_iter().rev() { match (ebbs.len(), first_index) { (1, 0) => { + ins_fallthrough_jump(was_branch, bx); bx.ins().brz(val, ebbs[0], &[]); } (1, _) => { + ins_fallthrough_jump(was_branch, bx); let is_good_val = bx.ins().icmp_imm(IntCC::Equal, val, first_index as i64); bx.ins().brnz(is_good_val, ebbs[0], &[]); } @@ -188,6 +198,7 @@ impl Switch { return; } (_, _) => { + ins_fallthrough_jump(was_branch, bx); let jt_ebb = bx.create_ebb(); let is_good_val = bx.ins().icmp_imm( IntCC::UnsignedGreaterThanOrEqual, @@ -198,6 +209,7 @@ impl Switch { cases_and_jt_ebbs.push((first_index, jt_ebb, ebbs)); } } + was_branch = true; } bx.ins().jump(otherwise, &[]); @@ -361,7 +373,10 @@ ebb3: v1 = uextend.i32 v0 v2 = icmp_imm eq v1, 2 brnz v2, ebb2 - brz v1, ebb1 + jump ebb3 + +ebb3: + brz.i32 v1, ebb1 jump ebb0" ); } @@ -384,6 +399,9 @@ ebb0: ebb9: v3 = icmp_imm.i32 uge v1, 10 brnz v3, ebb10 + jump ebb11 + +ebb11: v4 = icmp_imm.i32 eq v1, 7 brnz v4, ebb4 jump ebb0 @@ -391,9 +409,9 @@ ebb9: ebb8: v5 = icmp_imm.i32 eq v1, 5 brnz v5, ebb3 - jump ebb11 + jump ebb12 -ebb11: +ebb12: br_table.i32 v1, ebb0, jt0 ebb10: @@ -412,7 +430,10 @@ ebb10: v1 = uextend.i32 v0 v2 = icmp_imm eq v1, 0x8000_0000_0000_0000 brnz v2, ebb1 - v3 = icmp_imm eq v1, 1 + jump ebb3
+ +ebb3: + v3 = icmp_imm.i32 eq v1, 1 brnz v3, ebb2 jump ebb0" ); } @@ -428,7 +449,10 @@ ebb10: v1 = uextend.i32 v0 v2 = icmp_imm eq v1, 0x7fff_ffff_ffff_ffff brnz v2, ebb1 - v3 = icmp_imm eq v1, 1 + jump ebb3 + +ebb3: + v3 = icmp_imm.i32 eq v1, 1 brnz v3, ebb2 jump ebb0" ) diff --git a/cranelift-module/Cargo.toml b/cranelift-module/Cargo.toml index 7d67638b2..ac5eff082 100644 --- a/cranelift-module/Cargo.toml +++ b/cranelift-module/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-module" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "Support for linking functions and data with Cranelift" repository = "https://github.com/CraneStation/cranelift" @@ -11,8 +11,8 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -cranelift-entity = { path = "../cranelift-entity", version = "0.31.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +cranelift-entity = { path = "../cranelift-entity", version = "0.43.1", default-features = false } hashmap_core = { version = "0.1.9", optional = true } failure = { version = "0.1.1", default-features = false } log = { version = "0.4.6", default-features = false } diff --git a/cranelift-module/src/backend.rs b/cranelift-module/src/backend.rs index 316c0a3e2..89b610bdb 100644 --- a/cranelift-module/src/backend.rs +++ b/cranelift-module/src/backend.rs @@ -1,6 +1,8 @@ //! Defines the `Backend` trait. use crate::DataContext; +use crate::DataId; +use crate::FuncId; use crate::Linkage; use crate::ModuleNamespace; use crate::ModuleResult; @@ -56,16 +58,24 @@ where fn isa(&self) -> &dyn TargetIsa; /// Declare a function. - fn declare_function(&mut self, name: &str, linkage: Linkage); + fn declare_function(&mut self, id: FuncId, name: &str, linkage: Linkage); /// Declare a data object. - fn declare_data(&mut self, name: &str, linkage: Linkage, writable: bool, align: Option<u8>); + fn declare_data( + &mut self, + id: DataId, + name: &str, + linkage: Linkage, + writable: bool, + align: Option<u8>, + ); /// Define a function, producing the function body from the given `Context`. /// /// Functions must be declared before being defined. fn define_function( + &mut self, + id: FuncId, name: &str, ctx: &Context, namespace: &ModuleNamespace<Self>, @@ -77,6 +87,7 @@ where /// Data objects must be declared before being defined. fn define_data( &mut self, + id: DataId, name: &str, writable: bool, align: Option<u8>, @@ -107,6 +118,7 @@ where /// and `Export` entities referenced to be defined. fn finalize_function( &mut self, + id: FuncId, func: &Self::CompiledFunction, namespace: &ModuleNamespace<Self>, ) -> Self::FinalizedFunction; @@ -118,6 +130,7 @@ where /// `Local` and `Export` entities referenced to be defined.
fn finalize_data( &mut self, + id: DataId, data: &Self::CompiledData, namespace: &ModuleNamespace<Self>, ) -> Self::FinalizedData; diff --git a/cranelift-module/src/lib.rs b/cranelift-module/src/lib.rs index 33d12d96c..7cb9c10f2 100644 --- a/cranelift-module/src/lib.rs +++ b/cranelift-module/src/lib.rs @@ -19,7 +19,6 @@ ) )] #![no_std] -#![cfg_attr(not(feature = "std"), feature(alloc))] #[cfg(not(feature = "std"))] #[macro_use] diff --git a/cranelift-module/src/module.rs b/cranelift-module/src/module.rs index 33a13b7f9..f19dcfab2 100644 --- a/cranelift-module/src/module.rs +++ b/cranelift-module/src/module.rs @@ -224,26 +224,32 @@ impl<B> ModuleContents<B> where B: Backend, { - fn get_function_info(&self, name: &ir::ExternalName) -> &ModuleFunction<B> { + fn get_function_id(&self, name: &ir::ExternalName) -> FuncId { if let ir::ExternalName::User { namespace, index } = *name { debug_assert_eq!(namespace, 0); - let func = FuncId::from_u32(index); - &self.functions[func] + FuncId::from_u32(index) } else { panic!("unexpected ExternalName kind {}", name) } } - /// Get the `DataDeclaration` for the function named by `name`. - fn get_data_info(&self, name: &ir::ExternalName) -> &ModuleData<B> { + fn get_data_id(&self, name: &ir::ExternalName) -> DataId { if let ir::ExternalName::User { namespace, index } = *name { debug_assert_eq!(namespace, 1); - let data = DataId::from_u32(index); - &self.data_objects[data] + DataId::from_u32(index) } else { panic!("unexpected ExternalName kind {}", name) } } + + fn get_function_info(&self, name: &ir::ExternalName) -> &ModuleFunction<B> { + &self.functions[self.get_function_id(name)] + } + + /// Get the `DataDeclaration` for the data object named by `name`. + fn get_data_info(&self, name: &ir::ExternalName) -> &ModuleData<B> { + &self.data_objects[self.get_data_id(name)] + } } /// This provides a view to the state of a module which allows `ir::ExternalName`s to be translated @@ -259,12 +265,22 @@ impl<'a, B> ModuleNamespace<'a, B> where B: Backend, { + /// Get the `FuncId` for the function named by `name`. + pub fn get_function_id(&self, name: &ir::ExternalName) -> FuncId { + self.contents.get_function_id(name) + } + + /// Get the `DataId` for the data object named by `name`. + pub fn get_data_id(&self, name: &ir::ExternalName) -> DataId { + self.contents.get_data_id(name) + } + /// Get the `FunctionDeclaration` for the function named by `name`. pub fn get_function_decl(&self, name: &ir::ExternalName) -> &FunctionDeclaration { &self.contents.get_function_info(name).decl } - /// Get the `DataDeclaration` for the function named by `name`. + /// Get the `DataDeclaration` for the data object named by `name`. pub fn get_data_decl(&self, name: &ir::ExternalName) -> &DataDeclaration { &self.contents.get_data_info(name).decl } @@ -407,7 +423,8 @@ where FuncOrDataId::Func(id) => { let existing = &mut self.contents.functions[id]; existing.merge(linkage, signature)?; - self.backend.declare_function(name, existing.decl.linkage); + self.backend + .declare_function(id, name, existing.decl.linkage); Ok(id) } FuncOrDataId::Data(..)
=> { @@ -424,7 +441,7 @@ where compiled: None, }); entry.insert(FuncOrDataId::Func(id)); - self.backend.declare_function(name, linkage); + self.backend.declare_function(id, name, linkage); Ok(id) } } @@ -451,6 +468,7 @@ where let existing = &mut self.contents.data_objects[id]; existing.merge(linkage, writable, align); self.backend.declare_data( + id, name, existing.decl.linkage, existing.decl.writable, @@ -474,7 +492,8 @@ where compiled: None, }); entry.insert(FuncOrDataId::Data(id)); - self.backend.declare_data(name, linkage, writable, align); + self.backend + .declare_data(id, name, linkage, writable, align); Ok(id) } } @@ -536,7 +555,6 @@ where ); ModuleError::Compilation(e) })?; - let info = &self.contents.functions[func]; if info.compiled.is_some() { return Err(ModuleError::DuplicateDefinition(info.decl.name.clone())); @@ -546,6 +564,7 @@ where } let compiled = Some(self.backend.define_function( + func, &info.decl.name, ctx, &ModuleNamespace::<B> { @@ -559,7 +578,7 @@ where Ok(total_size) } - /// Define a function, producing the data contents from the given `DataContext`. + /// Define a data object, producing the data contents from the given `DataContext`. pub fn define_data(&mut self, data: DataId, data_ctx: &DataContext) -> ModuleResult<()> { let compiled = { let info = &self.contents.data_objects[data]; @@ -570,6 +589,7 @@ where return Err(ModuleError::InvalidImportDefinition(info.decl.name.clone())); } Some(self.backend.define_data( + data, &info.decl.name, info.decl.writable, info.decl.align, @@ -638,6 +658,7 @@ where let info = &self.contents.functions[func]; debug_assert!(info.decl.linkage.is_definable()); self.backend.finalize_function( + func, info.compiled .as_ref() .expect("function must be compiled before it can be finalized"), @@ -650,6 +671,7 @@ where let info = &self.contents.data_objects[data]; debug_assert!(info.decl.linkage.is_definable()); self.backend.finalize_data( + data, info.compiled .as_ref() .expect("data object must be compiled before it can be finalized"), diff --git a/cranelift-native/Cargo.toml b/cranelift-native/Cargo.toml index c5adcead1..35576d1d3 100644 --- a/cranelift-native/Cargo.toml +++ b/cranelift-native/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-native" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "Support for targeting the host with Cranelift" repository = "https://github.com/CraneStation/cranelift" @@ -10,15 +10,15 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -target-lexicon = { version = "0.4.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +target-lexicon = "0.8.1" [target.'cfg(any(target_arch = "x86", target_arch = "x86_64"))'.dependencies] raw-cpuid = "6.0.0" [features] default = ["std"] -std = ["cranelift-codegen/std", "target-lexicon/std"] +std = ["cranelift-codegen/std"] # when compiling with the "core" feature, nightly must be enabled # enabling the "nightly" feature for raw-cpuid allows avoiding # linking in a c-library.
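As a quick illustration of how the CPUID-driven flags added in the next hunk surface to users, here is a hedged sketch of constructing a host-targeted ISA. It assumes only the `cranelift_native::builder()` signature shown below and the standard `cranelift_codegen::settings` / `isa::Builder::finish` APIs; it is illustrative rather than part of this patch.

use cranelift_codegen::{isa, settings};

// Build a TargetIsa for the host. cranelift_native::builder() pre-enables
// CPUID-detected flags (has_sse3, has_ssse3, has_sse41, ...) on the returned
// isa::Builder before we finish() it with shared settings.
fn host_isa() -> Result<Box<dyn isa::TargetIsa>, &'static str> {
    let isa_builder = cranelift_native::builder()?;
    let shared_flags = settings::Flags::new(settings::builder());
    Ok(isa_builder.finish(shared_flags))
}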
diff --git a/cranelift-native/src/lib.rs b/cranelift-native/src/lib.rs index 0687e7017..9ad873e16 100644 --- a/cranelift-native/src/lib.rs +++ b/cranelift-native/src/lib.rs @@ -26,7 +26,6 @@ #![no_std] use cranelift_codegen::isa; -use cranelift_codegen::settings::Configurable; use target_lexicon::Triple; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] @@ -50,6 +49,7 @@ pub fn builder() -> Result<isa::Builder, &'static str> { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn parse_x86_cpuid(isa_builder: &mut isa::Builder) -> Result<(), &'static str> { + use cranelift_codegen::settings::Configurable; let cpuid = CpuId::new(); if let Some(info) = cpuid.get_feature_info() { @@ -59,6 +59,9 @@ fn parse_x86_cpuid(isa_builder: &mut isa::Builder) -> Result<(), &'static str> { if info.has_sse3() { isa_builder.enable("has_sse3").unwrap(); } + if info.has_ssse3() { + isa_builder.enable("has_ssse3").unwrap(); + } if info.has_sse41() { isa_builder.enable("has_sse41").unwrap(); } diff --git a/cranelift-object/Cargo.toml b/cranelift-object/Cargo.toml new file mode 100644 index 000000000..d75fb054f --- /dev/null +++ b/cranelift-object/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "cranelift-object" +version = "0.43.1" +authors = ["The Cranelift Project Developers"] +description = "Emit Cranelift output to native object files with `object`" +repository = "https://github.com/CraneStation/cranelift" +documentation = "https://cranelift.readthedocs.io/" +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +edition = "2018" + +[dependencies] +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1" } +cranelift-module = { path = "../cranelift-module", version = "0.43.1" } +object = { version = "0.14.0", default-features = false, features = ["write"] } +target-lexicon = "0.8.1" + +[badges] +maintenance = { status = "experimental" } +travis-ci = { repository = "CraneStation/cranelift" } diff --git a/cranelift-object/LICENSE b/cranelift-object/LICENSE new file mode 100644 index 000000000..f9d81955f --- /dev/null +++ b/cranelift-object/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types.
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
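Before the new crate's implementation below, a hedged sketch of its intended end-to-end use, assembled only from APIs this diff introduces (`ObjectBuilder::new`, `Module::new`, `Module::finish`, `ObjectProduct::emit`) plus `cranelift_module::default_libcall_names()`; obtaining the `TargetIsa` and the actual function and data definitions are elided.

use cranelift_codegen::isa::TargetIsa;
use cranelift_module::{default_libcall_names, Module};
use cranelift_object::{ObjectBackend, ObjectBuilder, ObjectTrapCollection};

// Create a Module backed by ObjectBackend, then serialize the ".o" bytes.
fn emit_object(isa: Box<dyn TargetIsa>) -> Vec<u8> {
    let builder = ObjectBuilder::new(
        isa,
        "example".to_string(),
        ObjectTrapCollection::Disabled,
        default_libcall_names(),
    )
    .expect("failed to create ObjectBuilder");
    let module: Module<ObjectBackend> = Module::new(builder);
    // ... declare and define functions/data through the Module API here ...
    module.finish().emit().expect("failed to write object bytes")
}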
+ diff --git a/cranelift-object/README.md b/cranelift-object/README.md new file mode 100644 index 000000000..3a0fc5135 --- /dev/null +++ b/cranelift-object/README.md @@ -0,0 +1,4 @@ +This crate contains a library that enables +[Cranelift](https://crates.io/crates/cranelift) +to emit native object (".o") files, using the +[object](https://crates.io/crates/object) library. diff --git a/cranelift-object/src/backend.rs b/cranelift-object/src/backend.rs new file mode 100644 index 000000000..09f209bb3 --- /dev/null +++ b/cranelift-object/src/backend.rs @@ -0,0 +1,583 @@ +//! Defines `ObjectBackend`. + +use crate::traps::{ObjectTrapSink, ObjectTrapSite}; +use cranelift_codegen::binemit::{ + Addend, CodeOffset, NullStackmapSink, NullTrapSink, Reloc, RelocSink, +}; +use cranelift_codegen::entity::SecondaryMap; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::{self, binemit, ir}; +use cranelift_module::{ + Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleNamespace, + ModuleResult, +}; +use object::write::{Object, Relocation, SectionId, StandardSection, Symbol, SymbolId}; +use object::{RelocationEncoding, RelocationKind, SymbolKind, SymbolScope}; +use std::collections::HashMap; +use target_lexicon::PointerWidth; + +#[derive(Debug)] +/// Setting to enable collection of traps. Setting this to `Enabled` in +/// `ObjectBuilder` means that `ObjectProduct` will contain trap sites. +pub enum ObjectTrapCollection { + /// `ObjectProduct::traps` will be empty + Disabled, + /// `ObjectProduct::traps` will contain trap sites + Enabled, +} + +/// A builder for `ObjectBackend`. +pub struct ObjectBuilder { + isa: Box<dyn TargetIsa>, + name: String, + collect_traps: ObjectTrapCollection, + libcall_names: Box<dyn Fn(ir::LibCall) -> String>, + function_alignment: u64, +} + +impl ObjectBuilder { + /// Create a new `ObjectBuilder` using the given Cranelift target, which + /// can be passed to [`Module::new`](cranelift_module::Module::new). + /// + /// The `collect_traps` setting determines whether trap information is collected in the + /// `ObjectProduct`. + /// + /// The `libcall_names` function provides a way to translate `cranelift_codegen`'s `ir::LibCall` + /// enum to symbols. LibCalls are inserted in the IR as part of the legalization for certain + /// floating point instructions, and for stack probes. If you don't know what to use for this + /// argument, use `cranelift_module::default_libcall_names()`. + pub fn new( + isa: Box<dyn TargetIsa>, + name: String, + collect_traps: ObjectTrapCollection, + libcall_names: Box<dyn Fn(ir::LibCall) -> String>, + ) -> ModuleResult<Self> { + Ok(Self { + isa, + name, + collect_traps, + libcall_names, + function_alignment: 1, + }) + } + + /// Set the alignment used for functions. + pub fn function_alignment(&mut self, alignment: u64) -> &mut Self { + self.function_alignment = alignment; + self + } +} + +/// An `ObjectBackend` implements `Backend` and emits ".o" files using the `object` library. +/// +/// See the `ObjectBuilder` for a convenient way to construct `ObjectBackend` instances. +pub struct ObjectBackend { + isa: Box<dyn TargetIsa>, + object: Object, + functions: SecondaryMap<FuncId, Option<SymbolId>>, + data_objects: SecondaryMap<DataId, Option<SymbolId>>, + traps: SecondaryMap<FuncId, Vec<ObjectTrapSite>>, + libcalls: HashMap<ir::LibCall, SymbolId>, + libcall_names: Box<dyn Fn(ir::LibCall) -> String>, + collect_traps: ObjectTrapCollection, + function_alignment: u64, +} + +impl Backend for ObjectBackend { + type Builder = ObjectBuilder; + + type CompiledFunction = ObjectCompiledFunction; + type CompiledData = ObjectCompiledData; + + // There's no need to return individual artifacts; we're writing them into + // the output file instead.
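+ // Instead, everything accumulates in `self.object`, and `finish` hands the + // completed `Object` back to the caller inside an `ObjectProduct`.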
+ type FinalizedFunction = (); + type FinalizedData = (); + + type Product = ObjectProduct; + + /// Create a new `ObjectBackend` using the given Cranelift target. + fn new(builder: ObjectBuilder) -> Self { + let triple = builder.isa.triple(); + let mut object = Object::new(triple.binary_format, triple.architecture); + object.add_file_symbol(builder.name.as_bytes().to_vec()); + Self { + isa: builder.isa, + object, + functions: SecondaryMap::new(), + data_objects: SecondaryMap::new(), + traps: SecondaryMap::new(), + libcalls: HashMap::new(), + libcall_names: builder.libcall_names, + collect_traps: builder.collect_traps, + function_alignment: builder.function_alignment, + } + } + + fn isa(&self) -> &dyn TargetIsa { + &*self.isa + } + + fn declare_function(&mut self, id: FuncId, name: &str, linkage: Linkage) { + let (scope, weak) = translate_linkage(linkage); + + if let Some(function) = self.functions[id] { + let symbol = self.object.symbol_mut(function); + symbol.scope = scope; + symbol.weak = weak; + } else { + let symbol_id = self.object.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope, + weak, + section: None, + }); + self.functions[id] = Some(symbol_id); + } + } + + fn declare_data( + &mut self, + id: DataId, + name: &str, + linkage: Linkage, + _writable: bool, + _align: Option<u8>, + ) { + let (scope, weak) = translate_linkage(linkage); + + if let Some(data) = self.data_objects[id] { + let symbol = self.object.symbol_mut(data); + symbol.scope = scope; + symbol.weak = weak; + } else { + let symbol_id = self.object.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Data, + scope, + weak, + section: None, + }); + self.data_objects[id] = Some(symbol_id); + } + } + + fn define_function( + &mut self, + func_id: FuncId, + _name: &str, + ctx: &cranelift_codegen::Context, + _namespace: &ModuleNamespace<Self>, + code_size: u32, + ) -> ModuleResult<ObjectCompiledFunction> { + let mut code: Vec<u8> = vec![0; code_size as usize]; + let mut reloc_sink = ObjectRelocSink::default(); + let mut trap_sink = ObjectTrapSink::default(); + let mut stackmap_sink = NullStackmapSink {}; + + if let ObjectTrapCollection::Enabled = self.collect_traps { + unsafe { + ctx.emit_to_memory( + &*self.isa, + code.as_mut_ptr(), + &mut reloc_sink, + &mut trap_sink, + &mut stackmap_sink, + ) + }; + } else { + let mut trap_sink = NullTrapSink {}; + unsafe { + ctx.emit_to_memory( + &*self.isa, + code.as_mut_ptr(), + &mut reloc_sink, + &mut trap_sink, + &mut stackmap_sink, + ) + }; + } + + let symbol = self.functions[func_id].unwrap(); + let section = self.object.section_id(StandardSection::Text); + let offset = self + .object + .add_symbol_data(symbol, section, &code, self.function_alignment); + self.traps[func_id] = trap_sink.sites; + Ok(ObjectCompiledFunction { + offset, + size: code_size, + section, + relocs: reloc_sink.relocs, + }) + } + + fn define_data( + &mut self, + data_id: DataId, + _name: &str, + writable: bool, + align: Option<u8>, + data_ctx: &DataContext, + _namespace: &ModuleNamespace<Self>, + ) -> ModuleResult<ObjectCompiledData> { + let &DataDescription { + ref init, + ref function_decls, + ref data_decls, + ref function_relocs, + ref data_relocs, + } = data_ctx.description(); + + let size = init.size(); + let mut data = Vec::with_capacity(size); + match *init { + Init::Uninitialized => { + panic!("data is not initialized yet"); + } + Init::Zeros { ..
} => { + data.resize(size, 0); + } + Init::Bytes { ref contents } => { + data.extend_from_slice(contents); + } + } + + let reloc_size = match self.isa.triple().pointer_width().unwrap() { + PointerWidth::U16 => 16, + PointerWidth::U32 => 32, + PointerWidth::U64 => 64, + }; + let mut relocs = Vec::new(); + for &(offset, id) in function_relocs { + relocs.push(RelocRecord { + offset, + name: function_decls[id].clone(), + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: reloc_size, + addend: 0, + }); + } + for &(offset, id, addend) in data_relocs { + relocs.push(RelocRecord { + offset, + name: data_decls[id].clone(), + kind: RelocationKind::Absolute, + encoding: RelocationEncoding::Generic, + size: reloc_size, + addend, + }); + } + + let symbol = self.data_objects[data_id].unwrap(); + let section = self.object.section_id(if writable { + StandardSection::Data + } else if relocs.is_empty() { + StandardSection::ReadOnlyData + } else { + StandardSection::ReadOnlyDataWithRel + }); + let offset = + self.object + .add_symbol_data(symbol, section, &data, u64::from(align.unwrap_or(1))); + Ok(ObjectCompiledData { + offset, + section, + relocs, + }) + } + + fn write_data_funcaddr( + &mut self, + _data: &mut ObjectCompiledData, + _offset: usize, + _what: ir::FuncRef, + ) { + unimplemented!() + } + + fn write_data_dataaddr( + &mut self, + _data: &mut ObjectCompiledData, + _offset: usize, + _what: ir::GlobalValue, + _usize: binemit::Addend, + ) { + unimplemented!() + } + + fn finalize_function( + &mut self, + _id: FuncId, + func: &ObjectCompiledFunction, + namespace: &ModuleNamespace<Self>, + ) { + for &RelocRecord { + offset, + ref name, + kind, + encoding, + size, + addend, + } in &func.relocs + { + let offset = func.offset + offset as u64; + let symbol = self.get_symbol(namespace, name); + self.object + .add_relocation( + func.section, + Relocation { + offset, + size, + kind, + encoding, + symbol, + addend, + }, + ) + .unwrap(); + } + } + + fn get_finalized_function(&self, _func: &ObjectCompiledFunction) { + // Nothing to do. + } + + fn finalize_data( + &mut self, + _id: DataId, + data: &ObjectCompiledData, + namespace: &ModuleNamespace<Self>, + ) { + for &RelocRecord { + offset, + ref name, + kind, + encoding, + size, + addend, + } in &data.relocs + { + let offset = data.offset + offset as u64; + let symbol = self.get_symbol(namespace, name); + self.object + .add_relocation( + data.section, + Relocation { + offset, + size, + kind, + encoding, + symbol, + addend, + }, + ) + .unwrap(); + } + } + + fn get_finalized_data(&self, _data: &ObjectCompiledData) { + // Nothing to do. + } + + fn publish(&mut self) { + // Nothing to do. + } + + fn finish(self) -> ObjectProduct { + ObjectProduct { + object: self.object, + functions: self.functions, + data_objects: self.data_objects, + traps: self.traps, + } + } +} + +impl ObjectBackend { + // This should only be called during finalization because it creates + // symbols for missing libcalls. + fn get_symbol( + &mut self, + namespace: &ModuleNamespace<Self>, + name: &ir::ExternalName, + ) -> SymbolId { + match *name { + ir::ExternalName::User { ..
} => { + if namespace.is_function(name) { + let id = namespace.get_function_id(name); + self.functions[id].unwrap() + } else { + let id = namespace.get_data_id(name); + self.data_objects[id].unwrap() + } + } + ir::ExternalName::LibCall(ref libcall) => { + let name = (self.libcall_names)(*libcall); + if let Some(symbol) = self.object.symbol_id(name.as_bytes()) { + symbol + } else if let Some(symbol) = self.libcalls.get(libcall) { + *symbol + } else { + let symbol = self.object.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: SymbolKind::Text, + scope: SymbolScope::Unknown, + weak: false, + section: None, + }); + self.libcalls.insert(*libcall, symbol); + symbol + } + } + _ => panic!("invalid ExternalName {}", name), + } + } +} + +fn translate_linkage(linkage: Linkage) -> (SymbolScope, bool) { + let scope = match linkage { + Linkage::Import => SymbolScope::Unknown, + Linkage::Local => SymbolScope::Compilation, + Linkage::Export | Linkage::Preemptible => SymbolScope::Dynamic, + }; + // TODO: this matches rustc_codegen_cranelift, but may be wrong. + let weak = linkage == Linkage::Preemptible; + (scope, weak) +} + +#[derive(Clone)] +pub struct ObjectCompiledFunction { + offset: u64, + size: u32, + section: SectionId, + relocs: Vec<RelocRecord>, +} + +#[derive(Clone)] +pub struct ObjectCompiledData { + offset: u64, + section: SectionId, + relocs: Vec<RelocRecord>, +} + +/// This is the output of `Module`'s +/// [`finish`](../cranelift_module/struct.Module.html#method.finish) function. +/// It contains the generated `Object` and other information produced during +/// compilation. +pub struct ObjectProduct { + /// Object artifact with all functions and data from the module defined. + pub object: Object, + /// Symbol IDs for functions (both declared and defined). + pub functions: SecondaryMap<FuncId, Option<SymbolId>>, + /// Symbol IDs for data objects (both declared and defined). + pub data_objects: SecondaryMap<DataId, Option<SymbolId>>, + /// Trap sites for defined functions. + pub traps: SecondaryMap<FuncId, Vec<ObjectTrapSite>>, +} + +impl ObjectProduct { + /// Return the `SymbolId` for the given function. + #[inline] + pub fn function_symbol(&self, id: FuncId) -> SymbolId { + self.functions[id].unwrap() + } + + /// Return the `SymbolId` for the given data object. + #[inline] + pub fn data_symbol(&self, id: DataId) -> SymbolId { + self.data_objects[id].unwrap() + } + + /// Write the object bytes in memory. + #[inline] + pub fn emit(self) -> Result<Vec<u8>, String> { + self.object.write() + } +} + +#[derive(Clone)] +struct RelocRecord { + offset: CodeOffset, + name: ir::ExternalName, + kind: RelocationKind, + encoding: RelocationEncoding, + size: u8, + addend: Addend, +} + +#[derive(Default)] +struct ObjectRelocSink { + relocs: Vec<RelocRecord>, +} + +impl RelocSink for ObjectRelocSink { + fn reloc_ebb(&mut self, _offset: CodeOffset, _reloc: Reloc, _ebb_offset: CodeOffset) { + unimplemented!(); + } + + fn reloc_external( + &mut self, + offset: CodeOffset, + reloc: Reloc, + name: &ir::ExternalName, + addend: Addend, + ) { + let (kind, encoding, size) = match reloc { + Reloc::Abs4 => (RelocationKind::Absolute, RelocationEncoding::Generic, 32), + Reloc::Abs8 => (RelocationKind::Absolute, RelocationEncoding::Generic, 64), + Reloc::X86PCRel4 => (RelocationKind::Relative, RelocationEncoding::Generic, 32), + Reloc::X86CallPCRel4 => (RelocationKind::Relative, RelocationEncoding::X86Branch, 32), + // TODO: Get Cranelift to tell us when we can use + // R_X86_64_GOTPCRELX/R_X86_64_REX_GOTPCRELX.
+ Reloc::X86CallPLTRel4 => ( + RelocationKind::PltRelative, + RelocationEncoding::X86Branch, + 32, + ), + Reloc::X86GOTPCRel4 => (RelocationKind::GotRelative, RelocationEncoding::Generic, 32), + // FIXME + _ => unimplemented!(), + }; + self.relocs.push(RelocRecord { + offset, + name: name.clone(), + kind, + encoding, + size, + addend, + }); + } + + fn reloc_jt(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::JumpTable) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _jt: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } +} diff --git a/cranelift-object/src/lib.rs b/cranelift-object/src/lib.rs new file mode 100644 index 000000000..162a0b47a --- /dev/null +++ b/cranelift-object/src/lib.rs @@ -0,0 +1,38 @@ +//! Top-level lib.rs for `cranelift_object`. +//! +//! Users of this module should not have to depend on `object` directly. + +#![deny( + missing_docs, + trivial_numeric_casts, + unused_extern_crates, + unstable_features +)] +#![warn(unused_import_braces)] +#![cfg_attr(feature = "clippy", plugin(clippy(conf_file = "../../clippy.toml")))] +#![cfg_attr( + feature = "cargo-clippy", + allow(clippy::new_without_default, clippy::new_without_default_derive) +)] +#![cfg_attr( + feature = "cargo-clippy", + warn( + clippy::float_arithmetic, + clippy::mut_mut, + clippy::nonminimal_bool, + clippy::option_map_unwrap_or, + clippy::option_map_unwrap_or_else, + clippy::print_stdout, + clippy::unicode_not_nfc, + clippy::use_self + ) +)] + +mod backend; +mod traps; + +pub use crate::backend::{ObjectBackend, ObjectBuilder, ObjectProduct, ObjectTrapCollection}; +pub use crate::traps::{ObjectTrapSink, ObjectTrapSite}; + +/// Version number of this crate. +pub const VERSION: &str = env!("CARGO_PKG_VERSION"); diff --git a/cranelift-object/src/traps.rs b/cranelift-object/src/traps.rs new file mode 100644 index 000000000..a64f1e13f --- /dev/null +++ b/cranelift-object/src/traps.rs @@ -0,0 +1,32 @@ +//! Records every `TrapCode` that cranelift outputs during code generation, +//! for every function in the module. This data may be useful at runtime. 
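+//! +//! For example, a runtime consumer can take the `traps` map from the final +//! `ObjectProduct` and match a faulting code offset against each recorded +//! `ObjectTrapSite::offset` to recover the `TrapCode` and `SourceLoc`.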
+ +use cranelift_codegen::{binemit, ir}; + +/// Record of the arguments cranelift passes to `TrapSink::trap` +#[derive(Clone)] +pub struct ObjectTrapSite { + /// Offset into function + pub offset: binemit::CodeOffset, + /// Source location given to cranelift + pub srcloc: ir::SourceLoc, + /// Trap code, as determined by cranelift + pub code: ir::TrapCode, +} + +/// Record of the trap sites for a given function +#[derive(Default, Clone)] +pub struct ObjectTrapSink { + /// All trap sites collected in function + pub sites: Vec<ObjectTrapSite>, +} + +impl binemit::TrapSink for ObjectTrapSink { + fn trap(&mut self, offset: binemit::CodeOffset, srcloc: ir::SourceLoc, code: ir::TrapCode) { + self.sites.push(ObjectTrapSite { + offset, + srcloc, + code, + }); + } +} diff --git a/cranelift-preopt/Cargo.toml b/cranelift-preopt/Cargo.toml index 942c94f05..2bf1a1a52 100644 --- a/cranelift-preopt/Cargo.toml +++ b/cranelift-preopt/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-preopt" -version = "0.31.0" +version = "0.43.1" description = "Support for optimizations in Cranelift" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -12,8 +12,8 @@ keywords = ["optimize", "compile", "compiler", "jit"] edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -cranelift-entity = { path = "../cranelift-entity", version = "0.31.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +cranelift-entity = { path = "../cranelift-entity", version = "0.43.1", default-features = false } # This is commented out because it doesn't build on Rust 1.25.0, which # cranelift currently supports. # rustc_apfloat = { version = "0.1.2", default-features = false } diff --git a/cranelift-preopt/src/lib.rs b/cranelift-preopt/src/lib.rs index e8cf7be32..c35eea14a 100644 --- a/cranelift-preopt/src/lib.rs +++ b/cranelift-preopt/src/lib.rs @@ -19,7 +19,6 @@ ) )] #![no_std] -#![cfg_attr(not(feature = "std"), feature(alloc))] #[cfg(not(feature = "std"))] #[macro_use] diff --git a/cranelift-reader/Cargo.toml b/cranelift-reader/Cargo.toml index 74b63aff4..769a5b455 100644 --- a/cranelift-reader/Cargo.toml +++ b/cranelift-reader/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift-reader" -version = "0.31.0" +version = "0.43.1" description = "Cranelift textual IR reader" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -10,8 +10,8 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0" } -target-lexicon = "0.4.0" +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1" } +target-lexicon = "0.8.1" [badges] maintenance = { status = "experimental" } diff --git a/cranelift-reader/src/lexer.rs b/cranelift-reader/src/lexer.rs index 8673b2b92..2432ab055 100644 --- a/cranelift-reader/src/lexer.rs +++ b/cranelift-reader/src/lexer.rs @@ -27,6 +27,7 @@ pub enum Token<'a> { Dot, // '.' Colon, // ':' Equal, // '=' + Not, // '!' Arrow, // '->' Float(&'a str), // Floating point immediate Integer(&'a str), // Integer immediate @@ -42,6 +43,7 @@ pub enum Token<'a> { SigRef(u32), // sig2 UserRef(u32), // u345 Name(&'a str), // %9arbitrary_alphanum, %x3, %0, %function ... + String(&'a str), // "arbitrary quoted string with no escape" ...
HexSequence(&'a str), // #89AF Identifier(&'a str), // Unrecognized identifier (opcode, enumerator, ...) SourceLoc(&'a str), // @00c7 @@ -363,6 +365,7 @@ impl<'a> Lexer<'a> { "i16" => types::I16, "i32" => types::I32, "i64" => types::I64, + "i128" => types::I128, "f32" => types::F32, "f64" => types::F64, "b1" => types::B1, @@ -370,6 +373,9 @@ "b16" => types::B16, "b32" => types::B32, "b64" => types::B64, + "b128" => types::B128, + "r32" => types::R32, + "r64" => types::R64, _ => return None, }; if is_vector { @@ -399,6 +405,27 @@ impl<'a> Lexer<'a> { token(Token::Name(&self.source[begin..end]), loc) } + /// Scan for a multi-line quoted string with no escape character. + fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> { + let loc = self.loc(); + let begin = self.pos + 1; + + assert_eq!(self.lookahead, Some('"')); + + while let Some(c) = self.next_ch() { + if c == '"' { + break; + } + } + + let end = self.pos; + if self.lookahead != Some('"') { + return error(LexError::InvalidChar, self.loc()); + } + self.next_ch(); + token(Token::String(&self.source[begin..end]), loc) + } + fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> { let loc = self.loc(); let begin = self.pos + 1; @@ -450,6 +477,7 @@ impl<'a> Lexer<'a> { Some('.') => Some(self.scan_char(Token::Dot)), Some(':') => Some(self.scan_char(Token::Colon)), Some('=') => Some(self.scan_char(Token::Equal)), + Some('!') => Some(self.scan_char(Token::Not)), Some('+') => Some(self.scan_number()), Some('-') => { if self.looking_at("->") { @@ -461,6 +489,7 @@ Some(ch) if ch.is_digit(10) => Some(self.scan_number()), Some(ch) if ch.is_alphabetic() => Some(self.scan_word()), Some('%') => Some(self.scan_name()), + Some('"') => Some(self.scan_string()), Some('#') => Some(self.scan_hex_sequence()), Some('@') => Some(self.scan_srcloc()), Some(ch) if ch.is_whitespace() => { @@ -631,6 +660,33 @@ mod tests { assert_eq!(lex.next(), token(Token::Name("_"), 1)); } + #[test] + fn lex_strings() { + let mut lex = Lexer::new( + r#""" "0" "x3""function" "123 abc" "\" "start + and end on + different lines" "#, + ); + + assert_eq!(lex.next(), token(Token::String(""), 1)); + assert_eq!(lex.next(), token(Token::String("0"), 1)); + assert_eq!(lex.next(), token(Token::String("x3"), 1)); + assert_eq!(lex.next(), token(Token::String("function"), 1)); + assert_eq!(lex.next(), token(Token::String("123 abc"), 1)); + assert_eq!(lex.next(), token(Token::String(r#"\"#), 1)); + assert_eq!( + lex.next(), + token( + Token::String( + r#"start + and end on + different lines"# + ), + 1 + ) + ); + } + #[test] fn lex_userrefs() { let mut lex = Lexer::new("u0 u1 u234567890 u9:8765"); diff --git a/cranelift-reader/src/lib.rs b/cranelift-reader/src/lib.rs index 803c17c62..f0922bf88 100644 --- a/cranelift-reader/src/lib.rs +++ b/cranelift-reader/src/lib.rs @@ -28,10 +28,10 @@ pub use crate::error::{Location, ParseError, ParseResult}; pub use crate::isaspec::{parse_options, IsaSpec}; -pub use crate::parser::{parse_functions, parse_test}; +pub use crate::parser::{parse_functions, parse_test, ParseOptions}; pub use crate::sourcemap::SourceMap; pub use crate::testcommand::{TestCommand, TestOption}; -pub use crate::testfile::{Comment, Details, TestFile}; +pub use crate::testfile::{Comment, Details, Feature, TestFile}; mod error; mod isaspec; diff --git a/cranelift-reader/src/parser.rs b/cranelift-reader/src/parser.rs index 037ed2acc..711b4d3cc 100644 --- a/cranelift-reader/src/parser.rs +++ b/cranelift-reader/src/parser.rs @@ -5,13 +5,14 @@ use 
crate::isaspec; use crate::lexer::{LexError, Lexer, LocatedError, LocatedToken, Token}; use crate::sourcemap::SourceMap; use crate::testcommand::TestCommand; -use crate::testfile::{Comment, Details, TestFile}; +use crate::testfile::{Comment, Details, Feature, TestFile}; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir; use cranelift_codegen::ir::entities::AnyEntity; -use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; +use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64, V128Imm}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; use cranelift_codegen::ir::types::INVALID; +use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{ AbiParam, ArgumentExtension, ArgumentLoc, Ebb, ExtFuncData, ExternalName, FuncRef, Function, GlobalValue, GlobalValueData, Heap, HeapData, HeapStyle, JumpTable, JumpTableData, MemFlags, @@ -21,6 +22,7 @@ use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, timing}; +use std::iter::FromIterator; use std::mem; use std::str::FromStr; use std::{u16, u32}; @@ -31,20 +33,37 @@ use target_lexicon::Triple; /// Any test commands or target declarations are ignored. pub fn parse_functions(text: &str) -> ParseResult<Vec<Function>> { let _tt = timing::parse_text(); - parse_test(text, None, None) + parse_test(text, ParseOptions::default()) .map(|file| file.functions.into_iter().map(|(func, _)| func).collect()) } +/// Options for configuring the parsing of filetests. +pub struct ParseOptions<'a> { + /// Compiler passes to run on the parsed functions. + pub passes: Option<&'a [String]>, + /// Target ISA for compiling the parsed functions, e.g. "x86_64 skylake". + pub target: Option<&'a str>, + /// Default calling convention used when none is specified for a parsed function. + pub default_calling_convention: CallConv, +} + +impl Default for ParseOptions<'_> { + fn default() -> Self { + Self { + passes: None, + target: None, + default_calling_convention: CallConv::Fast, + } + } +} + /// Parse the entire `text` as a test case file. /// /// The returned `TestFile` contains direct references to substrings of `text`. -pub fn parse_test<'a>( - text: &'a str, - passes: Option<&'a [String]>, - target: Option<&str>, -) -> ParseResult<TestFile<'a>> { +pub fn parse_test<'a>(text: &'a str, options: ParseOptions<'a>) -> ParseResult<TestFile<'a>> { let _tt = timing::parse_text(); let mut parser = Parser::new(text); + // Gather the preamble comments. parser.start_gathering_comments(); @@ -53,18 +72,29 @@ // Check for specified passes and target, if present throw out test commands/targets specified // in file. - match passes { + match options.passes { Some(pass_vec) => { parser.parse_test_commands(); commands = parser.parse_cmdline_passes(pass_vec); parser.parse_target_specs()?; - isa_spec = parser.parse_cmdline_target(target)?; + isa_spec = parser.parse_cmdline_target(options.target)?; } None => { commands = parser.parse_test_commands(); isa_spec = parser.parse_target_specs()?; } }; + let features = parser.parse_cranelift_features()?; + + // Decide between using the calling convention passed in the options or using the + // host's calling convention; if any tests are to be run on the host, we should default to + // the host's calling convention.
+ parser = if commands.iter().any(|tc| tc.command == "run") { + let host_default_calling_convention = CallConv::triple_default(&Triple::host()); + parser.with_default_calling_convention(host_default_calling_convention) + } else { + parser.with_default_calling_convention(options.default_calling_convention) + }; parser.token(); parser.claim_gathered_comments(AnyEntity::Function); @@ -75,6 +105,7 @@ Ok(TestFile { commands, isa_spec, + features, preamble_comments, functions, }) @@ -99,6 +130,9 @@ pub struct Parser<'a> { /// Comments collected so far. comments: Vec<Comment<'a>>, + + /// Default calling convention; used when none is specified. + default_calling_convention: CallConv, } /// Context for resolving references when parsing a single function. @@ -235,11 +269,16 @@ impl<'a> Context<'a> { } // Allocate a new signature. - fn add_sig(&mut self, sig: SigRef, data: Signature, loc: Location) -> ParseResult<()> { + fn add_sig( + &mut self, + sig: SigRef, + data: Signature, + loc: Location, + defaultcc: CallConv, + ) -> ParseResult<()> { self.map.def_sig(sig, loc)?; while self.function.dfg.signatures.next_key().index() <= sig.index() { - self.function - .import_signature(Signature::new(CallConv::Fast)); + self.function.import_signature(Signature::new(defaultcc)); } self.function.dfg.signatures[sig] = data; Ok(()) @@ -318,6 +357,16 @@ impl<'a> Parser<'a> { gathering_comments: false, gathered_comments: Vec::new(), comments: Vec::new(), + default_calling_convention: CallConv::Fast, + } + } + + /// Modify the default calling convention; returns a new parser with the changed calling + /// convention. + pub fn with_default_calling_convention(self, default_calling_convention: CallConv) -> Self { + Self { + default_calling_convention, + ..self } } @@ -546,6 +595,37 @@ impl<'a> Parser<'a> { } } + // Match and consume a Uimm128 immediate; due to size restrictions on InstructionData, Uimm128 + // is boxed in cranelift-codegen/meta/src/shared/immediates.rs + fn match_uimm128(&mut self, err_msg: &str) -> ParseResult<V128Imm> { + if let Some(Token::Integer(text)) = self.token() { + self.consume(); + // Lexer just gives us raw text that looks like hex code. + // Parse it as an Uimm128 to check for overflow and other issues. + text.parse().map_err(|e| { + self.error(&format!( + "expected u128 hexadecimal immediate, failed to parse: {}", + e + )) + }) + } else { + err!(self.loc, err_msg) + } + } + + // Match and consume either a hexadecimal Uimm128 immediate (e.g. 0x000102...) or its literal list form (e.g. [0 1 2...]) + fn match_uimm128_or_literals(&mut self, controlling_type: Type) -> ParseResult<V128Imm> { + if self.optional(Token::LBracket) { + // parse using a list of values, e.g. vconst.i32x4 [0 1 2 3] + let uimm128 = self.parse_literals_to_uimm128(controlling_type)?; + self.match_token(Token::RBracket, "expected a terminating right bracket")?; + Ok(uimm128) + } else { + // parse using a hexadecimal value + self.match_uimm128("expected an immediate hexadecimal operand") + } + } + // Match and consume a Uimm64 immediate. fn match_uimm64(&mut self, err_msg: &str) -> ParseResult<Uimm64> { if let Some(Token::Integer(text)) = self.token() { @@ -757,6 +837,36 @@ impl<'a> Parser<'a> { } } + /// Parse a list of literals (i.e. integers, floats, booleans); e.g. the `[0 1 2 3]` in `vconst.i32x4 [0 1 2 3]`. + fn parse_literals_to_uimm128(&mut self, ty: Type) -> ParseResult<V128Imm> { + macro_rules! 
consume { + ( $ty:ident, $match_fn:expr ) => {{ + assert!($ty.is_vector()); + let mut v = Vec::with_capacity($ty.lane_count() as usize); + for _ in 0..$ty.lane_count() { + v.push($match_fn?); + } + V128Imm::from_iter(v) + }}; + } + + if !ty.is_vector() { + err!(self.loc, "Expected a controlling vector type, not {}", ty) + } else { + let uimm128 = match ty.lane_type() { + I8 => consume!(ty, self.match_uimm8("Expected an 8-bit unsigned integer")), + I16 => unimplemented!(), // TODO no 16-bit match yet + I32 => consume!(ty, self.match_imm32("Expected a 32-bit integer")), + I64 => consume!(ty, self.match_imm64("Expected a 64-bit integer")), + F32 => consume!(ty, self.match_ieee32("Expected a 32-bit float...")), + F64 => consume!(ty, self.match_ieee64("Expected a 64-bit float")), + b if b.is_bool() => consume!(ty, self.match_bool("Expected a boolean")), + _ => return err!(self.loc, "Expected a type of: float, int, bool"), + }; + Ok(uimm128) + } + } + /// Parse a list of test command passes specified in command line. pub fn parse_cmdline_passes(&mut self, passes: &'a [String]) -> Vec> { let mut list = Vec::new(); @@ -886,6 +996,27 @@ impl<'a> Parser<'a> { } } + /// Parse a list of expected features that Cranelift should be compiled with, or without. + pub fn parse_cranelift_features(&mut self) -> ParseResult>> { + let mut list = Vec::new(); + while self.token() == Some(Token::Identifier("feature")) { + self.consume(); + let has = !self.optional(Token::Not); + match (self.token(), has) { + (Some(Token::String(flag)), true) => list.push(Feature::With(flag)), + (Some(Token::String(flag)), false) => list.push(Feature::Without(flag)), + (tok, _) => { + return err!( + self.loc, + format!("Expected feature flag string, got {:?}", tok) + ) + } + } + self.consume(); + } + Ok(list) + } + /// Parse a list of function definitions. /// /// This is the top-level parse function matching the whole contents of a file. @@ -1001,7 +1132,7 @@ impl<'a> Parser<'a> { // fn parse_signature(&mut self, unique_isa: Option<&dyn TargetIsa>) -> ParseResult { // Calling convention defaults to `fast`, but can be changed. - let mut sig = Signature::new(CallConv::Fast); + let mut sig = Signature::new(self.default_calling_convention); self.match_token(Token::LPar, "expected function signature: ( args... )")?; // signature ::= "(" * [abi-param-list] ")" ["->" retlist] [callconv] @@ -1153,7 +1284,9 @@ impl<'a> Parser<'a> { Some(Token::SigRef(..)) => { self.start_gathering_comments(); self.parse_signature_decl(ctx.unique_isa) - .and_then(|(sig, dat)| ctx.add_sig(sig, dat, self.loc)) + .and_then(|(sig, dat)| { + ctx.add_sig(sig, dat, self.loc, self.default_calling_convention) + }) } Some(Token::FuncRef(..)) => { self.start_gathering_comments(); @@ -1890,7 +2023,7 @@ impl<'a> Parser<'a> { }; // instruction ::= [inst-results "="] Opcode(opc) ["." Type] * ... - let inst_data = self.parse_inst_operands(ctx, opcode)?; + let inst_data = self.parse_inst_operands(ctx, opcode, explicit_ctrl_type)?; // We're done parsing the instruction now. 
// @@ -2099,6 +2232,7 @@ impl<'a> Parser<'a> { &mut self, ctx: &mut Context, opcode: Opcode, + explicit_control_type: Option<Type>, ) -> ParseResult<InstructionData> { let idata = match opcode.format() { InstructionFormat::Unary => InstructionData::Unary { @@ -2291,6 +2425,36 @@ impl<'a> Parser<'a> { let lane = self.match_uimm8("expected lane number")?; InstructionData::ExtractLane { opcode, lane, arg } } + InstructionFormat::UnaryConst => match explicit_control_type { + None => { + return err!( + self.loc, + "Expected {:?} to have a controlling type variable, e.g. inst.i32x4", + opcode + ) + } + Some(controlling_type) => { + let uimm128 = self.match_uimm128_or_literals(controlling_type)?; + let constant_handle = ctx.function.dfg.constants.insert(uimm128.to_vec()); + InstructionData::UnaryConst { + opcode, + constant_handle, + } + } + }, + InstructionFormat::Shuffle => { + let a = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let b = self.match_value("expected SSA value second operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let uimm128 = self.match_uimm128_or_literals(I8X16)?; + let mask = ctx.function.dfg.immediates.push(uimm128.to_vec()); + InstructionData::Shuffle { + opcode, + mask, + args: [a, b], + } + } InstructionFormat::IntCompare => { let cond = self.match_enum("expected intcc condition code")?; let lhs = self.match_value("expected SSA value first operand")?; @@ -2498,6 +2662,10 @@ impl<'a> Parser<'a> { let dst = self.match_regunit(ctx.unique_isa)?; InstructionData::CopySpecial { opcode, src, dst } } + InstructionFormat::CopyToSsa => InstructionData::CopyToSsa { + opcode, + src: self.match_regunit(ctx.unique_isa)?, + }, InstructionFormat::RegSpill => { let arg = self.match_value("expected SSA value operand")?; self.match_token(Token::Comma, "expected ',' between operands")?; @@ -2630,14 +2798,14 @@ mod tests { assert_eq!(sig.returns.len(), 0); assert_eq!(sig.call_conv, CallConv::SystemV); - let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash") + let sig2 = Parser::new("(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash_system_v") .parse_signature(None) .unwrap(); assert_eq!( sig2.to_string(), - "(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash" + "(i8 uext, f32, f64, i32 sret) -> i32 sext, f64 baldrdash_system_v" ); - assert_eq!(sig2.call_conv, CallConv::Baldrdash); + assert_eq!(sig2.call_conv, CallConv::BaldrdashSystemV); // Old-style signature without a calling convention.
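    // (Such signatures fall back to the parser's default calling convention, i.e. `fast` + // unless `with_default_calling_convention` was used to change it.)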
assert_eq!( @@ -2916,14 +3084,15 @@ mod tests { #[test] fn test_file() { let tf = parse_test( - "; before + r#"; before test cfg option=5 test verify set enable_float=false + feature "foo" + feature !"bar" ; still preamble - function %comment() system_v {}", - None, - None, + function %comment() system_v {}"#, + ParseOptions::default(), ) .unwrap(); assert_eq!(tf.commands.len(), 2); @@ -2936,6 +3105,8 @@ mod tests { } _ => panic!("unexpected ISAs"), } + assert_eq!(tf.features[0], Feature::With(&"foo")); + assert_eq!(tf.features[1], Feature::Without(&"bar")); assert_eq!(tf.preamble_comments.len(), 2); assert_eq!(tf.preamble_comments[0].text, "; before"); assert_eq!(tf.preamble_comments[1].text, "; still preamble"); @@ -2949,6 +3120,7 @@ mod tests { assert!(parse_test( "target function %foo() system_v {}", + ParseOptions::default() ) .is_err()); @@ -2956,6 +3128,7 @@ mod tests { "target riscv32 set enable_float=false function %foo() system_v {}", + ParseOptions::default() ) .is_err()); @@ -2963,6 +3136,7 @@ mod tests { "set enable_float=false isa riscv function %foo() system_v {}", + ParseOptions::default(), ) .unwrap() .isa_spec @@ -3023,4 +3197,57 @@ mod tests { ); assert!(parser.parse_function(None).is_err()); } + + #[test] + fn change_default_calling_convention() { + let code = "function %test() { + ebb0: + return + }"; + + // By default the parser will use the fast calling convention if none is specified. + let mut parser = Parser::new(code); + assert_eq!( + parser.parse_function(None).unwrap().0.signature.call_conv, + CallConv::Fast + ); + + // However, we can specify a different calling convention to be the default. + let mut parser = Parser::new(code).with_default_calling_convention(CallConv::Cold); + assert_eq!( + parser.parse_function(None).unwrap().0.signature.call_conv, + CallConv::Cold + ); + } + + #[test] + fn uimm128() { + macro_rules! parse_as_uimm128 { + ($text:expr, $type:expr) => {{ + Parser::new($text).parse_literals_to_uimm128($type) + }}; + } + macro_rules! can_parse_as_uimm128 { + ($text:expr, $type:expr) => {{ + assert!(parse_as_uimm128!($text, $type).is_ok()) + }}; + } + macro_rules! 
cannot_parse_as_uimm128 { + ($text:expr, $type:expr) => {{ + assert!(parse_as_uimm128!($text, $type).is_err()) + }}; + } + + can_parse_as_uimm128!("1 2 3 4", I32X4); + can_parse_as_uimm128!("1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", I8X16); + can_parse_as_uimm128!("0x1.1 0x2.2 0x3.3 0x4.4", F32X4); + can_parse_as_uimm128!("true false true false true false true false", B16X8); + can_parse_as_uimm128!("0 -1", I64X2); + can_parse_as_uimm128!("true false", B64X2); + can_parse_as_uimm128!("true true true true true", B32X4); // note that parse_literals_to_uimm128 will leave extra tokens unconsumed + + cannot_parse_as_uimm128!("0x0 0x1 0x2 0x3", I32X4); + cannot_parse_as_uimm128!("1 2 3", I32X4); + cannot_parse_as_uimm128!(" ", F32X4); + } } diff --git a/cranelift-reader/src/sourcemap.rs b/cranelift-reader/src/sourcemap.rs index a398dd584..6291d0cd7 100644 --- a/cranelift-reader/src/sourcemap.rs +++ b/cranelift-reader/src/sourcemap.rs @@ -211,7 +211,7 @@ impl SourceMap { #[cfg(test)] mod tests { - use crate::parse_test; + use crate::{parse_test, ParseOptions}; #[test] fn details() { @@ -222,8 +222,7 @@ mod tests { ebb0(v4: i32, v7: i32): v10 = iadd v4, v7 }", - None, - None, + ParseOptions::default(), ) .unwrap(); let map = &tf.functions[0].1.map; diff --git a/cranelift-reader/src/testfile.rs b/cranelift-reader/src/testfile.rs index 506694586..68c7d30a9 100644 --- a/cranelift-reader/src/testfile.rs +++ b/cranelift-reader/src/testfile.rs @@ -20,6 +20,8 @@ pub struct TestFile<'a> { pub commands: Vec<TestCommand<'a>>, /// `isa bar ...` lines. pub isa_spec: IsaSpec, + /// `feature ...` lines. + pub features: Vec<Feature<'a>>, /// Comments appearing before the first function. /// These are all tagged as 'Function' scope for lack of a better entity. pub preamble_comments: Vec<Comment<'a>>, @@ -55,3 +57,17 @@ pub struct Comment<'a> { /// Text of the comment, including the leading `;`. pub text: &'a str, } + +/// A Cranelift feature in a test file preamble. +/// +/// This represents the expectation of the test case. Before running any of the +/// functions of the test file, the feature set should be compared with the +/// feature set used to compile Cranelift. If there are any differences, the +/// test file should be skipped. +#[derive(PartialEq, Eq, Debug)] +pub enum Feature<'a> { + /// `feature "..."` lines. + With(&'a str), + /// `feature ! "..."` lines.
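+    /// For example, a preamble line `feature !"basic-blocks"` would mean the test expects a + /// Cranelift build without the `basic-blocks` feature (the flag name here is purely + /// illustrative).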
+ Without(&'a str), +} diff --git a/cranelift-serde/Cargo.toml b/cranelift-serde/Cargo.toml index 7c77ff513..16848ee23 100644 --- a/cranelift-serde/Cargo.toml +++ b/cranelift-serde/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-serde" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "Serializer/Deserializer for Cranelift IR" repository = "https://github.com/CraneStation/cranelift" @@ -18,8 +18,8 @@ clap = "2.32.0" serde = "1.0.8" serde_derive = "1.0.75" serde_json = "1.0.26" -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0" } -cranelift-reader = { path = "../cranelift-reader", version = "0.31.0" } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1" } +cranelift-reader = { path = "../cranelift-reader", version = "0.43.1" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift-serde/src/serde_clif_json.rs b/cranelift-serde/src/serde_clif_json.rs index 50655b732..0d19ee5fa 100644 --- a/cranelift-serde/src/serde_clif_json.rs +++ b/cranelift-serde/src/serde_clif_json.rs @@ -58,6 +58,11 @@ pub enum SerInstData { arg: String, lane: String, }, + Shuffle { + opcode: String, + args: [String; 2], + mask: String, + }, IntCompare { opcode: String, args: [String; 2], @@ -210,6 +215,10 @@ pub enum SerInstData { src: String, dst: String, }, + CopyToSsa { + opcode: String, + src: String, + }, RegSpill { opcode: String, arg: String, @@ -327,6 +336,28 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { arg: arg.to_string(), lane: lane.to_string(), }, + InstructionData::UnaryConst { + opcode, + constant_handle, + } => { + let constant = func.dfg.constants.get(constant_handle); + SerInstData::UnaryImm { + opcode: opcode.to_string(), + imm: format!("{:?}", constant), + } + } + InstructionData::Shuffle { opcode, args, mask } => { + let mask = func + .dfg + .immediates + .get(mask) + .expect("Expected shuffle mask to already be inserted in immediate mapping"); + SerInstData::Shuffle { + opcode: opcode.to_string(), + args: [args[0].to_string(), args[1].to_string()], + mask: format!("{:?}", mask), + } + } InstructionData::IntCompare { opcode, args, cond } => { let hold_args = [args[0].to_string(), args[1].to_string()]; SerInstData::IntCompare { @@ -651,6 +682,10 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { src: src.to_string(), dst: dst.to_string(), }, + InstructionData::CopyToSsa { opcode, src } => SerInstData::CopyToSsa { + opcode: opcode.to_string(), + src: src.to_string(), + }, InstructionData::RegSpill { opcode, arg, diff --git a/cranelift-simplejit/Cargo.toml b/cranelift-simplejit/Cargo.toml index 2ced838fb..7078e483b 100644 --- a/cranelift-simplejit/Cargo.toml +++ b/cranelift-simplejit/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-simplejit" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "A simple JIT library backed by Cranelift" repository = "https://github.com/CraneStation/cranelift" @@ -10,21 +10,26 @@ readme = "README.md" edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0" } -cranelift-module = { path = "../cranelift-module", version = "0.31.0" } -cranelift-native = { path = "../cranelift-native", version = "0.31.0" } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1" } +cranelift-module = { path = "../cranelift-module", version = "0.43.1" } +cranelift-native = { path = "../cranelift-native", 
version = "0.43.1" } region = "2.0.0" libc = { version = "0.2.42" } errno = "0.2.4" -target-lexicon = { version = "0.4.0" } +target-lexicon = "0.8.1" +memmap = { version = "0.7.0", optional = true } [target.'cfg(target_os = "windows")'.dependencies] winapi = { version = "0.3", features = ["winbase", "memoryapi"] } +[features] +selinux-fix = ['memmap'] +default = [] + [dev-dependencies] -cranelift = { path = "../cranelift-umbrella", version = "0.31.0" } -cranelift-frontend = { path = "../cranelift-frontend", version = "0.31.0" } -cranelift-entity = { path = "../cranelift-entity", version = "0.31.0" } +cranelift = { path = "../cranelift-umbrella", version = "0.43.1" } +cranelift-frontend = { path = "../cranelift-frontend", version = "0.43.1" } +cranelift-entity = { path = "../cranelift-entity", version = "0.43.1" } [badges] maintenance = { status = "experimental" } diff --git a/cranelift-simplejit/src/backend.rs b/cranelift-simplejit/src/backend.rs index c182ada67..68d603af3 100644 --- a/cranelift-simplejit/src/backend.rs +++ b/cranelift-simplejit/src/backend.rs @@ -1,13 +1,17 @@ //! Defines `SimpleJITBackend`. use crate::memory::Memory; -use cranelift_codegen::binemit::{Addend, CodeOffset, NullTrapSink, Reloc, RelocSink}; +use cranelift_codegen::binemit::{ + Addend, CodeOffset, NullTrapSink, Reloc, RelocSink, Stackmap, StackmapSink, +}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::{self, ir, settings}; use cranelift_module::{ - Backend, DataContext, DataDescription, Init, Linkage, ModuleNamespace, ModuleResult, + Backend, DataContext, DataDescription, DataId, FuncId, Init, Linkage, ModuleNamespace, + ModuleResult, }; use cranelift_native; +#[cfg(not(windows))] use libc; use std::collections::HashMap; use std::ffi::CString; @@ -127,6 +131,13 @@ struct RelocRecord { addend: Addend, } +struct StackmapRecord { + #[allow(dead_code)] + offset: CodeOffset, + #[allow(dead_code)] + stackmap: Stackmap, +} + pub struct SimpleJITCompiledFunction { code: *mut u8, size: usize, @@ -212,12 +223,13 @@ impl<'simple_jit_backend> Backend for SimpleJITBackend { &*self.isa } - fn declare_function(&mut self, _name: &str, _linkage: Linkage) { + fn declare_function(&mut self, _id: FuncId, _name: &str, _linkage: Linkage) { // Nothing to do. } fn declare_data( &mut self, + _id: DataId, _name: &str, _linkage: Linkage, _writable: bool, @@ -228,6 +240,7 @@ impl<'simple_jit_backend> Backend for SimpleJITBackend { fn define_function( &mut self, + _id: FuncId, name: &str, ctx: &cranelift_codegen::Context, _namespace: &ModuleNamespace, @@ -253,7 +266,16 @@ impl<'simple_jit_backend> Backend for SimpleJITBackend { // Ignore traps for now. For now, frontends should just avoid generating code // that traps. 
let mut trap_sink = NullTrapSink {}; - unsafe { ctx.emit_to_memory(&*self.isa, ptr, &mut reloc_sink, &mut trap_sink) }; + let mut stackmap_sink = SimpleJITStackmapSink::new(); + unsafe { + ctx.emit_to_memory( + &*self.isa, + ptr, + &mut reloc_sink, + &mut trap_sink, + &mut stackmap_sink, + ) + }; Ok(Self::CompiledFunction { code: ptr, @@ -264,6 +286,7 @@ fn define_data( &mut self, + _id: DataId, _name: &str, writable: bool, align: Option<u8>, @@ -353,6 +376,7 @@ fn finalize_function( &mut self, + _id: FuncId, func: &Self::CompiledFunction, namespace: &ModuleNamespace, ) -> Self::FinalizedFunction { @@ -406,6 +430,7 @@ fn finalize_data( &mut self, + _id: DataId, data: &Self::CompiledData, namespace: &ModuleNamespace, ) -> Self::FinalizedData { @@ -547,4 +572,34 @@ impl RelocSink for SimpleJITRelocSink { } } } + + fn reloc_constant(&mut self, _offset: CodeOffset, reloc: Reloc, _constant: ir::ConstantOffset) { + match reloc { + Reloc::X86PCRelRodata4 => { + // Not necessary to record this unless we are going to split apart code and its + // jumptbl/rodata. + } + _ => { + panic!("Unhandled reloc"); + } + } + } +} + +struct SimpleJITStackmapSink { + pub stackmaps: Vec<StackmapRecord>, +} + +impl SimpleJITStackmapSink { + pub fn new() -> Self { + Self { + stackmaps: Vec::new(), + } + } +} + +impl StackmapSink for SimpleJITStackmapSink { + fn add_stackmap(&mut self, offset: CodeOffset, stackmap: Stackmap) { + self.stackmaps.push(StackmapRecord { offset, stackmap }); + } } diff --git a/cranelift-simplejit/src/memory.rs b/cranelift-simplejit/src/memory.rs index 6b0419331..c50bb480d 100644 --- a/cranelift-simplejit/src/memory.rs +++ b/cranelift-simplejit/src/memory.rs @@ -1,5 +1,12 @@ +#[cfg(not(feature = "selinux-fix"))] use errno; + +#[cfg(not(any(feature = "selinux-fix", windows)))] use libc; + +#[cfg(feature = "selinux-fix")] +use memmap::MmapMut; + use region; use std::mem; use std::ptr; @@ -11,6 +18,9 @@ fn round_up_to_page_size(size: usize, page_size: usize) -> usize { /// A simple struct consisting of a pointer and length. struct PtrLen { + #[cfg(feature = "selinux-fix")] + map: Option<MmapMut>, + ptr: *mut u8, len: usize, } @@ -19,6 +29,9 @@ impl PtrLen { /// Create a new empty `PtrLen`. fn new() -> Self { Self { + #[cfg(feature = "selinux-fix")] + map: None, + ptr: ptr::null_mut(), len: 0, } } /// Create a new `PtrLen` pointing to at least `size` bytes of memory, /// suitably sized and aligned for memory protection. - #[cfg(not(target_os = "windows"))] + #[cfg(all(not(target_os = "windows"), feature = "selinux-fix"))] fn with_size(size: usize) -> Result<Self, String> { + let page_size = region::page::size(); + let alloc_size = round_up_to_page_size(size, page_size); + let map = MmapMut::map_anon(alloc_size); + + match map { + Ok(mut map) => { + // The order here is important; we assign the pointer first to get + // around compile time borrow errors.
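+                    // (Taking `map.as_mut_ptr()` borrows `map` mutably, so it must happen + // before `map` is moved into the struct's `map` field.)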
+                Ok(Self { + ptr: map.as_mut_ptr(), + map: Some(map), + len: alloc_size, + }) + } + Err(e) => Err(e.to_string()), + } + } + + #[cfg(all(not(target_os = "windows"), not(feature = "selinux-fix")))] + fn with_size(size: usize) -> Result<Self, String> { + let mut ptr = ptr::null_mut(); let page_size = region::page::size(); let alloc_size = round_up_to_page_size(size, page_size); unsafe { - let mut ptr: *mut libc::c_void = mem::uninitialized(); let err = libc::posix_memalign(&mut ptr, page_size, alloc_size); + if err == 0 { Ok(Self { ptr: ptr as *mut u8, @@ -122,11 +156,26 @@ impl Memory { pub fn set_readable_and_executable(&mut self) { self.finish_current(); - for &PtrLen { ptr, len } in &self.allocations[self.executable..] { - if len != 0 { - unsafe { - region::protect(ptr, len, region::Protection::ReadExecute) - .expect("unable to make memory readable+executable"); + #[cfg(feature = "selinux-fix")] + { + for &PtrLen { ref map, ptr, len } in &self.allocations[self.executable..] { + if len != 0 && map.is_some() { + unsafe { + region::protect(ptr, len, region::Protection::ReadExecute) + .expect("unable to make memory readable+executable"); + } + } + } + } + + #[cfg(not(feature = "selinux-fix"))] + { + for &PtrLen { ptr, len } in &self.allocations[self.executable..]
{ + if len != 0 { + unsafe { + region::protect(ptr, len, region::Protection::Read) + .expect("unable to make memory readonly"); + } } } } diff --git a/cranelift-umbrella/Cargo.toml b/cranelift-umbrella/Cargo.toml index 82c30d6bb..3f605832a 100644 --- a/cranelift-umbrella/Cargo.toml +++ b/cranelift-umbrella/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["The Cranelift Project Developers"] name = "cranelift" -version = "0.31.0" +version = "0.43.1" description = "Umbrella for commonly-used cranelift crates" license = "Apache-2.0 WITH LLVM-exception" documentation = "https://cranelift.readthedocs.io/" @@ -12,8 +12,8 @@ keywords = ["compile", "compiler", "jit"] edition = "2018" [dependencies] -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -cranelift-frontend = { path = "../cranelift-frontend", version = "0.31.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +cranelift-frontend = { path = "../cranelift-frontend", version = "0.43.1", default-features = false } [features] default = ["std"] diff --git a/cranelift-wasm/Cargo.toml b/cranelift-wasm/Cargo.toml index 0da33773b..a87e4a4cc 100644 --- a/cranelift-wasm/Cargo.toml +++ b/cranelift-wasm/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cranelift-wasm" -version = "0.31.0" +version = "0.43.1" authors = ["The Cranelift Project Developers"] description = "Translator from WebAssembly to Cranelift IR" repository = "https://github.com/CraneStation/cranelift" @@ -11,23 +11,28 @@ keywords = ["webassembly", "wasm"] edition = "2018" [dependencies] -wasmparser = { version = "0.35.1", default-features = false } -cranelift-codegen = { path = "../cranelift-codegen", version = "0.31.0", default-features = false } -cranelift-entity = { path = "../cranelift-entity", version = "0.31.0", default-features = false } -cranelift-frontend = { path = "../cranelift-frontend", version = "0.31.0", default-features = false } +wasmparser = { version = "0.37.0", default-features = false } +cranelift-codegen = { path = "../cranelift-codegen", version = "0.43.1", default-features = false } +cranelift-entity = { path = "../cranelift-entity", version = "0.43.1", default-features = false } +cranelift-frontend = { path = "../cranelift-frontend", version = "0.43.1", default-features = false } hashmap_core = { version = "0.1.9", optional = true } failure = { version = "0.1.1", default-features = false, features = ["derive"] } failure_derive = { version = "0.1.1", default-features = false } log = { version = "0.4.6", default-features = false } +serde = { version = "1.0.94", features = ["derive"], optional = true } [dev-dependencies] -wabt = "0.7.0" -target-lexicon = "0.4.0" +wabt = "0.9.1" +target-lexicon = "0.8.1" [features] default = ["std"] std = ["cranelift-codegen/std", "cranelift-frontend/std", "wasmparser/std"] core = ["hashmap_core", "cranelift-codegen/core", "cranelift-frontend/core", "wasmparser/core"] +enable-serde = ["serde"] + +# Temporary feature that enforces basic block semantics. +basic-blocks = ["cranelift-codegen/basic-blocks", "cranelift-frontend/basic-blocks"] [badges] maintenance = { status = "experimental" } diff --git a/cranelift-wasm/src/code_translator.rs b/cranelift-wasm/src/code_translator.rs index 0d53557ff..fef3215c5 100644 --- a/cranelift-wasm/src/code_translator.rs +++ b/cranelift-wasm/src/code_translator.rs @@ -23,16 +23,17 @@ //! That is why `translate_function_body` takes an object having the `WasmRuntime` trait as //! 
argument. use super::{hash_map, HashMap}; -use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmError, WasmResult}; +use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmResult}; use crate::state::{ControlStackFrame, TranslationState}; use crate::translation_utils::{ blocktype_to_type, f32_translation, f64_translation, num_return_values, }; use crate::translation_utils::{FuncIndex, MemoryIndex, SignatureIndex, TableIndex}; +use crate::wasm_unsupported; use core::{i32, u32}; use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; use cranelift_codegen::ir::types::*; -use cranelift_codegen::ir::{self, InstBuilder, JumpTableData, MemFlags, ValueLabel}; +use cranelift_codegen::ir::{self, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_frontend::{FunctionBuilder, Variable}; use wasmparser::{MemoryImmediate, Operator}; @@ -92,7 +93,7 @@ pub fn translate_operator( } Operator::SetGlobal { global_index } => { match state.get_global(builder.func, *global_index, environ)? { - GlobalVariable::Const(_) => panic!("global #{} is a constant", global_index), + GlobalVariable::Const(_) => panic!("global #{} is a constant", *global_index), GlobalVariable::Memory { gv, offset, ty } => { let addr = builder.ins().global_value(environ.pointer_type(), gv); let flags = ir::MemFlags::trusted(); @@ -132,19 +133,19 @@ pub fn translate_operator( ***********************************************************************************/ Operator::Block { ty } => { let next = builder.create_ebb(); - if let Ok(ty_cre) = blocktype_to_type(*ty) { + if let Some(ty_cre) = blocktype_to_type(*ty)? { builder.append_ebb_param(next, ty_cre); } - state.push_block(next, num_return_values(*ty)); + state.push_block(next, num_return_values(*ty)?); } Operator::Loop { ty } => { let loop_body = builder.create_ebb(); let next = builder.create_ebb(); - if let Ok(ty_cre) = blocktype_to_type(*ty) { + if let Some(ty_cre) = blocktype_to_type(*ty)? { builder.append_ebb_param(next, ty_cre); } builder.ins().jump(loop_body, &[]); - state.push_loop(loop_body, next, num_return_values(*ty)); + state.push_loop(loop_body, next, num_return_values(*ty)?); builder.switch_to_block(loop_body); environ.translate_loop_header(builder.cursor())?; } @@ -152,16 +153,25 @@ let val = state.pop1(); let if_not = builder.create_ebb(); let jump_inst = builder.ins().brz(val, if_not, &[]); + + #[cfg(feature = "basic-blocks")] + { + let next_ebb = builder.create_ebb(); + builder.ins().jump(next_ebb, &[]); + builder.seal_block(next_ebb); // Only predecessor is the current block. + builder.switch_to_block(next_ebb); + } + // Here we append an argument to an Ebb targeted by an argumentless jump instruction. // But in fact there are two cases: // - either the If does not have an Else clause, in which case ty = EmptyBlock // and we add nothing; // - or the If has an Else clause, in which case the destination of this jump // instruction will be changed later when we translate the Else operator. - if let Ok(ty_cre) = blocktype_to_type(*ty) { + if let Some(ty_cre) = blocktype_to_type(*ty)?
{ builder.append_ebb_param(if_not, ty_cre); } - state.push_if(jump_inst, if_not, num_return_values(*ty)); + state.push_if(jump_inst, if_not, num_return_values(*ty)?); } Operator::Else => { // We take the control frame pushed by the if, use its ebb as the else body @@ -824,6 +834,12 @@ pub fn translate_operator( Operator::F32Le | Operator::F64Le => { translate_fcmp(FloatCC::LessThanOrEqual, builder, state) } + Operator::RefNull => state.push1(builder.ins().null(environ.reference_type())), + Operator::RefIsNull => { + let arg = state.pop1(); + let val = builder.ins().is_null(arg); + state.push1(val); + } Operator::Wake { .. } | Operator::I32Wait { .. } | Operator::I64Wait { .. } @@ -891,10 +907,7 @@ pub fn translate_operator( | Operator::I64AtomicRmw16UCmpxchg { .. } | Operator::I64AtomicRmw32UCmpxchg { .. } | Operator::Fence { .. } => { - return Err(WasmError::Unsupported("proposed thread operators")); - } - Operator::RefNull | Operator::RefIsNull { .. } => { - return Err(WasmError::Unsupported("proposed reference-type operators")); + wasm_unsupported!("proposed thread operator {:?}", op); } Operator::MemoryInit { .. } | Operator::DataDrop { .. } @@ -907,31 +920,78 @@ pub fn translate_operator( | Operator::TableSet { .. } | Operator::TableGrow { .. } | Operator::TableSize { .. } => { - return Err(WasmError::Unsupported("proposed bulk memory operators")); + wasm_unsupported!("proposed bulk memory operator {:?}", op); } - Operator::V128Load { .. } - | Operator::V128Store { .. } - | Operator::V128Const { .. } - | Operator::I8x16Splat - | Operator::I8x16ExtractLaneS { .. } - | Operator::I8x16ExtractLaneU { .. } - | Operator::I8x16ReplaceLane { .. } + Operator::V128Const { value } => { + let handle = builder.func.dfg.constants.insert(value.bytes().to_vec()); + let value = builder.ins().vconst(I8X16, handle); + // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type before use + state.push1(value) + } + Operator::I8x16Splat | Operator::I16x8Splat - | Operator::I16x8ExtractLaneS { .. } - | Operator::I16x8ExtractLaneU { .. } - | Operator::I16x8ReplaceLane { .. } | Operator::I32x4Splat - | Operator::I32x4ExtractLane { .. } - | Operator::I32x4ReplaceLane { .. } | Operator::I64x2Splat - | Operator::I64x2ExtractLane { .. } - | Operator::I64x2ReplaceLane { .. } | Operator::F32x4Splat - | Operator::F32x4ExtractLane { .. } - | Operator::F32x4ReplaceLane { .. } - | Operator::F64x2Splat - | Operator::F64x2ExtractLane { .. } - | Operator::F64x2ReplaceLane { .. 
} + | Operator::F64x2Splat => { + let value_to_splat = state.pop1(); + let ty = type_of(op); + let splatted = builder.ins().splat(ty, value_to_splat); + state.push1(splatted) + } + Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => { + let vector = optionally_bitcast_vector(state.pop1(), type_of(op), builder); + let extracted = builder.ins().extractlane(vector, lane.clone()); + state.push1(builder.ins().sextend(I32, extracted)) + } + Operator::I8x16ExtractLaneU { lane } | Operator::I16x8ExtractLaneU { lane } => { + let vector = optionally_bitcast_vector(state.pop1(), type_of(op), builder); + state.push1(builder.ins().extractlane(vector, lane.clone())); + // on x86, PEXTRB zeroes the upper bits of the destination register of extractlane so uextend is elided; of course, this depends on extractlane being legalized to a PEXTRB + } + Operator::I32x4ExtractLane { lane } + | Operator::I64x2ExtractLane { lane } + | Operator::F32x4ExtractLane { lane } + | Operator::F64x2ExtractLane { lane } => { + let vector = optionally_bitcast_vector(state.pop1(), type_of(op), builder); + state.push1(builder.ins().extractlane(vector, lane.clone())) + } + Operator::I8x16ReplaceLane { lane } + | Operator::I16x8ReplaceLane { lane } + | Operator::I32x4ReplaceLane { lane } + | Operator::I64x2ReplaceLane { lane } + | Operator::F32x4ReplaceLane { lane } + | Operator::F64x2ReplaceLane { lane } => { + let (vector, replacement_value) = state.pop2(); + let original_vector_type = builder.func.dfg.value_type(vector); + let vector = optionally_bitcast_vector(vector, type_of(op), builder); + let replaced_vector = builder + .ins() + .insertlane(vector, lane.clone(), replacement_value); + state.push1(optionally_bitcast_vector( + replaced_vector, + original_vector_type, + builder, + )) + } + Operator::V8x16Shuffle { lanes, .. } => { + let (vector_a, vector_b) = state.pop2(); + let a = optionally_bitcast_vector(vector_a, I8X16, builder); + let b = optionally_bitcast_vector(vector_b, I8X16, builder); + let mask = builder.func.dfg.immediates.push(lanes.to_vec()); + let shuffled = builder.ins().shuffle(a, b, mask); + state.push1(shuffled) + // At this point the original types of a and b are lost; users of this value (i.e. this + // WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due + // to WASM using the less specific v128 type for certain operations and more specific + // types (e.g. i8x16) for others. + } + Operator::I8x16Add | Operator::I16x8Add | Operator::I32x4Add | Operator::I64x2Add => { + let (a, b) = state.pop2(); + state.push1(builder.ins().iadd(a, b)) + } + Operator::V128Load { .. } + | Operator::V128Store { .. 
} | Operator::I8x16Eq | Operator::I8x16Ne | Operator::I8x16LtS @@ -985,7 +1045,6 @@ pub fn translate_operator( | Operator::I8x16Shl | Operator::I8x16ShrS | Operator::I8x16ShrU - | Operator::I8x16Add | Operator::I8x16AddSaturateS | Operator::I8x16AddSaturateU | Operator::I8x16Sub @@ -998,7 +1057,6 @@ pub fn translate_operator( | Operator::I16x8Shl | Operator::I16x8ShrS | Operator::I16x8ShrU - | Operator::I16x8Add | Operator::I16x8AddSaturateS | Operator::I16x8AddSaturateU | Operator::I16x8Sub @@ -1011,7 +1069,6 @@ pub fn translate_operator( | Operator::I32x4Shl | Operator::I32x4ShrS | Operator::I32x4ShrU - | Operator::I32x4Add | Operator::I32x4Sub | Operator::I32x4Mul | Operator::I64x2Neg @@ -1020,7 +1077,6 @@ pub fn translate_operator( | Operator::I64x2Shl | Operator::I64x2ShrS | Operator::I64x2ShrU - | Operator::I64x2Add | Operator::I64x2Sub | Operator::F32x4Abs | Operator::F32x4Neg @@ -1047,14 +1103,13 @@ pub fn translate_operator( | Operator::F32x4ConvertSI32x4 | Operator::F32x4ConvertUI32x4 | Operator::F64x2ConvertSI64x2 - | Operator::F64x2ConvertUI64x2 + | Operator::F64x2ConvertUI64x2 { .. } | Operator::V8x16Swizzle - | Operator::V8x16Shuffle { .. } | Operator::I8x16LoadSplat { .. } | Operator::I16x8LoadSplat { .. } | Operator::I32x4LoadSplat { .. } | Operator::I64x2LoadSplat { .. } => { - return Err(WasmError::Unsupported("proposed SIMD operators")); + wasm_unsupported!("proposed SIMD operator {:?}", op); } }; Ok(()) @@ -1248,6 +1303,14 @@ fn translate_br_if( let val = state.pop1(); let (br_destination, inputs) = translate_br_if_args(relative_depth, state); builder.ins().brnz(val, br_destination, inputs); + + #[cfg(feature = "basic-blocks")] + { + let next_ebb = builder.create_ebb(); + builder.ins().jump(next_ebb, &[]); + builder.seal_block(next_ebb); // The only predecessor is the current block. + builder.switch_to_block(next_ebb); + } } fn translate_br_if_args( @@ -1270,3 +1333,174 @@ fn translate_br_if_args( let inputs = state.peekn(return_count); (br_destination, inputs) } + +/// Determine the returned value type of a WebAssembly operator +fn type_of(operator: &Operator) -> Type { + match operator { + Operator::V128Load { .. } + | Operator::V128Store { .. } + | Operator::V128Const { .. } + | Operator::V128Not + | Operator::V128And + | Operator::V128Or + | Operator::V128Xor + | Operator::V128Bitselect => I8X16, // default type representing V128 + + Operator::V8x16Shuffle { .. } + | Operator::I8x16Splat + | Operator::I8x16ExtractLaneS { .. } + | Operator::I8x16ExtractLaneU { .. } + | Operator::I8x16ReplaceLane { .. } + | Operator::I8x16Eq + | Operator::I8x16Ne + | Operator::I8x16LtS + | Operator::I8x16LtU + | Operator::I8x16GtS + | Operator::I8x16GtU + | Operator::I8x16LeS + | Operator::I8x16LeU + | Operator::I8x16GeS + | Operator::I8x16GeU + | Operator::I8x16Neg + | Operator::I8x16AnyTrue + | Operator::I8x16AllTrue + | Operator::I8x16Shl + | Operator::I8x16ShrS + | Operator::I8x16ShrU + | Operator::I8x16Add + | Operator::I8x16AddSaturateS + | Operator::I8x16AddSaturateU + | Operator::I8x16Sub + | Operator::I8x16SubSaturateS + | Operator::I8x16SubSaturateU + | Operator::I8x16Mul => I8X16, + + Operator::I16x8Splat + | Operator::I16x8ExtractLaneS { .. } + | Operator::I16x8ExtractLaneU { .. } + | Operator::I16x8ReplaceLane { .. 
} + | Operator::I16x8Eq + | Operator::I16x8Ne + | Operator::I16x8LtS + | Operator::I16x8LtU + | Operator::I16x8GtS + | Operator::I16x8GtU + | Operator::I16x8LeS + | Operator::I16x8LeU + | Operator::I16x8GeS + | Operator::I16x8GeU + | Operator::I16x8Neg + | Operator::I16x8AnyTrue + | Operator::I16x8AllTrue + | Operator::I16x8Shl + | Operator::I16x8ShrS + | Operator::I16x8ShrU + | Operator::I16x8Add + | Operator::I16x8AddSaturateS + | Operator::I16x8AddSaturateU + | Operator::I16x8Sub + | Operator::I16x8SubSaturateS + | Operator::I16x8SubSaturateU + | Operator::I16x8Mul => I16X8, + + Operator::I32x4Splat + | Operator::I32x4ExtractLane { .. } + | Operator::I32x4ReplaceLane { .. } + | Operator::I32x4Eq + | Operator::I32x4Ne + | Operator::I32x4LtS + | Operator::I32x4LtU + | Operator::I32x4GtS + | Operator::I32x4GtU + | Operator::I32x4LeS + | Operator::I32x4LeU + | Operator::I32x4GeS + | Operator::I32x4GeU + | Operator::I32x4Neg + | Operator::I32x4AnyTrue + | Operator::I32x4AllTrue + | Operator::I32x4Shl + | Operator::I32x4ShrS + | Operator::I32x4ShrU + | Operator::I32x4Add + | Operator::I32x4Sub + | Operator::I32x4Mul + | Operator::F32x4ConvertSI32x4 + | Operator::F32x4ConvertUI32x4 => I32X4, + + Operator::I64x2Splat + | Operator::I64x2ExtractLane { .. } + | Operator::I64x2ReplaceLane { .. } + | Operator::I64x2Neg + | Operator::I64x2AnyTrue + | Operator::I64x2AllTrue + | Operator::I64x2Shl + | Operator::I64x2ShrS + | Operator::I64x2ShrU + | Operator::I64x2Add + | Operator::I64x2Sub + | Operator::F64x2ConvertSI64x2 + | Operator::F64x2ConvertUI64x2 => I64X2, + + Operator::F32x4Splat + | Operator::F32x4ExtractLane { .. } + | Operator::F32x4ReplaceLane { .. } + | Operator::F32x4Eq + | Operator::F32x4Ne + | Operator::F32x4Lt + | Operator::F32x4Gt + | Operator::F32x4Le + | Operator::F32x4Ge + | Operator::F32x4Abs + | Operator::F32x4Neg + | Operator::F32x4Sqrt + | Operator::F32x4Add + | Operator::F32x4Sub + | Operator::F32x4Mul + | Operator::F32x4Div + | Operator::F32x4Min + | Operator::F32x4Max + | Operator::I32x4TruncSF32x4Sat + | Operator::I32x4TruncUF32x4Sat => F32X4, + + Operator::F64x2Splat + | Operator::F64x2ExtractLane { .. } + | Operator::F64x2ReplaceLane { .. 
} + | Operator::F64x2Eq + | Operator::F64x2Ne + | Operator::F64x2Lt + | Operator::F64x2Gt + | Operator::F64x2Le + | Operator::F64x2Ge + | Operator::F64x2Abs + | Operator::F64x2Neg + | Operator::F64x2Sqrt + | Operator::F64x2Add + | Operator::F64x2Sub + | Operator::F64x2Mul + | Operator::F64x2Div + | Operator::F64x2Min + | Operator::F64x2Max + | Operator::I64x2TruncSF64x2Sat + | Operator::I64x2TruncUF64x2Sat => F64X2, + + _ => unimplemented!( + "Currently only the SIMD instructions are translated to their return type: {:?}", + operator + ), + } +} + +/// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by +/// adding a raw_bitcast if necessary. +fn optionally_bitcast_vector( + value: Value, + needed_type: Type, + builder: &mut FunctionBuilder, +) -> Value { + if builder.func.dfg.value_type(value) != needed_type { + builder.ins().raw_bitcast(needed_type, value) + } else { + value + } +} diff --git a/cranelift-wasm/src/environ/dummy.rs b/cranelift-wasm/src/environ/dummy.rs index 0404ccb52..fde1e6b19 100644 --- a/cranelift-wasm/src/environ/dummy.rs +++ b/cranelift-wasm/src/environ/dummy.rs @@ -17,7 +17,7 @@ use cranelift_codegen::ir::immediates::{Offset32, Uimm64}; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{self, InstBuilder}; use cranelift_codegen::isa::TargetFrontendConfig; -use cranelift_entity::{EntityRef, PrimaryMap}; +use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; use std::boxed::Box; use std::string::String; use std::vec::Vec; @@ -124,6 +124,9 @@ pub struct DummyEnvironment { /// Instructs to collect debug data during translation. debug_info: bool, + + /// Function names. + function_names: SecondaryMap<FuncIndex, String>, } impl DummyEnvironment { @@ -135,6 +138,7 @@ impl DummyEnvironment { func_bytecode_sizes: Vec::new(), return_mode, debug_info, + function_names: SecondaryMap::new(), } } @@ -152,6 +156,12 @@ impl DummyEnvironment { pub fn get_num_func_imports(&self) -> usize { self.info.imported_funcs.len() } + + /// Return the name of the function, if a name for the function with + /// the corresponding index exists. + pub fn get_func_name(&self, func_index: FuncIndex) -> Option<&str> { + self.function_names.get(func_index).map(String::as_ref) + } } /// The `FuncEnvironment` implementation for use by the `DummyEnvironment`.
@@ -367,8 +377,9 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { self.info.config } - fn declare_signature(&mut self, sig: ir::Signature) { + fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()> { self.info.signatures.push(sig); + Ok(()) } fn declare_func_import( &mut self, sig_index: SignatureIndex, module: &'data str, field: &'data str, - ) { + ) -> WasmResult<()> { assert_eq!( self.info.functions.len(), self.info.imported_funcs.len(), @@ -386,32 +397,48 @@ self.info .imported_funcs .push((String::from(module), String::from(field))); + Ok(()) } - fn declare_func_type(&mut self, sig_index: SignatureIndex) { + fn declare_func_type(&mut self, sig_index: SignatureIndex) -> WasmResult<()> { self.info.functions.push(Exportable::new(sig_index)); + Ok(()) } - fn declare_global(&mut self, global: Global) { + fn declare_global(&mut self, global: Global) -> WasmResult<()> { self.info.globals.push(Exportable::new(global)); + Ok(()) } - fn declare_global_import(&mut self, global: Global, module: &'data str, field: &'data str) { + fn declare_global_import( + &mut self, + global: Global, + module: &'data str, + field: &'data str, + ) -> WasmResult<()> { self.info.globals.push(Exportable::new(global)); self.info .imported_globals .push((String::from(module), String::from(field))); + Ok(()) } - fn declare_table(&mut self, table: Table) { + fn declare_table(&mut self, table: Table) -> WasmResult<()> { self.info.tables.push(Exportable::new(table)); + Ok(()) } - fn declare_table_import(&mut self, table: Table, module: &'data str, field: &'data str) { + fn declare_table_import( + &mut self, + table: Table, + module: &'data str, + field: &'data str, + ) -> WasmResult<()> { self.info.tables.push(Exportable::new(table)); self.info .imported_tables .push((String::from(module), String::from(field))); + Ok(()) } fn declare_table_elements( @@ -420,19 +447,27 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { _base: Option<GlobalIndex>, _offset: usize, _elements: Box<[FuncIndex]>, - ) { + ) -> WasmResult<()> { // We do nothing + Ok(()) } - fn declare_memory(&mut self, memory: Memory) { + fn declare_memory(&mut self, memory: Memory) -> WasmResult<()> { self.info.memories.push(Exportable::new(memory)); + Ok(()) } - fn declare_memory_import(&mut self, memory: Memory, module: &'data str, field: &'data str) { + fn declare_memory_import( + &mut self, + memory: Memory, + module: &'data str, + field: &'data str, + ) -> WasmResult<()> { self.info.memories.push(Exportable::new(memory)); self.info .imported_memories .push((String::from(module), String::from(field))); + Ok(()) } fn declare_data_initialization( @@ -441,37 +476,55 @@ impl<'data> ModuleEnvironment<'data> for DummyEnvironment { _base: Option<GlobalIndex>, _offset: usize, _data: &'data [u8], - ) { + ) -> WasmResult<()> { // We do nothing + Ok(()) } - fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str) { + fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> { self.info.functions[func_index] .export_names .push(String::from(name)); + Ok(()) } - fn declare_table_export(&mut self, table_index: TableIndex, name: &'data str) { + fn declare_table_export( + &mut self, + table_index: TableIndex, + name: &'data str, + ) -> WasmResult<()> { self.info.tables[table_index] .export_names .push(String::from(name)); + Ok(()) } - fn declare_memory_export(&mut self,
memory_index: MemoryIndex, name: &'data str) { + fn declare_memory_export( + &mut self, + memory_index: MemoryIndex, + name: &'data str, + ) -> WasmResult<()> { self.info.memories[memory_index] .export_names .push(String::from(name)); + Ok(()) } - fn declare_global_export(&mut self, global_index: GlobalIndex, name: &'data str) { + fn declare_global_export( + &mut self, + global_index: GlobalIndex, + name: &'data str, + ) -> WasmResult<()> { self.info.globals[global_index] .export_names .push(String::from(name)); + Ok(()) } - fn declare_start_func(&mut self, func_index: FuncIndex) { + fn declare_start_func(&mut self, func_index: FuncIndex) -> WasmResult<()> { debug_assert!(self.info.start_func.is_none()); self.info.start_func = Some(func_index); + Ok(()) } fn define_function_body( @@ -497,4 +550,9 @@ self.info.function_bodies.push(func); Ok(()) } + + fn declare_func_name(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()> { + self.function_names[func_index] = String::from(name); + Ok(()) + } } diff --git a/cranelift-wasm/src/environ/mod.rs b/cranelift-wasm/src/environ/mod.rs index 831fa0ef5..4b7405ea7 100644 --- a/cranelift-wasm/src/environ/mod.rs +++ b/cranelift-wasm/src/environ/mod.rs @@ -1,6 +1,7 @@ //! Support for configurable wasm translation. mod dummy; +#[macro_use] mod spec; pub use crate::environ::dummy::DummyEnvironment; diff --git a/cranelift-wasm/src/environ/spec.rs b/cranelift-wasm/src/environ/spec.rs index d30792267..ba87cc992 100644 --- a/cranelift-wasm/src/environ/spec.rs +++ b/cranelift-wasm/src/environ/spec.rs @@ -6,6 +6,7 @@ //! //! [Wasmtime]: https://github.com/CraneStation/wasmtime +use crate::state::VisibleTranslationState; use crate::translation_utils::{ FuncIndex, Global, GlobalIndex, Memory, MemoryIndex, SignatureIndex, Table, TableIndex, }; @@ -14,9 +15,11 @@ use cranelift_codegen::cursor::FuncCursor; use cranelift_codegen::ir::immediates::Offset32; use cranelift_codegen::ir::{self, InstBuilder}; use cranelift_codegen::isa::TargetFrontendConfig; +use cranelift_frontend::FunctionBuilder; use failure_derive::Fail; use std::boxed::Box; use wasmparser::BinaryReaderError; +use wasmparser::Operator; /// The value of a WebAssembly global variable. #[derive(Clone, Copy)] @@ -57,7 +60,7 @@ pub enum WasmError { /// /// Embedding environments may have their own limitations and feature restrictions. #[fail(display = "Unsupported feature: {}", _0)] - Unsupported(&'static str), + Unsupported(std::string::String), /// An implementation limit was exceeded. /// @@ -73,6 +76,13 @@ pub enum WasmError { User(std::string::String), } +/// Return an `Err(WasmError::Unsupported(msg))` where `msg` is the string built by calling `format!` +/// on the arguments to this macro. +#[macro_export] +macro_rules! wasm_unsupported { + ($($arg:tt)*) => { return Err($crate::environ::WasmError::Unsupported(format!($($arg)*))) } +} + impl From<BinaryReaderError> for WasmError { /// Convert from a `BinaryReaderError` to a `WasmError`. fn from(e: BinaryReaderError) -> Self { @@ -121,6 +131,17 @@ pub trait FuncEnvironment { ReturnMode::NormalReturns } + /// Get the Cranelift reference type to use for native references. + /// + /// This returns `R64` for 64-bit architectures and `R32` for 32-bit architectures.
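+    /// + /// As a sketch of the default implementation below: a `pointer_type()` of `I32` maps to + /// `R32`, `I64` maps to `R64`, and any other pointer type panics.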
+ fn reference_type(&self) -> ir::Type { + match self.pointer_type() { + ir::types::I32 => ir::types::R32, + ir::types::I64 => ir::types::R64, + _ => panic!("unsupported pointer type"), + } + } + /// Set up the necessary preamble definitions in `func` to access the global variable /// identified by `index`. /// @@ -253,6 +274,28 @@ pub trait FuncEnvironment { // By default, don't emit anything. Ok(()) } + + /// Optional callback for the `FunctionEnvironment` performing this translation to maintain + /// internal state or prepare custom state for the operator to translate + fn before_translate_operator( + &mut self, + _op: &Operator, + _builder: &mut FunctionBuilder, + _state: &VisibleTranslationState, + ) -> WasmResult<()> { + Ok(()) + } + + /// Optional callback for the `FunctionEnvironment` performing this translation to maintain + /// internal state or finalize custom state for the operator that was translated + fn after_translate_operator( + &mut self, + _op: &Operator, + _builder: &mut FunctionBuilder, + _state: &VisibleTranslationState, + ) -> WasmResult<()> { + Ok(()) + } } /// An object satisfying the `ModuleEnvironment` trait can be passed as argument to the @@ -264,14 +307,18 @@ pub trait ModuleEnvironment<'data> { /// Provides the number of signatures up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_signatures(&mut self, _num: u32) {} + fn reserve_signatures(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a function signature to the environment. - fn declare_signature(&mut self, sig: ir::Signature); + fn declare_signature(&mut self, sig: ir::Signature) -> WasmResult<()>; /// Provides the number of imports up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_imports(&mut self, _num: u32) {} + fn reserve_imports(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a function import to the environment. fn declare_func_import( @@ -279,73 +326,113 @@ pub trait ModuleEnvironment<'data> { sig_index: SignatureIndex, module: &'data str, field: &'data str, - ); + ) -> WasmResult<()>; /// Declares a table import to the environment. - fn declare_table_import(&mut self, table: Table, module: &'data str, field: &'data str); + fn declare_table_import( + &mut self, + table: Table, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; /// Declares a memory import to the environment. - fn declare_memory_import(&mut self, memory: Memory, module: &'data str, field: &'data str); + fn declare_memory_import( + &mut self, + memory: Memory, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; /// Declares a global import to the environment. - fn declare_global_import(&mut self, global: Global, module: &'data str, field: &'data str); + fn declare_global_import( + &mut self, + global: Global, + module: &'data str, + field: &'data str, + ) -> WasmResult<()>; /// Notifies the implementation that all imports have been declared. - fn finish_imports(&mut self) {} + fn finish_imports(&mut self) -> WasmResult<()> { + Ok(()) + } /// Provides the number of defined functions up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_func_types(&mut self, _num: u32) {} + fn reserve_func_types(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares the type (signature) of a local function in the module. 
- fn declare_func_type(&mut self, sig_index: SignatureIndex); + fn declare_func_type(&mut self, sig_index: SignatureIndex) -> WasmResult<()>; /// Provides the number of defined tables up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_tables(&mut self, _num: u32) {} + fn reserve_tables(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a table to the environment. - fn declare_table(&mut self, table: Table); + fn declare_table(&mut self, table: Table) -> WasmResult<()>; /// Provides the number of defined memories up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_memories(&mut self, _num: u32) {} + fn reserve_memories(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a memory to the environment - fn declare_memory(&mut self, memory: Memory); + fn declare_memory(&mut self, memory: Memory) -> WasmResult<()>; /// Provides the number of defined globals up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_globals(&mut self, _num: u32) {} + fn reserve_globals(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a global to the environment. - fn declare_global(&mut self, global: Global); + fn declare_global(&mut self, global: Global) -> WasmResult<()>; /// Provides the number of exports up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_exports(&mut self, _num: u32) {} + fn reserve_exports(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Declares a function export to the environment. - fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str); + fn declare_func_export(&mut self, func_index: FuncIndex, name: &'data str) -> WasmResult<()>; /// Declares a table export to the environment. - fn declare_table_export(&mut self, table_index: TableIndex, name: &'data str); + fn declare_table_export(&mut self, table_index: TableIndex, name: &'data str) + -> WasmResult<()>; /// Declares a memory export to the environment. - fn declare_memory_export(&mut self, memory_index: MemoryIndex, name: &'data str); + fn declare_memory_export( + &mut self, + memory_index: MemoryIndex, + name: &'data str, + ) -> WasmResult<()>; /// Declares a global export to the environment. - fn declare_global_export(&mut self, global_index: GlobalIndex, name: &'data str); + fn declare_global_export( + &mut self, + global_index: GlobalIndex, + name: &'data str, + ) -> WasmResult<()>; /// Notifies the implementation that all exports have been declared. - fn finish_exports(&mut self) {} + fn finish_exports(&mut self) -> WasmResult<()> { + Ok(()) + } /// Declares the optional start function. - fn declare_start_func(&mut self, index: FuncIndex); + fn declare_start_func(&mut self, index: FuncIndex) -> WasmResult<()>; /// Provides the number of element initializers up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_table_elements(&mut self, _num: u32) {} + fn reserve_table_elements(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Fills a declared table with references to functions in the module. 
fn declare_table_elements( @@ -354,7 +441,7 @@ pub trait ModuleEnvironment<'data> { base: Option<GlobalIndex>, offset: usize, elements: Box<[FuncIndex]>, - ); + ) -> WasmResult<()>; /// Provides the contents of a function body. /// @@ -368,7 +455,9 @@ /// Provides the number of data initializers up front. By default this does nothing, but /// implementations can use this to preallocate memory if desired. - fn reserve_data_initializers(&mut self, _num: u32) {} + fn reserve_data_initializers(&mut self, _num: u32) -> WasmResult<()> { + Ok(()) + } /// Fills a declared memory with bytes at module instantiation. fn declare_data_initialization( @@ -377,5 +466,19 @@ pub trait ModuleEnvironment<'data> { base: Option<GlobalIndex>, offset: usize, data: &'data [u8], - ); + ) -> WasmResult<()>; + + /// Declares the name of a function to the environment. + /// + /// By default this does nothing, but implementations can use this to read + /// the function name subsection of the custom name section if desired. + fn declare_func_name(&mut self, _func_index: FuncIndex, _name: &'data str) -> WasmResult<()> { + Ok(()) + } + + /// Indicates that a custom section has been found in the wasm file. + fn custom_section(&mut self, name: &'data str, data: &'data [u8]) -> WasmResult<()> { + drop((name, data)); + Ok(()) + } } diff --git a/cranelift-wasm/src/func_translator.rs b/cranelift-wasm/src/func_translator.rs index 178147598..d17f3224e 100644 --- a/cranelift-wasm/src/func_translator.rs +++ b/cranelift-wasm/src/func_translator.rs @@ -5,9 +5,10 @@ //! WebAssembly module and the runtime environment. use crate::code_translator::translate_operator; -use crate::environ::{FuncEnvironment, ReturnMode, WasmError, WasmResult}; -use crate::state::TranslationState; +use crate::environ::{FuncEnvironment, ReturnMode, WasmResult}; +use crate::state::{TranslationState, VisibleTranslationState}; use crate::translation_utils::get_vmctx_value_label; +use crate::wasm_unsupported; use cranelift_codegen::entity::EntityRef; use cranelift_codegen::ir::{self, Ebb, InstBuilder, ValueLabel}; use cranelift_codegen::timing; @@ -92,7 +93,8 @@ impl FuncTranslator { let entry_block = builder.create_ebb(); builder.append_ebb_params_for_function_params(entry_block); builder.switch_to_block(entry_block); // This also creates values for the arguments. - builder.seal_block(entry_block); + builder.seal_block(entry_block); // Declare all predecessors known. + // Make sure the entry block is inserted in the layout before we make any callbacks to // `environ`. The callback functions may need to insert things in the entry block. builder.ensure_inserted_ebb(); @@ -105,7 +107,7 @@ builder.append_ebb_params_for_function_returns(exit_block); self.state.initialize(&builder.func.signature, exit_block); - parse_local_decls(&mut reader, &mut builder, num_params)?; + parse_local_decls(&mut reader, &mut builder, num_params, environ)?; parse_function_body(reader, &mut builder, &mut self.state, environ)?; builder.finalize(); @@ -144,10 +146,11 @@ fn declare_wasm_parameters(builder: &mut FunctionBuilder, entry_block: Ebb) -> u /// Parse the local variable declarations that precede the function body. /// /// Declare local variables, starting from `num_params`.
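+/// The `environ` parameter added below lets reference-typed (`anyref`) locals obtain their +/// null value via `FuncEnvironment::reference_type`; see `declare_locals`.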
-fn parse_local_decls(
+fn parse_local_decls<FE: FuncEnvironment + ?Sized>(
     reader: &mut BinaryReader,
     builder: &mut FunctionBuilder,
     num_params: usize,
+    environ: &mut FE,
 ) -> WasmResult<()> {
     let mut next_local = num_params;
     let local_count = reader.read_local_count()?;
@@ -156,7 +159,7 @@ fn parse_local_decls(
     for _ in 0..local_count {
         builder.set_srcloc(cur_srcloc(reader));
         let (count, ty) = reader.read_local_decl(&mut locals_total)?;
-        declare_locals(builder, count, ty, &mut next_local)?;
+        declare_locals(builder, count, ty, &mut next_local, environ)?;
     }

     Ok(())
@@ -165,11 +168,12 @@ fn parse_local_decls(
 /// Declare `count` local variables of the same type, starting from `next_local`.
 ///
 /// Fail if too many locals are declared in the function, or if the type is not valid for a local.
-pub fn declare_locals(
+pub fn declare_locals<FE: FuncEnvironment + ?Sized>(
     builder: &mut FunctionBuilder,
     count: u32,
     wasm_type: wasmparser::Type,
     next_local: &mut usize,
+    environ: &mut FE,
 ) -> WasmResult<()> {
     // All locals are initialized to 0.
     use wasmparser::Type::*;
@@ -178,7 +182,12 @@ pub fn declare_locals(
         I64 => builder.ins().iconst(ir::types::I64, 0),
         F32 => builder.ins().f32const(ir::immediates::Ieee32::with_bits(0)),
         F64 => builder.ins().f64const(ir::immediates::Ieee64::with_bits(0)),
-        _ => return Err(WasmError::Unsupported("unsupported local type")),
+        V128 => {
+            let constant_handle = builder.func.dfg.constants.insert([0; 16].to_vec());
+            builder.ins().vconst(ir::types::I8X16, constant_handle)
+        }
+        AnyRef => builder.ins().null(environ.reference_type()),
+        ty => wasm_unsupported!("unsupported local type {:?}", ty),
     };
     let ty = builder.func.dfg.value_type(zeroval);
@@ -209,7 +218,9 @@ fn parse_function_body(
     while !state.control_stack.is_empty() {
         builder.set_srcloc(cur_srcloc(&reader));
         let op = reader.read_operator()?;
+        environ.before_translate_operator(&op, builder, &VisibleTranslationState::new(state))?;
         translate_operator(&op, builder, state, environ)?;
+        environ.after_translate_operator(&op, builder, &VisibleTranslationState::new(state))?;
     }

     // The final `End` operator left us in the exit block where we need to manually add a return
diff --git a/cranelift-wasm/src/lib.rs b/cranelift-wasm/src/lib.rs
index c27149089..14883fa1c 100644
--- a/cranelift-wasm/src/lib.rs
+++ b/cranelift-wasm/src/lib.rs
@@ -28,7 +28,6 @@
     )
 )]
 #![no_std]
-#![cfg_attr(not(feature = "std"), feature(alloc))]

 #[cfg(not(feature = "std"))]
 #[macro_use]
@@ -65,6 +64,7 @@ pub use crate::environ::{
 pub use crate::func_translator::{declare_locals, FuncTranslator};
 pub use crate::module_translator::translate_module;
 pub use crate::state::TranslationState;
+pub use crate::state::VisibleTranslationState;
 pub use crate::translation_utils::{
     get_vmctx_value_label, DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex,
     DefinedTableIndex, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, MemoryIndex,
diff --git a/cranelift-wasm/src/module_translator.rs b/cranelift-wasm/src/module_translator.rs
index 72f257ace..8d8b55397 100644
--- a/cranelift-wasm/src/module_translator.rs
+++ b/cranelift-wasm/src/module_translator.rs
@@ -1,16 +1,16 @@
 //! Translation skeleton that traverses the whole WebAssembly module and calls helper functions
 //! to deal with each part of it.
-use crate::environ::{ModuleEnvironment, WasmResult}; +use crate::environ::{ModuleEnvironment, WasmError, WasmResult}; use crate::sections_translator::{ parse_code_section, parse_data_section, parse_element_section, parse_export_section, parse_function_section, parse_global_section, parse_import_section, parse_memory_section, - parse_start_section, parse_table_section, parse_type_section, + parse_name_section, parse_start_section, parse_table_section, parse_type_section, }; use cranelift_codegen::timing; -use wasmparser::{ModuleReader, SectionCode}; +use wasmparser::{CustomSectionKind, ModuleReader, SectionCode}; /// Translate a sequence of bytes forming a valid Wasm binary into a list of valid Cranelift IR -/// [`Function`](../codegen/ir/function/struct.Function.html). +/// [`Function`](cranelift_codegen::ir::Function). pub fn translate_module<'data>( data: &'data [u8], environ: &mut dyn ModuleEnvironment<'data>, @@ -18,125 +18,86 @@ pub fn translate_module<'data>( let _tt = timing::wasm_translate_module(); let mut reader = ModuleReader::new(data)?; - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - let mut section = reader.read()?; - - if let SectionCode::Type = section.code { - let types = section.get_type_section_reader()?; - parse_type_section(types, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Import = section.code { - let imports = section.get_import_section_reader()?; - parse_import_section(imports, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Function = section.code { - let functions = section.get_function_section_reader()?; - parse_function_section(functions, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); + while !reader.eof() { + let section = reader.read()?; + match section.code { + SectionCode::Type => { + let types = section.get_type_section_reader()?; + parse_type_section(types, environ)?; + } + + SectionCode::Import => { + let imports = section.get_import_section_reader()?; + parse_import_section(imports, environ)?; + } + + SectionCode::Function => { + let functions = section.get_function_section_reader()?; + parse_function_section(functions, environ)?; + } + + SectionCode::Table => { + let tables = section.get_table_section_reader()?; + parse_table_section(tables, environ)?; + } + + SectionCode::Memory => { + let memories = section.get_memory_section_reader()?; + parse_memory_section(memories, environ)?; + } + + SectionCode::Global => { + let globals = section.get_global_section_reader()?; + parse_global_section(globals, environ)?; + } + + SectionCode::Export => { + let exports = section.get_export_section_reader()?; + parse_export_section(exports, environ)?; + } + + SectionCode::Start => { + let start = section.get_start_section_content()?; + parse_start_section(start, environ)?; + } + + SectionCode::Element => { + let elements = section.get_element_section_reader()?; + parse_element_section(elements, environ)?; + } + + SectionCode::Code => { + let code = section.get_code_section_reader()?; + parse_code_section(code, environ)?; + } + + SectionCode::Data => { + let data = section.get_data_section_reader()?; + parse_data_section(data, environ)?; + } + + SectionCode::DataCount => { + return Err(WasmError::InvalidWebAssembly { + message: "don't know how to handle the data count section yet", + offset: 
reader.current_position(), + }); + } + + SectionCode::Custom { + kind: CustomSectionKind::Name, + name: _, + } => { + let names = section.get_name_section_reader()?; + parse_name_section(names, environ)?; + } + + SectionCode::Custom { name, kind: _ } => { + let mut reader = section.get_binary_reader(); + let len = reader.bytes_remaining(); + let payload = reader.read_bytes(len)?; + environ.custom_section(name, payload)?; + } } - section = reader.read()?; - } - - if let SectionCode::Table = section.code { - let tables = section.get_table_section_reader()?; - parse_table_section(tables, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Memory = section.code { - let memories = section.get_memory_section_reader()?; - parse_memory_section(memories, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Global = section.code { - let globals = section.get_global_section_reader()?; - parse_global_section(globals, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Export = section.code { - let exports = section.get_export_section_reader()?; - parse_export_section(exports, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Start = section.code { - let start = section.get_start_section_content()?; - parse_start_section(start, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Element = section.code { - let elements = section.get_element_section_reader()?; - parse_element_section(elements, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Code = section.code { - let code = section.get_code_section_reader()?; - parse_code_section(code, environ)?; - - reader.skip_custom_sections()?; - if reader.eof() { - return Ok(()); - } - section = reader.read()?; - } - - if let SectionCode::Data = section.code { - let data = section.get_data_section_reader()?; - parse_data_section(data, environ)?; } Ok(()) diff --git a/cranelift-wasm/src/sections_translator.rs b/cranelift-wasm/src/sections_translator.rs index 1236e943d..c8c66529a 100644 --- a/cranelift-wasm/src/sections_translator.rs +++ b/cranelift-wasm/src/sections_translator.rs @@ -9,10 +9,12 @@ //! interpreted on the fly. 
use crate::environ::{ModuleEnvironment, WasmResult}; use crate::translation_utils::{ - type_to_type, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, MemoryIndex, SignatureIndex, - Table, TableElementType, TableIndex, + tabletype_to_type, type_to_type, FuncIndex, Global, GlobalIndex, GlobalInit, Memory, + MemoryIndex, SignatureIndex, Table, TableElementType, TableIndex, }; +use crate::{wasm_unsupported, HashMap}; use core::convert::TryFrom; +use cranelift_codegen::ir::immediates::V128Imm; use cranelift_codegen::ir::{self, AbiParam, Signature}; use cranelift_entity::EntityRef; use std::vec::Vec; @@ -20,8 +22,8 @@ use wasmparser::{ self, CodeSectionReader, Data, DataKind, DataSectionReader, Element, ElementKind, ElementSectionReader, Export, ExportSectionReader, ExternalKind, FuncType, FunctionSectionReader, GlobalSectionReader, GlobalType, ImportSectionEntryType, - ImportSectionReader, MemorySectionReader, MemoryType, Operator, TableSectionReader, - TypeSectionReader, + ImportSectionReader, MemorySectionReader, MemoryType, NameSectionReader, Naming, NamingReader, + Operator, TableSectionReader, TypeSectionReader, }; /// Parses the Type section of the wasm module. @@ -29,7 +31,7 @@ pub fn parse_type_section( types: TypeSectionReader, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_signatures(types.get_count()); + environ.reserve_signatures(types.get_count())?; for entry in types { match entry? { @@ -49,9 +51,9 @@ pub fn parse_type_section( .expect("only numeric types are supported in function signatures"); AbiParam::new(cret_arg) })); - environ.declare_signature(sig); + environ.declare_signature(sig)?; } - ref s => panic!("unsupported type: {:?}", s), + ty => wasm_unsupported!("unsupported type in type section: {:?}", ty), } } Ok(()) @@ -62,7 +64,7 @@ pub fn parse_import_section<'data>( imports: ImportSectionReader<'data>, environ: &mut dyn ModuleEnvironment<'data>, ) -> WasmResult<()> { - environ.reserve_imports(imports.get_count()); + environ.reserve_imports(imports.get_count())?; for entry in imports { let import = entry?; @@ -71,7 +73,11 @@ pub fn parse_import_section<'data>( match import.ty { ImportSectionEntryType::Function(sig) => { - environ.declare_func_import(SignatureIndex::from_u32(sig), module_name, field_name); + environ.declare_func_import( + SignatureIndex::from_u32(sig), + module_name, + field_name, + )?; } ImportSectionEntryType::Memory(MemoryType { limits: ref memlimits, @@ -85,7 +91,7 @@ pub fn parse_import_section<'data>( }, module_name, field_name, - ); + )?; } ImportSectionEntryType::Global(ref ty) => { environ.declare_global_import( @@ -96,26 +102,26 @@ pub fn parse_import_section<'data>( }, module_name, field_name, - ); + )?; } ImportSectionEntryType::Table(ref tab) => { environ.declare_table_import( Table { - ty: match type_to_type(tab.element_type) { - Ok(t) => TableElementType::Val(t), - Err(()) => TableElementType::Func, + ty: match tabletype_to_type(tab.element_type)? 
{ + Some(t) => TableElementType::Val(t), + None => TableElementType::Func, }, minimum: tab.limits.initial, maximum: tab.limits.maximum, }, module_name, field_name, - ); + )?; } } } - environ.finish_imports(); + environ.finish_imports()?; Ok(()) } @@ -124,11 +130,11 @@ pub fn parse_function_section( functions: FunctionSectionReader, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_func_types(functions.get_count()); + environ.reserve_func_types(functions.get_count())?; for entry in functions { let sigindex = entry?; - environ.declare_func_type(SignatureIndex::from_u32(sigindex)); + environ.declare_func_type(SignatureIndex::from_u32(sigindex))?; } Ok(()) @@ -139,18 +145,18 @@ pub fn parse_table_section( tables: TableSectionReader, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_tables(tables.get_count()); + environ.reserve_tables(tables.get_count())?; for entry in tables { let table = entry?; environ.declare_table(Table { - ty: match type_to_type(table.element_type) { - Ok(t) => TableElementType::Val(t), - Err(()) => TableElementType::Func, + ty: match tabletype_to_type(table.element_type)? { + Some(t) => TableElementType::Val(t), + None => TableElementType::Func, }, minimum: table.limits.initial, maximum: table.limits.maximum, - }); + })?; } Ok(()) @@ -161,7 +167,7 @@ pub fn parse_memory_section( memories: MemorySectionReader, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_memories(memories.get_count()); + environ.reserve_memories(memories.get_count())?; for entry in memories { let memory = entry?; @@ -169,7 +175,7 @@ pub fn parse_memory_section( minimum: memory.limits.initial, maximum: memory.limits.maximum, shared: memory.shared, - }); + })?; } Ok(()) @@ -180,7 +186,7 @@ pub fn parse_global_section( globals: GlobalSectionReader, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_globals(globals.get_count()); + environ.reserve_globals(globals.get_count())?; for entry in globals { let wasmparser::Global { @@ -196,17 +202,22 @@ pub fn parse_global_section( Operator::I64Const { value } => GlobalInit::I64Const(value), Operator::F32Const { value } => GlobalInit::F32Const(value.bits()), Operator::F64Const { value } => GlobalInit::F64Const(value.bits()), + Operator::V128Const { value } => { + GlobalInit::V128Const(V128Imm::from(value.bytes().to_vec().as_slice())) + } Operator::GetGlobal { global_index } => { GlobalInit::GetGlobal(GlobalIndex::from_u32(global_index)) } - ref s => panic!("unsupported init expr in global section: {:?}", s), + ref s => { + wasm_unsupported!("unsupported init expr in global section: {:?}", s); + } }; let global = Global { ty: type_to_type(content_type).unwrap(), mutability: mutable, initializer, }; - environ.declare_global(global); + environ.declare_global(global)?; } Ok(()) @@ -217,7 +228,7 @@ pub fn parse_export_section<'data>( exports: ExportSectionReader<'data>, environ: &mut dyn ModuleEnvironment<'data>, ) -> WasmResult<()> { - environ.reserve_exports(exports.get_count()); + environ.reserve_exports(exports.get_count())?; for entry in exports { let Export { @@ -231,20 +242,24 @@ pub fn parse_export_section<'data>( // becomes a concern here. 
let index = index as usize; match *kind { - ExternalKind::Function => environ.declare_func_export(FuncIndex::new(index), field), - ExternalKind::Table => environ.declare_table_export(TableIndex::new(index), field), - ExternalKind::Memory => environ.declare_memory_export(MemoryIndex::new(index), field), - ExternalKind::Global => environ.declare_global_export(GlobalIndex::new(index), field), + ExternalKind::Function => environ.declare_func_export(FuncIndex::new(index), field)?, + ExternalKind::Table => environ.declare_table_export(TableIndex::new(index), field)?, + ExternalKind::Memory => { + environ.declare_memory_export(MemoryIndex::new(index), field)? + } + ExternalKind::Global => { + environ.declare_global_export(GlobalIndex::new(index), field)? + } } } - environ.finish_exports(); + environ.finish_exports()?; Ok(()) } /// Parses the Start section of the wasm module. pub fn parse_start_section(index: u32, environ: &mut dyn ModuleEnvironment) -> WasmResult<()> { - environ.declare_start_func(FuncIndex::from_u32(index)); + environ.declare_start_func(FuncIndex::from_u32(index))?; Ok(()) } @@ -253,7 +268,7 @@ pub fn parse_element_section<'data>( elements: ElementSectionReader<'data>, environ: &mut dyn ModuleEnvironment, ) -> WasmResult<()> { - environ.reserve_table_elements(elements.get_count()); + environ.reserve_table_elements(elements.get_count())?; for entry in elements { let Element { kind, items } = entry?; @@ -268,7 +283,9 @@ pub fn parse_element_section<'data>( Operator::GetGlobal { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } - ref s => panic!("unsupported init expr in element section: {:?}", s), + ref s => { + wasm_unsupported!("unsupported init expr in element section: {:?}", s); + } }; let items_reader = items.get_items_reader()?; let mut elems = Vec::with_capacity(usize::try_from(items_reader.get_count()).unwrap()); @@ -281,9 +298,9 @@ pub fn parse_element_section<'data>( base, offset, elems.into_boxed_slice(), - ) + )? } else { - panic!("unsupported passive elements section"); + wasm_unsupported!("unsupported passive elements section: {:?}", kind); } } Ok(()) @@ -308,7 +325,7 @@ pub fn parse_data_section<'data>( data: DataSectionReader<'data>, environ: &mut dyn ModuleEnvironment<'data>, ) -> WasmResult<()> { - environ.reserve_data_initializers(data.get_count()); + environ.reserve_data_initializers(data.get_count())?; for entry in data { let Data { kind, data } = entry?; @@ -323,18 +340,62 @@ pub fn parse_data_section<'data>( Operator::GetGlobal { global_index } => { (Some(GlobalIndex::from_u32(global_index)), 0) } - ref s => panic!("unsupported init expr in data section: {:?}", s), + ref s => wasm_unsupported!("unsupported init expr in data section: {:?}", s), }; environ.declare_data_initialization( MemoryIndex::from_u32(memory_index), base, offset, data, - ); + )?; } else { - panic!("unsupported passive data section"); + wasm_unsupported!("unsupported passive data section: {:?}", kind); } } Ok(()) } + +/// Parses the Name section of the wasm module. 
+pub fn parse_name_section<'data>(
+    mut names: NameSectionReader<'data>,
+    environ: &mut dyn ModuleEnvironment<'data>,
+) -> WasmResult<()> {
+    while let Ok(subsection) = names.read() {
+        match subsection {
+            wasmparser::Name::Function(function_subsection) => {
+                if let Some(function_names) = function_subsection
+                    .get_map()
+                    .ok()
+                    .and_then(parse_function_name_subsection)
+                {
+                    for (index, name) in function_names {
+                        environ.declare_func_name(index, name)?;
+                    }
+                }
+                return Ok(());
+            }
+            wasmparser::Name::Local(_) | wasmparser::Name::Module(_) => {}
+        };
+    }
+    Ok(())
+}
+
+fn parse_function_name_subsection<'data>(
+    mut naming_reader: NamingReader<'data>,
+) -> Option<HashMap<FuncIndex, &'data str>> {
+    let mut function_names = HashMap::new();
+    for _ in 0..naming_reader.get_count() {
+        let Naming { index, name } = naming_reader.read().ok()?;
+        if function_names
+            .insert(FuncIndex::from_u32(index), name)
+            .is_some()
+        {
+            // If the function index has been previously seen, then we
+            // break out of the loop and early return `None`, because these
+            // should be unique.
+            return None;
+        }
+    }
+    return Some(function_names);
+}
diff --git a/cranelift-wasm/src/state.rs b/cranelift-wasm/src/state.rs
index f829439ad..7016035b7 100644
--- a/cranelift-wasm/src/state.rs
+++ b/cranelift-wasm/src/state.rs
@@ -124,17 +124,41 @@ impl ControlStackFrame {
     }
 }

+/// VisibleTranslationState wraps a TranslationState with an interface appropriate for users
+/// outside this `cranelift-wasm` crate.
+///
+/// VisibleTranslationState is currently very minimal (only exposing reachability information), but
+/// is anticipated to grow in the future, with functions to inspect or modify the wasm operand
+/// stack for example.
+pub struct VisibleTranslationState<'a> {
+    state: &'a TranslationState,
+}
+
+impl<'a> VisibleTranslationState<'a> {
+    /// Build a VisibleTranslationState from an existing TranslationState
+    pub fn new(state: &'a TranslationState) -> Self {
+        VisibleTranslationState { state }
+    }
+
+    /// True if the current translation state expresses reachable code, false if it is unreachable
+    pub fn reachable(&self) -> bool {
+        self.state.reachable
+    }
+}
+
 /// Contains information passed along during the translation and that records:
 ///
 /// - The current value and control stacks.
 /// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
 ///   unreachable code;
 pub struct TranslationState {
-    /// Stack
+    /// A stack of values corresponding to the active values in the input wasm function at this
+    /// point.
     pub stack: Vec<Value>,
-    /// Control stack
+    /// A stack of active control flow operations at this point in the input wasm function.
     pub control_stack: Vec<ControlStackFrame>,
-    /// Reachability
+    /// Is the current translation state still reachable? This is false when translating operators
+    /// like End, Return, or Unreachable.
     pub reachable: bool,

     // Map of global variables that have already been created by `FuncEnvironment::make_global`.
@@ -158,7 +182,7 @@ pub struct TranslationState {
 }

 impl TranslationState {
-    /// New TranslationState
+    /// Construct a new, empty, `TranslationState`
     pub fn new() -> Self {
         Self {
             stack: Vec::new(),
diff --git a/cranelift-wasm/src/translation_utils.rs b/cranelift-wasm/src/translation_utils.rs
index 18d69c764..95553c6e4 100644
--- a/cranelift-wasm/src/translation_utils.rs
+++ b/cranelift-wasm/src/translation_utils.rs
@@ -1,11 +1,17 @@
 //! Helper functions and structures for the translation.
+use crate::environ::WasmResult;
+use crate::wasm_unsupported;
 use core::u32;
 use cranelift_codegen::entity::entity_impl;
 use cranelift_codegen::ir;
+use cranelift_codegen::ir::immediates::V128Imm;
+#[cfg(feature = "enable-serde")]
+use serde::{Deserialize, Serialize};
 use wasmparser;

 /// Index type of a function (imported or defined) inside the WebAssembly module.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct FuncIndex(u32);
 entity_impl!(FuncIndex);

@@ -50,7 +56,7 @@ pub struct SignatureIndex(u32);
 entity_impl!(SignatureIndex);

 /// WebAssembly global.
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Hash)]
 pub struct Global {
     /// The type of the value stored in the global.
     pub ty: ir::Type,
@@ -60,8 +66,8 @@ pub struct Global {
     pub initializer: GlobalInit,
 }

-/// Globals are initialized via the four `const` operators or by referring to another import.
-#[derive(Debug, Clone, Copy)]
+/// Globals are initialized via the `const` operators or by referring to another import.
+#[derive(Debug, Clone, Copy, Hash)]
 pub enum GlobalInit {
     /// An `i32.const`.
     I32Const(i32),
@@ -71,6 +77,8 @@ pub enum GlobalInit {
     F32Const(u32),
     /// An `f64.const`.
     F64Const(u64),
+    /// A `vconst`.
+    V128Const(V128Imm),
     /// A `get_global` of another global.
     GetGlobal(GlobalIndex),
     ///< The global is imported from, and thus initialized by, a different module.
@@ -78,7 +86,7 @@
 }

 /// WebAssembly table.
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Hash)]
 pub struct Table {
     /// The type of data stored in elements of the table.
     pub ty: TableElementType,
@@ -89,7 +97,7 @@ pub struct Table {
 }

 /// WebAssembly table element. Can be a function or a scalar type.
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Hash)]
 pub enum TableElementType {
     /// A scalar type.
     Val(ir::Type),
@@ -98,7 +106,7 @@ pub enum TableElementType {
 }

 /// WebAssembly linear memory.
-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, Hash)]
 pub struct Memory {
     /// The minimum number of pages in the memory.
     pub minimum: u32,
@@ -109,21 +117,47 @@ pub struct Memory {
 }

 /// Helper function translating wasmparser types to Cranelift types when possible.
-pub fn type_to_type(ty: wasmparser::Type) -> Result<ir::Type, ()> {
-    Ok(match ty {
-        wasmparser::Type::I32 => ir::types::I32,
-        wasmparser::Type::I64 => ir::types::I64,
-        wasmparser::Type::F32 => ir::types::F32,
-        wasmparser::Type::F64 => ir::types::F64,
-        _ => return Err(()),
-    })
+pub fn type_to_type(ty: wasmparser::Type) -> WasmResult<ir::Type> {
+    match ty {
+        wasmparser::Type::I32 => Ok(ir::types::I32),
+        wasmparser::Type::I64 => Ok(ir::types::I64),
+        wasmparser::Type::F32 => Ok(ir::types::F32),
+        wasmparser::Type::F64 => Ok(ir::types::F64),
+        wasmparser::Type::V128 => Ok(ir::types::I8X16),
+        ty => wasm_unsupported!("type_to_type: wasm type {:?}", ty),
+    }
 }

-/// Helper function translating wasmparser block signatures to Cranelift types when possible.
-pub fn blocktype_to_type(ty: wasmparser::TypeOrFuncType) -> Result<ir::Type, ()> {
+/// Helper function translating wasmparser possible table types to Cranelift types when possible,
+/// or None for Func tables.
+pub fn tabletype_to_type(ty: wasmparser::Type) -> WasmResult<Option<ir::Type>> {
     match ty {
-        wasmparser::TypeOrFuncType::Type(ty) => type_to_type(ty),
-        wasmparser::TypeOrFuncType::FuncType(_) => unimplemented!("multi-value block signatures"),
+        wasmparser::Type::I32 => Ok(Some(ir::types::I32)),
+        wasmparser::Type::I64 => Ok(Some(ir::types::I64)),
+        wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
+        wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
+        wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
+        wasmparser::Type::AnyFunc => Ok(None),
+        ty => wasm_unsupported!("tabletype_to_type: table wasm type {:?}", ty),
+    }
+}
+
+/// Helper function translating wasmparser block signatures to Cranelift types when possible.
+pub fn blocktype_to_type(ty_or_ft: wasmparser::TypeOrFuncType) -> WasmResult<Option<ir::Type>> {
+    match ty_or_ft {
+        wasmparser::TypeOrFuncType::Type(ty) => match ty {
+            wasmparser::Type::I32 => Ok(Some(ir::types::I32)),
+            wasmparser::Type::I64 => Ok(Some(ir::types::I64)),
+            wasmparser::Type::F32 => Ok(Some(ir::types::F32)),
+            wasmparser::Type::F64 => Ok(Some(ir::types::F64)),
+            wasmparser::Type::V128 => Ok(Some(ir::types::I8X16)),
+            wasmparser::Type::EmptyBlockType => Ok(None),
+            ty => wasm_unsupported!("blocktype_to_type: type {:?}", ty),
+        },
+        wasmparser::TypeOrFuncType::FuncType(_) => wasm_unsupported!(
+            "blocktype_to_type: multi-value block signature {:?}",
+            ty_or_ft
+        ),
     }
 }

@@ -138,17 +172,20 @@ pub fn f64_translation(x: wasmparser::Ieee64) -> ir::immediates::Ieee64 {
 }

 /// Translate a `wasmparser` type into its `Cranelift` equivalent, when possible
-pub fn num_return_values(ty: wasmparser::TypeOrFuncType) -> usize {
+pub fn num_return_values(ty: wasmparser::TypeOrFuncType) -> WasmResult<usize> {
     match ty {
         wasmparser::TypeOrFuncType::Type(ty) => match ty {
-            wasmparser::Type::EmptyBlockType => 0,
+            wasmparser::Type::EmptyBlockType => Ok(0),
             wasmparser::Type::I32
             | wasmparser::Type::F32
             | wasmparser::Type::I64
-            | wasmparser::Type::F64 => 1,
-            _ => panic!("unsupported return value type"),
+            | wasmparser::Type::F64
+            | wasmparser::Type::V128 => Ok(1),
+            ty => wasm_unsupported!("unsupported return value type {:?}", ty),
         },
-        wasmparser::TypeOrFuncType::FuncType(_) => unimplemented!("multi-value block signatures"),
+        wasmparser::TypeOrFuncType::FuncType(_) => {
+            wasm_unsupported!("multi-value block signature {:?}", ty);
+        }
     }
 }
diff --git a/cranelift-wasm/tests/wasm_testsuite.rs b/cranelift-wasm/tests/wasm_testsuite.rs
index f52182828..69db90ca6 100644
--- a/cranelift-wasm/tests/wasm_testsuite.rs
+++ b/cranelift-wasm/tests/wasm_testsuite.rs
@@ -2,7 +2,7 @@ use cranelift_codegen::isa;
 use cranelift_codegen::print_errors::pretty_verifier_error;
 use cranelift_codegen::settings::{self, Flags};
 use cranelift_codegen::verifier;
-use cranelift_wasm::{translate_module, DummyEnvironment, ReturnMode};
+use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode};
 use std::fs;
 use std::fs::File;
 use std::io;
@@ -10,7 +10,7 @@ use std::io::prelude::*;
 use std::path::Path;
 use std::str::FromStr;
 use target_lexicon::triple;
-use wabt::wat2wasm;
+use wabt::{wat2wasm_with_features, Features, Wat2Wasm};

 #[test]
 fn testsuite() {
@@ -31,17 +31,42 @@ fn testsuite() {
     let flags = Flags::new(settings::builder());
     for path in paths {
         let path = path.path();
-        handle_module(&path, &flags, ReturnMode::NormalReturns);
+        let data = read_module(&path);
+        handle_module(data, &flags, ReturnMode::NormalReturns);
     }
 }

 #[test]
 fn use_fallthrough_return() {
     let flags = Flags::new(settings::builder());
-    handle_module(
-        Path::new("../wasmtests/use_fallthrough_return.wat"),
-        &flags,
-        ReturnMode::FallthroughReturn,
+    let path = Path::new("../wasmtests/use_fallthrough_return.wat");
+    let data = read_module(&path);
+    handle_module(data, &flags, ReturnMode::FallthroughReturn);
+}
+
+#[test]
+fn use_name_section() {
+    let wat = r#"
+    (module $module_name
+        (func $func_name (local $loc_name i32)
+        )
+    )"#;
+    let data = Wat2Wasm::new()
+        .write_debug_names(true)
+        .convert(wat)
+        .unwrap_or_else(|e| panic!("error converting wat to wasm: {:?}", e));
+
+    let flags = Flags::new(settings::builder());
+    let triple = triple!("riscv64");
+    let isa = isa::lookup(triple).unwrap().finish(flags.clone());
+    let return_mode = ReturnMode::NormalReturns;
+    let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false);
+
+    translate_module(data.as_ref(), &mut dummy_environ).unwrap();
+
+    assert_eq!(
+        dummy_environ.get_func_name(FuncIndex::from_u32(0)).unwrap(),
+        "func_name"
     );
 }

@@ -52,8 +77,10 @@ fn read_file(path: &Path) -> io::Result<Vec<u8>> {
     Ok(buf)
 }

-fn handle_module(path: &Path, flags: &Flags, return_mode: ReturnMode) {
-    let data = match path.extension() {
+fn read_module(path: &Path) -> Vec<u8> {
+    let mut features = Features::new();
+    features.enable_all();
+    match path.extension() {
         None => {
             panic!("the file extension is not wasm or wat");
         }
@@ -61,7 +88,7 @@ fn handle_module(path: &Path, flags: &Flags, return_mode: ReturnMode) {
             Some("wasm") => read_file(path).expect("error reading wasm file"),
             Some("wat") => {
                 let wat = read_file(path).expect("error reading wat file");
-                match wat2wasm(&wat) {
+                match wat2wasm_with_features(&wat, features) {
                     Ok(wasm) => wasm,
                     Err(e) => {
                         panic!("error converting wat to wasm: {:?}", e);
@@ -70,7 +97,10 @@
             }
             None | Some(&_) => panic!("the file extension for {:?} is not wasm or wat", path),
         },
-    };
+    }
+}
+
+fn handle_module(data: Vec<u8>, flags: &Flags, return_mode: ReturnMode) {
     let triple = triple!("riscv64");
     let isa = isa::lookup(triple).unwrap().finish(flags.clone());
     let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), return_mode, false);
diff --git a/docs/callex.clif b/docs/callex.clif
index 853f5c409..1d9323919 100644
--- a/docs/callex.clif
+++ b/docs/callex.clif
@@ -4,10 +4,13 @@ function %gcd(i32 uext, i32 uext) -> i32 uext system_v {
     fn0 = %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext

 ebb1(v0: i32, v1: i32):
-    brz v1, ebb2
+    brz v1, ebb3
+    jump ebb2
+
+ebb2:
     v2, v3 = call fn0(v0, v1)
     return v2

-ebb2:
+ebb3:
     return v0
 }
diff --git a/docs/clif_domain.py b/docs/clif_domain.py
deleted file mode 100644
index eb9de0157..000000000
--- a/docs/clif_domain.py
+++ /dev/null
@@ -1,385 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Sphinx domain for documenting compiler intermediate representations.
-#
-# This defines a 'clif' Sphinx domain with the following directives and roles:
-#
-# .. clif:type:: type
-#    Document an IR type.
-# .. clif:inst:: v0, v1 = inst op0, op1
-#    Document an IR instruction.
-# -from __future__ import absolute_import - -import re - -from docutils import nodes -from docutils.parsers.rst import directives - -from sphinx import addnodes -from sphinx.directives import ObjectDescription -from sphinx.domains import Domain, ObjType -from sphinx.locale import l_ -from sphinx.roles import XRefRole -from sphinx.util.docfields import Field, GroupedField, TypedField -from sphinx.util.nodes import make_refnode - -import sphinx.ext.autodoc - - -class ClifObject(ObjectDescription): - """ - Any kind of Cranelift IR object. - - This is a shared base class for the different kinds of indexable objects - in the Cranelift IR reference. - """ - option_spec = { - 'noindex': directives.flag, - 'module': directives.unchanged, - 'annotation': directives.unchanged, - } - - def add_target_and_index(self, name, sig, signode): - """ - Add ``name`` to the index. - - :param name: The object name returned by :func:`handle_signature`. - :param sig: The signature text. - :param signode: The output node. - """ - targetname = self.objtype + '-' + name - if targetname not in self.state.document.ids: - signode['names'].append(targetname) - signode['ids'].append(targetname) - signode['first'] = (not self.names) - self.state.document.note_explicit_target(signode) - inv = self.env.domaindata['clif']['objects'] - if name in inv: - self.state_machine.reporter.warning( - 'duplicate Cranelift object description of %s, ' % name + - 'other instance in ' + self.env.doc2path(inv[name][0]), - line=self.lineno) - inv[name] = (self.env.docname, self.objtype) - - indextext = self.get_index_text(name) - if indextext: - self.indexnode['entries'].append(('single', indextext, - targetname, '', None)) - - -# Type variables are indicated as %T. -typevar = re.compile('(\%[A-Z])') - - -def parse_type(name, signode): - """ - Parse a type with embedded type vars and append to signode. - - Return a string that can be compiled into a regular expression matching - the type. - """ - - re_str = '' - - for part in typevar.split(name): - if part == '': - continue - if len(part) == 2 and part[0] == '%': - # This is a type parameter. Don't display the %, use emphasis - # instead. - part = part[1] - signode += nodes.emphasis(part, part) - re_str += r'\w+' - else: - signode += addnodes.desc_name(part, part) - re_str += re.escape(part) - return re_str - - -class ClifType(ClifObject): - """A Cranelift IR type description.""" - - def handle_signature(self, sig, signode): - """ - Parse type signature in ``sig`` and append description to signode. - - Return a global object name for ``add_target_and_index``. 
- """ - - name = sig.strip() - parse_type(name, signode) - return name - - def get_index_text(self, name): - return name + ' (IR type)' - - -sep_equal = re.compile('\s*=\s*') -sep_comma = re.compile('\s*,\s*') - - -def parse_params(s, signode): - for i, p in enumerate(sep_comma.split(s)): - if i != 0: - signode += nodes.Text(', ') - signode += nodes.emphasis(p, p) - - -class ClifInst(ClifObject): - """A Cranelift IR instruction.""" - - doc_field_types = [ - TypedField('argument', label=l_('Arguments'), - names=('in', 'arg'), - typerolename='type', typenames=('type',)), - TypedField('result', label=l_('Results'), - names=('out', 'result'), - typerolename='type', typenames=('type',)), - GroupedField( - 'typevar', names=('typevar',), label=l_('Type Variables')), - GroupedField('flag', names=('flag',), label=l_('Flags')), - Field('resulttype', label=l_('Result type'), has_arg=False, - names=('rtype',)), - ] - - def handle_signature(self, sig, signode): - # Look for signatures like - # - # v0, v1 = foo op0, op1 - # v0 = foo - # foo op0 - - parts = re.split(sep_equal, sig, 1) - if len(parts) == 2: - # Outgoing parameters. - parse_params(parts[0], signode) - signode += nodes.Text(' = ') - name = parts[1] - else: - name = parts[0] - - # Parse 'name arg, arg' - parts = name.split(None, 1) - name = parts[0] - signode += addnodes.desc_name(name, name) - - if len(parts) == 2: - # Incoming parameters. - signode += nodes.Text(' ') - parse_params(parts[1], signode) - - return name - - def get_index_text(self, name): - return name - - -class ClifInstGroup(ClifObject): - """A Cranelift IR instruction group.""" - - -class CraneliftDomain(Domain): - """Cranelift domain for IR objects.""" - name = 'clif' - label = 'Cranelift' - - object_types = { - 'type': ObjType(l_('type'), 'type'), - 'inst': ObjType(l_('instruction'), 'inst') - } - - directives = { - 'type': ClifType, - 'inst': ClifInst, - 'instgroup': ClifInstGroup, - } - - roles = { - 'type': XRefRole(), - 'inst': XRefRole(), - 'instgroup': XRefRole(), - } - - initial_data = { - 'objects': {}, # fullname -> docname, objtype - } - - def clear_doc(self, docname): - for fullname, (fn, _l) in list(self.data['objects'].items()): - if fn == docname: - del self.data['objects'][fullname] - - def merge_domaindata(self, docnames, otherdata): - for fullname, (fn, objtype) in otherdata['objects'].items(): - if fn in docnames: - self.data['objects'][fullname] = (fn, objtype) - - def resolve_xref(self, env, fromdocname, builder, typ, target, node, - contnode): - objects = self.data['objects'] - if target not in objects: - return None - obj = objects[target] - return make_refnode(builder, fromdocname, obj[0], - obj[1] + '-' + target, contnode, target) - - def resolve_any_xref(self, env, fromdocname, builder, target, - node, contnode): - objects = self.data['objects'] - if target not in objects: - return [] - obj = objects[target] - return [('clif:' + self.role_for_objtype(obj[1]), - make_refnode(builder, fromdocname, obj[0], - obj[1] + '-' + target, contnode, target))] - - -class TypeDocumenter(sphinx.ext.autodoc.Documenter): - # Invoke with .. 
autocliftype:: - objtype = 'cliftype' - # Convert into clif:type directives - domain = 'clif' - directivetype = 'type' - - @classmethod - def can_document_member(cls, member, membername, isattr, parent): - return False - - def resolve_name(self, modname, parents, path, base): - return 'base.types', [base] - - def add_content(self, more_content, no_docstring=False): - super(TypeDocumenter, self).add_content(more_content, no_docstring) - sourcename = self.get_sourcename() - membytes = self.object.membytes - if membytes: - self.add_line(u':bytes: {}'.format(membytes), sourcename) - else: - self.add_line(u':bytes: Can\'t be stored in memory', sourcename) - - -class InstDocumenter(sphinx.ext.autodoc.Documenter): - # Invoke with .. autoinst:: - objtype = 'inst' - # Convert into clif:inst directives - domain = 'clif' - directivetype = 'inst' - - @classmethod - def can_document_member(cls, member, membername, isattr, parent): - return False - - def resolve_name(self, modname, parents, path, base): - if path: - return path.rstrip('.'), [base] - else: - return 'base.instructions', [base] - - def format_signature(self): - inst = self.object - sig = inst.name - if len(inst.outs) > 0: - sig = ', '.join([op.name for op in inst.outs]) + ' = ' + sig - if len(inst.ins) > 0: - op = inst.ins[0] - sig += ' ' + op.name - # If the first input is variable-args, this is 'return'. No parens. - if op.kind.name == 'variable_args': - sig += '...'.format(op.name) - for op in inst.ins[1:]: - # This is a call or branch with args in (...). - if op.kind.name == 'variable_args': - sig += '({}...)'.format(op.name) - else: - sig += ', ' + op.name - return sig - - def add_directive_header(self, sig): - """Add the directive header and options to the generated content.""" - domain = getattr(self, 'domain', 'clif') - directive = getattr(self, 'directivetype', self.objtype) - sourcename = self.get_sourcename() - self.add_line(u'.. %s:%s:: %s' % (domain, directive, sig), sourcename) - if self.options.noindex: - self.add_line(u' :noindex:', sourcename) - - def add_content(self, more_content, no_docstring=False): - super(InstDocumenter, self).add_content(more_content, no_docstring) - sourcename = self.get_sourcename() - inst = self.object - - # Add inputs and outputs. - for op in inst.ins: - if op.is_value(): - typ = op.typevar - else: - typ = op.kind - self.add_line(u':in {} {}: {}'.format( - typ, op.name, op.get_doc()), sourcename) - for op in inst.outs: - if op.is_value(): - typ = op.typevar - else: - typ = op.kind - self.add_line(u':out {} {}: {}'.format( - typ, op.name, op.get_doc()), sourcename) - - # Document type inference for polymorphic instructions. - if inst.is_polymorphic: - if inst.ctrl_typevar is not None: - if inst.use_typevar_operand: - tvopnum = inst.value_opnums[inst.format.typevar_operand] - self.add_line( - u':typevar {}: inferred from {}' - .format( - inst.ctrl_typevar.name, - inst.ins[tvopnum]), - sourcename) - else: - self.add_line( - u':typevar {}: explicitly provided' - .format(inst.ctrl_typevar.name), - sourcename) - for tv in inst.other_typevars: - self.add_line( - u':typevar {}: from input operand'.format(tv.name), - sourcename) - - -class InstGroupDocumenter(sphinx.ext.autodoc.ModuleLevelDocumenter): - # Invoke with .. 
autoinstgroup:: - objtype = 'instgroup' - # Convert into clif:instgroup directives - domain = 'clif' - directivetype = 'instgroup' - - @classmethod - def can_document_member(cls, member, membername, isattr, parent): - return False - - def format_name(self): - return "{}.{}".format(self.modname, ".".join(self.objpath)) - - def add_content(self, more_content, no_docstring=False): - super(InstGroupDocumenter, self).add_content( - more_content, no_docstring) - sourcename = self.get_sourcename() - indexed = self.env.domaindata['clif']['objects'] - - names = [inst.name for inst in self.object.instructions] - names.sort() - for name in names: - if name in indexed: - self.add_line(u':clif:inst:`{}`'.format(name), sourcename) - else: - self.add_line(u'``{}``'.format(name), sourcename) - - -def setup(app): - app.add_domain(CraneliftDomain) - app.add_autodocumenter(TypeDocumenter) - app.add_autodocumenter(InstDocumenter) - app.add_autodocumenter(InstGroupDocumenter) - - return {'version': '0.1'} diff --git a/docs/conf.py b/docs/conf.py index 09fd16453..05be11238 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,10 +21,6 @@ import sys sys.path.insert(0, os.path.abspath('.')) -# Also add the meta-python directory to sys.path so autodoc can find the Cranelift meta -# language definitions. -sys.path.insert(0, os.path.abspath('../cranelift-codegen/meta-python')) - # -- General configuration ------------------------------------------------ # We don't support Sphinx versions before 1.4 since the format of index @@ -41,7 +37,6 @@ 'sphinx.ext.ifconfig', 'sphinx.ext.graphviz', 'sphinx.ext.inheritance_diagram', - 'clif_domain', 'clif_lexer', ] @@ -59,7 +54,7 @@ # General information about the project. project = u'cranelift' -copyright = u'2018, Cranelift Developers' +copyright = u'2019, Cranelift Developers' author = u'Cranelift Developers' # The version info for the project you're documenting, acts as replacement for diff --git a/docs/example.clif b/docs/example.clif index 2bc5c9cc4..b848f7026 100644 --- a/docs/example.clif +++ b/docs/example.clif @@ -6,11 +6,14 @@ function %average(i32, i32) -> f32 system_v { ebb1(v0: i32, v1: i32): v2 = f64const 0x0.0 stack_store v2, ss0 - brz v1, ebb3 ; Handle count == 0. + brz v1, ebb5 ; Handle count == 0. + jump ebb2 + +ebb2: v3 = iconst.i32 0 - jump ebb2(v3) + jump ebb3(v3) -ebb2(v4: i32): +ebb3(v4: i32): v5 = imul_imm v4, 4 v6 = iadd v0, v5 v7 = load.f32 v6 ; array[i] @@ -20,14 +23,17 @@ ebb2(v4: i32): stack_store v10, ss0 v11 = iadd_imm v4, 1 v12 = icmp ult v11, v1 - brnz v12, ebb2(v11) ; Loop backedge. + brnz v12, ebb3(v11) ; Loop backedge. 
+ jump ebb4 + +ebb4: v13 = stack_load.f64 ss0 v14 = fcvt_from_uint.f64 v1 v15 = fdiv v13, v14 v16 = fdemote.f32 v15 return v16 -ebb3: +ebb5: v100 = f32const +NaN return v100 } diff --git a/docs/heapex-dyn.clif b/docs/heapex-dyn.clif index 24fc254d9..93c40bd29 100644 --- a/docs/heapex-dyn.clif +++ b/docs/heapex-dyn.clif @@ -1,6 +1,6 @@ test verifier -function %add_members(i32, i64 vmctx) -> f32 baldrdash { +function %add_members(i32, i64 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = load.i64 notrap aligned gv0+64 gv2 = load.i32 notrap aligned gv0+72 diff --git a/docs/heapex-sm32.clif b/docs/heapex-sm32.clif index 9c9c35e8f..acd38a656 100644 --- a/docs/heapex-sm32.clif +++ b/docs/heapex-sm32.clif @@ -1,6 +1,6 @@ test verifier -function %add_members(i32, i32 vmctx) -> f32 baldrdash { +function %add_members(i32, i32 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = load.i32 notrap aligned gv0+64 heap0 = static gv1, min 0x1000, bound 0x10_0000, offset_guard 0x1000 diff --git a/docs/heapex-sm64.clif b/docs/heapex-sm64.clif index 22752eb40..20934ecba 100644 --- a/docs/heapex-sm64.clif +++ b/docs/heapex-sm64.clif @@ -1,6 +1,6 @@ test verifier -function %add_members(i32, i64 vmctx) -> f32 baldrdash { +function %add_members(i32, i64 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = load.i64 notrap aligned gv0+64 heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 diff --git a/docs/index.rst b/docs/index.rst index f37c83e34..9951f6d9e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,11 +15,19 @@ Contents: Rust Crate Documentation ======================== -`cranelift `_ +`cranelift `_ + This is an umbrella crate that re-exports the codegen and frontend crates, + to make them easier to use. + +`cranelift-codegen `_ This is the core code generator crate. It takes Cranelift IR as input and emits encoded machine instructions, along with symbolic relocations, as output. +`cranelift-codegen-meta `_ + This crate contains the meta-language utilities and descriptions used by the + code generator. + `cranelift-wasm `_ This crate translates WebAssembly code into Cranelift IR. diff --git a/docs/ir.rst b/docs/ir.rst index 550d42a81..dca239999 100644 --- a/docs/ir.rst +++ b/docs/ir.rst @@ -5,6 +5,13 @@ Cranelift IR Reference .. default-domain:: clif .. highlight:: clif +.. todo:: Update the IR reference + + This document is likely to be outdated and missing some important + information. It is recommended to look at the list of instructions as + documented in the `InstBuilder` documentation: + https://docs.rs/cranelift-codegen/latest/cranelift_codegen/ir/trait.InstBuilder.html + The Cranelift intermediate representation (:term:`IR`) has two primary forms: an *in-memory data structure* that the code generator library is using, and a *text format* which is used for test cases and debug output. @@ -79,10 +86,9 @@ containing multiple assignments to the same variables into SSA form for Cranelift :term:`IR`. Such variables can also be presented to Cranelift as :term:`stack slot`\s. -Stack slots are accessed with the :inst:`stack_store` and :inst:`stack_load` -instructions, and can have their address taken with :inst:`stack_addr`, which -supports C-like programming languages where local variables can have their -address taken. +Stack slots are accessed with the `stack_store` and `stack_load` instructions, +and can have their address taken with `stack_addr`, which supports C-like +programming languages where local variables can have their address taken. .. 
_value-types: @@ -98,20 +104,20 @@ Boolean types Boolean values are either true or false. -The :type:`b1` type represents an abstract boolean value. It can only exist as +The `b1` type represents an abstract boolean value. It can only exist as an SSA value, and can't be directly stored in memory. It can, however, be -converted into an integer with value 0 or 1 by the :inst:`bint` instruction (and -converted back with :inst:`icmp_imm` with 0). +converted into an integer with value 0 or 1 by the `bint` instruction (and +converted back with `icmp_imm` with 0). Several larger boolean types are also defined, primarily to be used as SIMD element types. They can be stored in memory, and are represented as either all zero bits or all one bits. -.. autocliftype:: b1 -.. autocliftype:: b8 -.. autocliftype:: b16 -.. autocliftype:: b32 -.. autocliftype:: b64 +- b1 +- b8 +- b16 +- b32 +- b64 Integer types ------------- @@ -122,10 +128,10 @@ number, others don't care. The support for i8 and i16 arithmetic is incomplete and use could lead to bugs. -.. autocliftype:: i8 -.. autocliftype:: i16 -.. autocliftype:: i32 -.. autocliftype:: i64 +- i8 +- i16 +- i32 +- i64 Floating point types -------------------- @@ -153,8 +159,8 @@ instructions are encoded as follows: and all bits of the trailing significand other than the MSB set to nondeterministic values. -.. autocliftype:: f32 -.. autocliftype:: f64 +- f32 +- f64 CPU flags types --------------- @@ -165,15 +171,15 @@ compared. Since some ISAs don't have CPU flags, these value types should not be used until the legalization phase of compilation where the code is adapted to fit -the target ISA. Use instructions like :inst:`icmp` instead. +the target ISA. Use instructions like `icmp` instead. The CPU flags types are also restricted such that two flags values can not be live at the same time. After legalization, some instruction encodings will clobber the flags, and flags values are not allowed to be live across such instructions either. The verifier enforces these rules. -.. autocliftype:: iflags -.. autocliftype:: fflags +- iflags +- fflags SIMD vector types ----------------- @@ -182,42 +188,38 @@ A SIMD vector type represents a vector of values from one of the scalar types (boolean, integer, and floating point). Each scalar value in a SIMD type is called a *lane*. The number of lanes must be a power of two in the range 2-256. -.. type:: i%Bx%N - - A SIMD vector of integers. The lane type :type:`iB` is one of the integer - types :type:`i8` ... :type:`i64`. +i%Bx%N + A SIMD vector of integers. The lane type `iB` is one of the integer + types `i8` ... `i64`. - Some concrete integer vector types are :type:`i32x4`, :type:`i64x8`, and - :type:`i16x4`. + Some concrete integer vector types are `i32x4`, `i64x8`, and + `i16x4`. The size of a SIMD integer vector in memory is :math:`N B\over 8` bytes. -.. type:: f32x%N - +f32x%N A SIMD vector of single precision floating point numbers. - Some concrete :type:`f32` vector types are: :type:`f32x2`, :type:`f32x4`, - and :type:`f32x8`. - - The size of a :type:`f32` vector in memory is :math:`4N` bytes. + Some concrete `f32` vector types are: `f32x2`, `f32x4`, + and `f32x8`. -.. type:: f64x%N + The size of a `f32` vector in memory is :math:`4N` bytes. +f64x%N A SIMD vector of double precision floating point numbers. - Some concrete :type:`f64` vector types are: :type:`f64x2`, :type:`f64x4`, - and :type:`f64x8`. - - The size of a :type:`f64` vector in memory is :math:`8N` bytes. 
+ Some concrete `f64` vector types are: `f64x2`, `f64x4`, + and `f64x8`. -.. type:: b1x%N + The size of a `f64` vector in memory is :math:`8N` bytes. +b1x%N A boolean SIMD vector. Boolean vectors are used when comparing SIMD vectors. For example, - comparing two :type:`i32x4` values would produce a :type:`b1x4` result. + comparing two `i32x4` values would produce a `b1x4` result. - Like the :type:`b1` type, a boolean vector cannot be stored in memory. + Like the `b1` type, a boolean vector cannot be stored in memory. Pseudo-types and type classes ----------------------------- @@ -225,40 +227,32 @@ Pseudo-types and type classes These are not concrete types, but convenient names used to refer to real types in this reference. -.. type:: iAddr - +iAddr A Pointer-sized integer representing an address. - This is either :type:`i32`, or :type:`i64`, depending on whether the target + This is either `i32`, or `i64`, depending on whether the target platform has 32-bit or 64-bit pointers. -.. type:: iB - - Any of the scalar integer types :type:`i8` -- :type:`i64`. - -.. type:: Int - - Any scalar *or vector* integer type: :type:`iB` or :type:`iBxN`. - -.. type:: fB +iB + Any of the scalar integer types `i8` -- `i64`. - Either of the floating point scalar types: :type:`f32` or :type:`f64`. +Int + Any scalar *or vector* integer type: `iB` or `iBxN`. -.. type:: Float +fB + Either of the floating point scalar types: `f32` or `f64`. - Any scalar *or vector* floating point type: :type:`fB` or :type:`fBxN`. - -.. type:: %Tx%N +Float + Any scalar *or vector* floating point type: `fB` or `fBxN`. +%Tx%N Any SIMD vector type. -.. type:: Mem - - Any type that can be stored in memory: :type:`Int` or :type:`Float`. - -.. type:: Testable +Mem + Any type that can be stored in memory: `Int` or `Float`. - Either :type:`b1` or :type:`iN`. +Testable + Either `b1` or `iN`. Immediate operand types ----------------------- @@ -266,48 +260,41 @@ Immediate operand types These types are not part of the normal SSA type system. They are used to indicate the different kinds of immediate operands on an instruction. -.. type:: imm64 - +imm64 A 64-bit immediate integer. The value of this operand is interpreted as a signed two's complement integer. Instruction encodings may limit the valid range. - In the textual format, :type:`imm64` immediates appear as decimal or + In the textual format, `imm64` immediates appear as decimal or hexadecimal literals using the same syntax as C. -.. type:: offset32 - +offset32 A signed 32-bit immediate address offset. - In the textual format, :type:`offset32` immediates always have an explicit + In the textual format, `offset32` immediates always have an explicit sign, and a 0 offset may be omitted. -.. type:: ieee32 - +ieee32 A 32-bit immediate floating point number in the IEEE 754-2008 binary32 interchange format. All bit patterns are allowed. -.. type:: ieee64 - +ieee64 A 64-bit immediate floating point number in the IEEE 754-2008 binary64 interchange format. All bit patterns are allowed. -.. type:: bool - +bool A boolean immediate value, either false or true. - In the textual format, :type:`bool` immediates appear as 'false' + In the textual format, `bool` immediates appear as 'false' and 'true'. -.. type:: intcc - - An integer condition code. See the :inst:`icmp` instruction for details. - -.. type:: floatcc +intcc + An integer condition code. See the `icmp` instruction for details. - A floating point condition code. See the :inst:`fcmp` instruction for details. 
+floatcc
+    A floating point condition code. See the `fcmp` instruction for details.

-The two IEEE floating point immediate types :type:`ieee32` and :type:`ieee64`
+The two IEEE floating point immediate types `ieee32` and `ieee64`
 are displayed as hexadecimal floating point literals in the textual :term:`IR`
 format. Decimal floating point literals are not allowed because some computer
 systems can round differently when converting to binary. The hexadecimal
@@ -317,9 +304,9 @@ to represent all NaN bit patterns:
 Normal numbers
     Compatible with C99: ``-0x1.Tpe`` where ``T`` are the trailing significand
     bits encoded as hexadecimal, and ``e`` is the unbiased exponent
-    as a decimal number. :type:`ieee32` has 23 trailing significand bits. They
+    as a decimal number. `ieee32` has 23 trailing significand bits. They
     are padded with an extra LSB to produce 6 hexadecimal digits. This is not
-    necessary for :type:`ieee64` which has 52 trailing significand bits
+    necessary for `ieee64` which has 52 trailing significand bits
     forming 13 hexadecimal digits with no padding.

 Zeros
@@ -351,17 +338,10 @@ arguments, if it has any. Conditional branches only take the branch if their
 condition is satisfied, otherwise execution continues at the following
 instruction in the EBB.

-.. autoinst:: jump
-.. autoinst:: brz
-.. autoinst:: brnz
-.. autoinst:: br_icmp
-.. autoinst:: br_table
-
-.. inst:: JT = jump_table [EBB0, EBB1, ..., EBBn]
-
+JT = jump_table [EBB0, EBB1, ..., EBBn]
     Declare a jump table in the :term:`function preamble`.

-    This declares a jump table for use by the :inst:`br_table` indirect branch
+    This declares a jump table for use by the `br_table` indirect branch
     instruction. Entries in the table are EBB names.

     The EBBs listed must belong to the current function, and they can't have
@@ -375,13 +355,9 @@ instruction in the EBB.

 Traps stop the program because something went wrong. The exact behavior depends
 on the target instruction set architecture and operating system. There are
 explicit trap instructions defined below, but some instructions may also cause
-traps for certain input value. For example, :inst:`udiv` traps when the divisor
+traps for certain input values. For example, `udiv` traps when the divisor
 is zero.

-.. autoinst:: trap
-.. autoinst:: trapz
-.. autoinst:: trapnz
-
 Function calls
 ==============

@@ -399,7 +375,7 @@ convention:

     param        : type [paramext] [paramspecial]
     paramext     : "uext" | "sext"
     paramspecial : "sret" | "link" | "fp" | "csr" | "vmctx" | "sigid" | "stack_limit"
-    callconv     : "fast" | "cold" | "system_v" | "fastcall" | "baldrdash"
+    callconv     : "fast" | "cold" | "system_v" | "fastcall" | "baldrdash_system_v" | "baldrdash_windows"

 A function's calling convention determines exactly how arguments and return
 values are passed, and how stack frames are managed. Since all of these details
@@ -426,7 +402,8 @@
 fast       not-ABI-stable convention for best performance
 cold       not-ABI-stable convention for infrequently executed code
 system_v   System V-style convention used on many platforms
 fastcall   Windows "fastcall" convention, also used for x64 and ARM
-baldrdash  SpiderMonkey WebAssembly convention
+baldrdash_system_v SpiderMonkey WebAssembly convention on platforms natively using SystemV.
+baldrdash_windows  SpiderMonkey WebAssembly convention on platforms natively using Windows.
 ========== ===========================================

 The "not-ABI-stable" conventions do not follow an external specification and
 may change between versions of Cranelift and compilers.
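For orientation, here is how the renamed conventions might surface when constructing a signature through the Rust API. This is a minimal sketch, assuming the textual names above map to `cranelift_codegen::isa::CallConv` variants named `BaldrdashSystemV` and `BaldrdashWindows`; it is not part of the patch itself::

    use cranelift_codegen::ir::{types, AbiParam, ArgumentPurpose, Signature};
    use cranelift_codegen::isa::CallConv;

    // Roughly the textual signature
    // `(i32 uext, i64 vmctx) -> f32 baldrdash_system_v`.
    fn add_members_signature() -> Signature {
        // Assumed variant name for the SystemV flavor of the SpiderMonkey
        // WebAssembly convention.
        let mut sig = Signature::new(CallConv::BaldrdashSystemV);
        sig.params.push(AbiParam::new(types::I32).uext());
        sig.params
            .push(AbiParam::special(types::I64, ArgumentPurpose::VMContext));
        sig.returns.push(AbiParam::new(types::F32));
        sig
    }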
Functions that are called directly must be declared in the :term:`function preamble`: -.. inst:: FN = [colocated] NAME signature - +FN = [colocated] NAME signature Declare a function so it can be called directly. If the colocated keyword is present, the symbol's definition will be @@ -451,11 +427,7 @@ preamble`: :arg NAME: Name of the function, passed to the linker for resolution. :arg signature: Function signature. See below. - :result FN: A function identifier that can be used with :inst:`call`. - -.. autoinst:: call -.. autoinst:: x_return -.. autoinst:: fallthrough_return + :result FN: A function identifier that can be used with `call`. This simple example illustrates direct function calls and signatures: @@ -465,46 +437,39 @@ This simple example illustrates direct function calls and signatures: Indirect function calls use a signature declared in the preamble. -.. autoinst:: call_indirect -.. autoinst:: func_addr - .. _memory: Memory ====== -Cranelift provides fully general :inst:`load` and :inst:`store` instructions for -accessing memory, as well as :ref:`extending loads and truncating stores +Cranelift provides fully general `load` and `store` instructions for accessing +memory, as well as :ref:`extending loads and truncating stores <extload-truncstore>`. If the memory at the given address is not :term:`addressable`, the behavior of these instructions is undefined. If it is addressable but not :term:`accessible`, they :term:`trap`. -.. autoinst:: load -.. autoinst:: store - There are also more restricted operations for accessing specific types of memory objects. Additionally, instructions are provided for handling multi-register addressing. -.. autoinst:: load_complex -.. autoinst:: store_complex - Memory operation flags ---------------------- Loads and stores can have flags that loosen their semantics in order to enable optimizations. -======= =========================================== +======== =========================================== Flag Description -======= =========================================== +======== =========================================== notrap Memory is assumed to be :term:`accessible`. aligned Trapping allowed for misaligned accesses. -readonly The data at the specified address will not modified between when this function is called and exited. -======= =========================================== +readonly The data at the specified address will not + be modified between when this function is + called and exited. +======== =========================================== When the ``notrap`` flag is set, the behavior is undefined if the memory is not :term:`accessible`. @@ -523,8 +488,7 @@ allocated in the :term:`function preamble`. Stack slots are not typed; they simply represent a contiguous sequence of :term:`accessible` bytes in the stack frame. -.. inst:: SS = explicit_slot Bytes, Flags... - +SS = explicit_slot Bytes, Flags... Allocate a stack slot in the preamble. If no alignment is specified, Cranelift will pick an appropriate alignment @@ -534,9 +498,6 @@ frame. :flag align(N): Request at least N bytes alignment. :result SS: Stack slot index. -.. autoinst:: stack_load -.. autoinst:: stack_store - The dedicated stack access instructions are easy for the compiler to reason about because stack slots and offsets are fixed at compile time. For example, the alignment of these stack memory accesses can be inferred from the offsets and stack slot alignments. 
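As an illustrative sketch only (the function name, slot size, and value type below are invented, not taken from the original text), a function that stores a value into an explicit slot and reloads it might look like this::

    function %slot_demo(i64) -> i64 {
        ss0 = explicit_slot 8

    ebb0(v0: i64):
        ; the slot and offset are compile-time constants, so the
        ; access alignment is statically known
        stack_store v0, ss0
        v1 = stack_load.i64 ss0
        return v1
    }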
It's also possible to obtain the address of a stack slot, which can be used in :ref:`unrestricted loads and stores <memory>`. -.. autoinst:: stack_addr - -The :inst:`stack_addr` instruction can be used to macro-expand the stack access +The `stack_addr` instruction can be used to macro-expand the stack access instructions before instruction selection:: v0 = stack_load.f64 ss3, 16 @@ -562,7 +521,7 @@ Global values ------------- A *global value* is an object whose value is not known at compile time. The -value is computed at runtime by :inst:`global_value`, possibly using +value is computed at runtime by `global_value`, possibly using information provided by the linker via relocations. There are multiple kinds of global values using different methods for determining their value. Cranelift does not track the type of a global value, for they are just @@ -577,8 +536,7 @@ Cranelift functions. Chains of global value expressions are possible, but cycles are not allowed. They will be caught by the IR verifier. -.. inst:: GV = vmctx - +GV = vmctx Declare a global value holding the address of the VM context struct. This declares a global value which is the VM context pointer, which may @@ -592,8 +550,7 @@ A global value can also be derived by treating another global variable as a struct pointer and loading from one of its fields. This makes it possible to chase pointers into VM runtime data structures. -.. inst:: GV = load.Type BaseGV [Offset] - +GV = load.Type BaseGV [Offset] Declare a global value pointed to by BaseGV plus Offset, with type Type. It is assumed that BaseGV plus Offset resides in accessible memory with the @@ -603,15 +560,13 @@ chase pointers into VM runtime data structures. :arg Offset: Offset added to the base before loading. :result GV: Global value. -.. inst:: GV = iadd_imm BaseGV, Offset - +GV = iadd_imm BaseGV, Offset Declare a global value which has the value of BaseGV offset by Offset. :arg BaseGV: Global value providing the base value. :arg Offset: Offset added to the base value. -.. inst:: GV = [colocated] symbol Name - +GV = [colocated] symbol Name Declare a symbolic address global value. The value of GV is symbolic and will be assigned a relocation, so that @@ -624,10 +579,6 @@ chase pointers into VM runtime data structures. :arg Name: External name. :result GV: Global value. -.. autoinst:: global_value -.. autoinst:: symbol_value - - Heaps ----- @@ -637,9 +588,9 @@ in, and all accesses are bounds checked. Cranelift models this through the concept of *heaps*. A heap is declared in the function preamble and can be accessed with the -:inst:`heap_addr` instruction that :term:`traps` on out-of-bounds accesses or +`heap_addr` instruction that :term:`traps` on out-of-bounds accesses or returns a pointer that is guaranteed to trap. Heap addresses can be smaller than -the native pointer size, for example unsigned :type:`i32` offsets on a 64-bit +the native pointer size, for example unsigned `i32` offsets on a 64-bit architecture. .. digraph:: static @@ -667,12 +618,10 @@ A heap appears as three consecutive ranges of address space: not :term:`accessible`. The *heap bound* is the total size of the mapped and unmapped pages. This is -the bound that :inst:`heap_addr` checks against. Memory accesses inside the +the bound that `heap_addr` checks against. Memory accesses inside the heap bounds can trap if they hit an unmapped page (which is not :term:`accessible`). -.. autoinst:: heap_addr - Two styles of heaps are supported: *static* and *dynamic*. They behave differently when resized. 
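As a hedged sketch (the bounds, guard size, and the use of the raw ``vmctx`` pointer as the heap base are arbitrary choices for illustration, not from the original text), declaring a static heap and performing a bounds-checked access could look like this::

    function %heap_demo(i64 vmctx, i32) -> i32 {
        gv0 = vmctx
        heap0 = static gv0, min 0x1000, bound 0x1_0000, offset_guard 0x1000

    ebb0(v0: i64, v1: i32):
        ; heap_addr traps, or yields a pointer guaranteed to trap,
        ; if the 4-byte access at offset v1 would exceed the bound
        v2 = heap_addr.i64 heap0, v1, 4
        v3 = load.i32 v2
        return v3
    }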
@@ -686,8 +635,7 @@ unmapped pages where the heap can grow up to its maximum size. After the unmapped pages follow the offset-guard pages, which are also guaranteed to generate a trap when accessed. -.. inst:: H = static Base, min MinBytes, bound BoundBytes, offset_guard OffsetGuardBytes - +H = static Base, min MinBytes, bound BoundBytes, offset_guard OffsetGuardBytes Declare a static heap in the preamble. :arg Base: Global value holding the heap's base address. @@ -705,8 +653,7 @@ A *dynamic heap* can be relocated to a different base address when it is resized, and its bound can move dynamically. The offset-guard pages move when the heap is resized. The bound of a dynamic heap is stored in a global value. -.. inst:: H = dynamic Base, min MinBytes, bound BoundGV, offset_guard OffsetGuardBytes - +H = dynamic Base, min MinBytes, bound BoundGV, offset_guard OffsetGuardBytes Declare a dynamic heap in the preamble. :arg Base: Global value holding the heap's base address. @@ -755,25 +702,22 @@ linear memory. WebAssembly uses *tables* to allow programs to refer to opaque values through integer indices. A table is declared in the function preamble and can be accessed with the -:inst:`table_addr` instruction that :term:`traps` on out-of-bounds accesses. +`table_addr` instruction that :term:`traps` on out-of-bounds accesses. Table addresses can be smaller than the native pointer size, for example -unsigned :type:`i32` offsets on a 64-bit architecture. +unsigned `i32` offsets on a 64-bit architecture. A table appears as a consecutive range of address space, conceptually divided into elements of fixed sizes, which are identified by their index. The memory is :term:`accessible`. The *table bound* is the number of elements currently in the table. This is -the bound that :inst:`table_addr` checks against. - -.. autoinst:: table_addr +the bound that `table_addr` checks against. A table can be relocated to a different base address when it is resized, and its bound can move dynamically. The bound of a table is stored in a global value. -.. inst:: T = dynamic Base, min MinElements, bound BoundGV, element_size ElementSize - +T = dynamic Base, min MinElements, bound BoundGV, element_size ElementSize Declare a table in the preamble. :arg Base: Global value holding the table's base address. @@ -781,85 +725,12 @@ value. :arg BoundGV: Global value containing the current table bound in elements. :arg ElementSize: Size of each element. -Operations -========== - -.. autoinst:: select -.. autoinst:: selectif - Constant materialization ------------------------ -A few instructions have variants that take immediate operands (e.g., -:inst:`band` / :inst:`band_imm`), but in general an instruction is required to -load a constant into an SSA value. - -.. autoinst:: iconst -.. autoinst:: f32const -.. autoinst:: f64const -.. autoinst:: bconst - -Vector operations ------------------ - -.. autoinst:: vsplit -.. autoinst:: vconcat -.. autoinst:: vselect -.. autoinst:: splat -.. autoinst:: insertlane -.. autoinst:: extractlane - -Integer operations ------------------- - -.. autoinst:: icmp -.. autoinst:: icmp_imm -.. autoinst:: iadd -.. autoinst:: iadd_imm -.. autoinst:: iadd_cin -.. autoinst:: iadd_cout -.. autoinst:: iadd_carry -.. autoinst:: isub -.. autoinst:: irsub_imm -.. autoinst:: isub_bin -.. autoinst:: isub_bout -.. autoinst:: isub_borrow - -.. todo:: Add and subtract with signed overflow. - For example, see `llvm.sadd.with.overflow.*` and `llvm.ssub.with.overflow.*` in `LLVM `_. -.. autoinst:: imul -.. 
autoinst:: imul_imm - -.. todo:: Larger multiplication results. - For example, ``smulx`` which multiplies :type:`i32` operands to produce a - :type:`i64` result. Alternatively, ``smulhi`` and ``smullo`` pairs. -.. autoinst:: udiv -.. autoinst:: udiv_imm -.. autoinst:: sdiv -.. autoinst:: sdiv_imm -.. autoinst:: urem -.. autoinst:: urem_imm -.. autoinst:: srem -.. autoinst:: srem_imm - -.. todo:: Integer minimum / maximum. - NEON has ``smin``, ``smax``, ``umin``, and ``umax`` instructions. We should - replicate those for both scalar and vector integer types. Even if the - target ISA doesn't have scalar operations, these are good pattern matching - targets. - -.. todo:: Saturating arithmetic. - Mostly for SIMD use, but again these are good patterns for contraction. - Something like ``usatadd``, ``usatsub``, ``ssatadd``, and ``ssatsub`` is a - good start. +A few instructions have variants that take immediate operands, but in general +an instruction is required to load a constant into an SSA value: `iconst`, +`f32const`, `f64const`, and `bconst` serve this purpose. Bitwise operations ------------------ @@ -869,17 +740,6 @@ numbers, and booleans. When operating on integer or floating point types, the bitwise operations operate on the binary representation of the values. When operating on boolean values, the bitwise operations work as logical operators. -.. autoinst:: band -.. autoinst:: band_imm -.. autoinst:: bor -.. autoinst:: bor_imm -.. autoinst:: bxor -.. autoinst:: bxor_imm -.. autoinst:: bnot -.. autoinst:: band_not -.. autoinst:: bor_not -.. autoinst:: bxor_not - The shift and rotate operations only work on integer types (scalar and vector). The shift amount does not have to be the same type as the value being shifted. Only the low `B` bits of the shift amount are significant. @@ -888,37 +748,13 @@ When operating on an integer vector type, the shift amount is still a scalar type, and all the lanes are shifted the same amount. The shift amount is masked to the number of bits in a *lane*, not the full size of the vector type. -.. autoinst:: rotl -.. autoinst:: rotl_imm -.. autoinst:: rotr -.. autoinst:: rotr_imm -.. autoinst:: ishl -.. autoinst:: ishl_imm -.. autoinst:: ushr -.. autoinst:: ushr_imm -.. autoinst:: sshr -.. autoinst:: sshr_imm - -The bit-counting instructions below are scalar only. - -.. autoinst:: clz -.. autoinst:: cls -.. autoinst:: ctz -.. autoinst:: popcnt +The bit-counting instructions are scalar only. Floating point operations ------------------------- These operations generally follow IEEE 754-2008 semantics. -.. autoinst:: fcmp -.. autoinst:: fadd -.. autoinst:: fsub -.. autoinst:: fmul -.. autoinst:: fdiv -.. autoinst:: sqrt -.. autoinst:: fma - Sign bit manipulations ~~~~~~~~~~~~~~~~~~~~~~ @@ -926,10 +762,6 @@ The sign manipulating instructions work as bitwise operations, so they don't have special behavior for signaling NaN operands. The exponent and trailing significand bits are always preserved. -.. autoinst:: fneg -.. autoinst:: fabs -.. autoinst:: fcopysign - Minimum and maximum ~~~~~~~~~~~~~~~~~~~ @@ -939,40 +771,15 @@ return NaN when either input is NaN. When comparing zeroes, these instructions behave as if :math:`-0.0 < 0.0`. -.. autoinst:: fmin -.. autoinst:: fmax - Rounding ~~~~~~~~ These instructions round their argument to a nearby integral value, still represented as a floating point number. -.. autoinst:: ceil -.. autoinst:: floor -.. autoinst:: trunc -.. autoinst:: nearest - Conversion operations --------------------- -.. 
autoinst:: bitcast -.. autoinst:: breduce -.. autoinst:: bextend -.. autoinst:: bint -.. autoinst:: bmask -.. autoinst:: ireduce -.. autoinst:: uextend -.. autoinst:: sextend -.. autoinst:: fpromote -.. autoinst:: fdemote -.. autoinst:: fcvt_to_uint -.. autoinst:: fcvt_to_sint -.. autoinst:: fcvt_to_uint_sat -.. autoinst:: fcvt_to_sint_sat -.. autoinst:: fcvt_from_uint -.. autoinst:: fcvt_from_sint - .. _extload-truncstore: Extending loads and truncating stores ===================================== Most ISAs provide instructions that load an integer value smaller than a register and extend it to the width of the register. Similarly, store instructions that only write the low bits of an integer register are common. -In addition to the normal :inst:`load` and :inst:`store` instructions, Cranelift +In addition to the normal `load` and `store` instructions, Cranelift provides extending loads and truncating stores for 8, 16, and 32-bit memory accesses. These instructions succeed, trap, or have undefined behavior under the same conditions as :ref:`normal loads and stores <memory>`. -.. autoinst:: uload8 -.. autoinst:: sload8 -.. autoinst:: istore8 -.. autoinst:: uload16 -.. autoinst:: sload16 -.. autoinst:: istore16 -.. autoinst:: uload32 -.. autoinst:: sload32 -.. autoinst:: istore32 - ISA-specific instructions ========================= @@ -1010,16 +807,6 @@ x86 Instructions that can only be used by the x86 target ISA. -.. autoinst:: isa.x86.instructions.sdivmodx -.. autoinst:: isa.x86.instructions.udivmodx -.. autoinst:: isa.x86.instructions.cvtt2si -.. autoinst:: isa.x86.instructions.fmin -.. autoinst:: isa.x86.instructions.fmax -.. autoinst:: isa.x86.instructions.bsf -.. autoinst:: isa.x86.instructions.bsr -.. autoinst:: isa.x86.instructions.push -.. autoinst:: isa.x86.instructions.pop - Codegen implementation instructions =================================== @@ -1032,42 +819,18 @@ Legalization operations These instructions are used as helpers when legalizing types and operations for the target ISA. -.. autoinst:: isplit -.. autoinst:: iconcat - Special register operations --------------------------- The prologue and epilogue of a function need to manipulate special registers like the stack pointer and the frame pointer. These instructions should not be used in regular code. -.. autoinst:: adjust_sp_down -.. autoinst:: adjust_sp_up_imm -.. autoinst:: adjust_sp_down_imm -.. autoinst:: ifcmp_sp -.. autoinst:: copy_special - -Low-level control flow operations ---------------------------------- - -.. autoinst:: fallthrough - CPU flag operations ------------------- These operations are for working with the "flags" registers of some CPU architectures. -.. autoinst:: ifcmp -.. autoinst:: ifcmp_imm -.. autoinst:: ffcmp -.. autoinst:: trueif -.. autoinst:: trueff -.. autoinst:: trapif -.. autoinst:: trapff -.. autoinst:: brif -.. autoinst:: brff - Live range splitting -------------------- @@ -1077,37 +840,24 @@ value can be quite large, it is sometimes beneficial to split the live range into smaller parts. A live range is split by creating new SSA values that are copies of the -original value or each other. The copies are created by inserting :inst:`copy`, -:inst:`spill`, or :inst:`fill` instructions, depending on whether the values +original value or each other. The copies are created by inserting `copy`, +`spill`, or `fill` instructions, depending on whether the values are assigned to registers or stack slots. This approach permits SSA form to be preserved throughout the register allocation pass and beyond. -.. 
autoinst:: copy -.. autoinst:: spill -.. autoinst:: fill - Register values can be temporarily diverted to other registers by the -:inst:`regmove` instruction, and to and from stack slots by :inst:`regspill` -and :inst:`regfill`. - -.. autoinst:: regmove -.. autoinst:: regspill -.. autoinst:: regfill - +`regmove` instruction, and to and from stack slots by `regspill` +and `regfill`. Instruction groups ================== -All of the shared instructions are part of the :instgroup:`base` instruction +All of the shared instructions are part of the `base` instruction group. -.. autoinstgroup:: base.instructions.GROUP - -Target ISAs may define further instructions in their own instruction groups: - -.. autoinstgroup:: isa.x86.instructions.GROUP +Target ISAs may define further instructions in their own instruction groups. Implementation limits ===================== @@ -1265,8 +1015,8 @@ Glossary execution somewhere else. Execution never continues at the instruction following a terminator instruction. - The basic terminator instructions are :inst:`br`, :inst:`return`, and - :inst:`trap`. Conditional branches and instructions that trap + The basic terminator instructions are `br`, `return`, and + `trap`. Conditional branches and instructions that trap conditionally are not terminator instructions. trap diff --git a/docs/meta.rst b/docs/meta.rst index 67457835b..5bc000a80 100644 --- a/docs/meta.rst +++ b/docs/meta.rst @@ -4,57 +4,49 @@ Cranelift Meta Language Reference .. default-domain:: py .. highlight:: python -.. module:: cdsl The Cranelift meta language is used to define instructions for Cranelift. It is a -domain specific language embedded in Python. This document describes the Python -modules that form the embedded DSL. +domain-specific language embedded in Rust. + +.. todo:: Point to the Rust documentation of the meta crate here. + + This document is very out-of-date. Instead, you can have a look at the + work-in-progress documentation of the `meta` crate here: + https://docs.rs/cranelift-codegen-meta/0.34.0/cranelift_codegen_meta/. + +This document describes the Python modules that form the embedded DSL. The meta language descriptions are Python modules under the -:file:`cranelift-codegen/meta-python` directory. The descriptions are processed in two +`cranelift-codegen/meta-python` directory. The descriptions are processed in two steps: 1. The Python modules are imported. This has the effect of building static data structures in global variables in the modules. These static data structures - in the :mod:`base` and :mod:`isa` packages use the classes in the - :mod:`cdsl` package to describe instruction sets and other properties. + in the `base` and `isa` packages use the classes in the + `cdsl` package to describe instruction sets and other properties. 2. The static data structures are processed to produce Rust source code and constant tables. The main driver for this source code generation process is the -:file:`cranelift-codegen/meta-python/build.py` script which is invoked as part of the build -process if anything in the :file:`cranelift-codegen/meta-python` directory has changed +`cranelift-codegen/meta-python/build.py` script, which is invoked as part of the build +process if anything in the `cranelift-codegen/meta-python` directory has changed since the last build. - -.. module:: cdsl.settings - Settings ======== Settings are used by the environment embedding Cranelift to control the details of code generation. 
Each setting is defined in the meta language so a compact and consistent Rust representation can be generated. Shared settings are defined -in the :mod:`base.settings` module. Some settings are specific to a target ISA, -and defined in a :file:`settings.py` module under the appropriate -:file:`cranelift-codegen/meta-python/isa/*` directory. +in the `base.settings` module. Some settings are specific to a target ISA, +and are defined in a `settings.py` module under the appropriate +`cranelift-codegen/meta-python/isa/*` directory. Settings can take boolean on/off values, small numbers, or explicitly enumerated -symbolic values. Each type is represented by a sub-class of :class:`Setting`: - -.. inheritance-diagram:: Setting BoolSetting NumSetting EnumSetting - :parts: 1 +symbolic values. -.. autoclass:: Setting -.. autoclass:: BoolSetting -.. autoclass:: NumSetting -.. autoclass:: EnumSetting - -All settings must belong to a *group*, represented by a :class:`SettingGroup` -object. - -.. autoclass:: SettingGroup +All settings must belong to a *group*, represented by a :class:`SettingGroup` object. Normally, a setting group corresponds to all settings defined in a module. Such a module looks like this:: @@ -67,9 +59,6 @@ a module looks like this:: group.close(globals()) - -.. module:: cdsl.instructions - Instruction descriptions ======================== @@ -77,27 +66,16 @@ New instructions are defined as instances of the :class:`Instruction` class. As instruction instances are created, they are added to the currently open :class:`InstructionGroup`. -.. autoclass:: InstructionGroup - :members: - The basic Cranelift instruction set described in :doc:`ir` is defined by the -Python module :mod:`base.instructions`. This module has a global value -:data:`base.instructions.GROUP` which is an :class:`InstructionGroup` instance +Python module `base.instructions`. This module has a global value +`base.instructions.GROUP`, which is an :class:`InstructionGroup` instance containing all the base instructions. -.. autoclass:: Instruction - -.. currentmodule:: cdsl.operands - An instruction is defined with a set of distinct input and output operands which must be instances of the :class:`Operand` class. -.. autoclass:: Operand - Cranelift uses two separate type systems for operand kinds and SSA values. -.. module:: cdsl.typevar - Type variables -------------- @@ -106,9 +84,6 @@ Instruction descriptions can be made polymorphic by using instead of a concrete value type. Polymorphism only works for SSA value operands. Other operands have a fixed operand kind. -.. autoclass:: TypeVar - :members: - If multiple operands refer to the same type variable, they will be required to have the same concrete type. For example, this defines an integer addition instruction:: @@ -131,61 +106,27 @@ There are some practical restrictions on the use of type variables, see Immediate operands ------------------ -.. currentmodule:: cdsl.operands - Immediate instruction operands don't correspond to SSA values, but have values that are encoded directly in the instruction. Immediate operands don't have types from the :class:`cdsl.types.ValueType` type system; they often have enumerated values of a specific type. The type of an immediate operand is indicated with an instance of :class:`ImmediateKind`. -.. autoclass:: ImmediateKind - -.. automodule:: base.immediates - :members: - Entity references ----------------- -.. currentmodule:: cdsl.operands - Instruction operands can also refer to other entities in the same function. 
These can be extended basic blocks or entities declared in the function preamble. -.. autoclass:: EntityRefKind - -.. automodule:: base.entities - :members: - Value types ---------- -.. currentmodule:: cdsl.types - Concrete value types are represented as instances of :class:`ValueType`. There are subclasses to represent scalar and vector types. -.. autoclass:: ValueType -.. inheritance-diagram:: ValueType LaneType VectorType IntType FloatType BoolType SpecialType FlagsType - :parts: 1 -.. autoclass:: LaneType - :members: -.. autoclass:: VectorType -.. autoclass:: SpecialType -.. autoclass:: IntType -.. autoclass:: FloatType -.. autoclass:: BoolType -.. autoclass:: FlagsType - -.. automodule:: base.types - :members: - There are no predefined vector types, but they can be created as needed with the :func:`LaneType.by` function. - -.. module:: cdsl.operands - Instruction representation ========================== @@ -195,24 +136,12 @@ written as Rust code in the ``cranelift.instructions`` module. The instruction representation depends on the input operand kinds and whether the instruction can produce multiple results. -.. autoclass:: OperandKind -.. inheritance-diagram:: OperandKind ImmediateKind EntityRefKind - Since all SSA value operands are represented as a `Value` in Rust code, value -types don't affect the representation. Two special operand kinds are used to -represent SSA values: - -.. autodata:: VALUE -.. autodata:: VARIABLE_ARGS - -.. module:: cdsl.formats +types don't affect the representation. When an instruction description is created, it is automatically assigned a predefined instruction format, which is an instance of -:class:`InstructionFormat`: - -.. autoclass:: InstructionFormat - +:class:`InstructionFormat`. .. _restricted-polymorphism: @@ -257,8 +186,6 @@ controlling type variable, or it can vary independently of the other operands. Encodings ========= -.. currentmodule:: cdsl.isa - Encodings describe how Cranelift instructions are mapped to binary machine code for the target architecture. After the legalization pass, all remaining instructions are expected to map 1-1 to native instruction encodings. Cranelift @@ -270,7 +197,7 @@ incompatible encodings. For example, a modern ARMv8 CPU might support three different CPU modes: *A64* where instructions are encoded in 32 bits, *A32* where all instructions are 32 bits, and *T32* which has a mix of 16-bit and 32-bit instruction encodings. These are incompatible encoding spaces, and while -an :clif:inst:`iadd` instruction can be encoded in 32 bits in each of them, it's +an `iadd` instruction can be encoded in 32 bits in each of them, it's not the same 32 bits. It's a judgement call whether CPU modes should be modelled as separate targets, or as sub-modes of the same target. In the ARMv8 case, the different register banks mean that it makes sense to model A64 as a separate @@ -281,8 +208,6 @@ instruction. Both RISC-V and ARMv8's T32 mode have 32-bit encodings of all instructions with 16-bit encodings available for some opcodes if certain constraints are satisfied. -.. autoclass:: CPUMode - Encodings are guarded by :term:`sub-target predicate`\s. For example, the RISC-V "C" extension, which specifies the compressed encodings, may not be supported, and a predicate would be used to disable all of the 16-bit encodings in that case. @@ -320,7 +245,7 @@ An :py:class:`Encoding` instance specifies the encoding of a concrete instruction. The following properties are used to select instructions to be encoded: -- An opcode, i.e. 
:clif:inst:`iadd_imm`, that must match the instruction's +- An opcode, e.g. `iadd_imm`, that must match the instruction's opcode. - Values for any type variables if the opcode represents a polymorphic instruction. @@ -343,8 +268,6 @@ The additional predicates in the :py:class:`EncRecipe` are merged with the per-encoding predicates when generating the encoding matcher code. Often encodings only need the recipe predicates. -.. autoclass:: EncRecipe - Register constraints ==================== @@ -364,9 +287,6 @@ Each encoding recipe specifies separate constraints for its value operands and result. These constraints are separate from the instruction predicate, which can only evaluate the instruction's immediate operands. -.. module:: cdsl.registers -.. autoclass:: RegBank - Register class constraints -------------------------- @@ -381,8 +301,6 @@ register class:: This defines an encoding recipe for the ``Binary`` instruction format where both input operands must be allocated from the ``GPR`` register class. -.. autoclass:: RegClass - Tied register operands ---------------------- @@ -413,7 +331,7 @@ Stack operands -------------- Cranelift's register allocator can assign an SSA value to a stack slot if there -isn't enough registers. It will insert :clif:inst:`spill` and :clif:inst:`fill` +aren't enough registers. It will insert `spill` and `fill` instructions as needed to satisfy instruction operand constraints, but it is also possible to have instructions that can access stack slots directly:: @@ -422,27 +340,14 @@ also possible to have instructions that can access stack slots directly:: An output stack value implies a store to the stack; an input value implies a load. -.. module:: cdsl.isa - Targets ======= Cranelift can be compiled with support for multiple target instruction set architectures. Each ISA is represented by a :py:class:`cdsl.isa.TargetISA` instance. -.. autoclass:: TargetISA - The definitions for each supported target live in a package under -:file:`cranelift-codegen/meta-python/isa`. - -.. automodule:: isa - :members: - -.. automodule:: isa.riscv -.. automodule:: isa.x86 -.. automodule:: isa.arm32 -.. automodule:: isa.arm64 - +`cranelift-codegen/meta-python/isa`. 
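As a rough sketch of how such a package is wired up (the module name and constructor arguments here are recalled from the meta-python DSL and should be treated as assumptions, not a definitive listing), a target definition ties its instruction groups to a :py:class:`TargetISA` and one or more CPU modes::

    # Hypothetical isa/example/defs.py; all names are illustrative.
    from cdsl.isa import TargetISA, CPUMode
    import base.instructions

    # The ISA aggregates the shared instruction group, plus any
    # target-specific groups defined alongside this module.
    ISA = TargetISA('example', [base.instructions.GROUP])

    # Each CPU mode owns its own set of legal encodings.
    M32 = CPUMode('M32', ISA)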
Glossary ======== diff --git a/filetests/cfg/loop.clif b/filetests/cfg/loop.clif index 728240940..a52ae0998 100644 --- a/filetests/cfg/loop.clif +++ b/filetests/cfg/loop.clif @@ -3,15 +3,33 @@ test print-cfg test verifier function %nonsense(i32, i32) -> f32 { -; check: digraph "%nonsense" { ; regex: I=\binst\d+\b -; check: label="{ebb0 | <$(BRZ=$I)>brz ebb2 | <$(JUMP=$I)>jump ebb1}"] - +; check: digraph "%nonsense" { +; check: ebb0 [shape=record, label="{ebb0(v1: i32, v2: i32): +; check: | <$(BRZ=$I)>brz v2, ebb2 +; nextln: | <$(JUMP0=$I)>jump ebb3 +; nextln: }"] +; nextln: ebb3 [shape=record, label="{ebb3: +; check: | <$(JUMP3=$I)>jump ebb1(v4) +; nextln: }"] +; nextln: ebb1 [shape=record, label="{ebb1(v5: i32): +; check: | <$(BRNZ1=$I)>brnz v13, ebb1(v12) +; nextln: | <$(JUMP1=$I)>jump ebb4 +; nextln: }"] +; nextln: ebb4 [shape=record, label="{ebb4: +; check: | <$I>return v17 +; nextln: }"] +; nextln: ebb2 [shape=record, label="{ebb2: +; check: | <$I>return v100 +; check:}"] ebb0(v1: i32, v2: i32): v3 = f64const 0x0.0 brz v2, ebb2 ; unordered: ebb0:$BRZ -> ebb2 + jump ebb3 ; unordered: ebb0:$JUMP0 -> ebb3 + +ebb3: v4 = iconst.i32 0 - jump ebb1(v4) ; unordered: ebb0:$JUMP -> ebb1 + jump ebb1(v4) ; unordered: ebb3:$JUMP3 -> ebb1 ebb1(v5: i32): v6 = imul_imm v5, 4 @@ -22,7 +40,10 @@ ebb1(v5: i32): v11 = fadd v9, v10 v12 = iadd_imm v5, 1 v13 = icmp ult v12, v2 - brnz v13, ebb1(v12) ; unordered: ebb1:inst12 -> ebb1 + brnz v13, ebb1(v12) ; unordered: ebb1:$BRNZ1 -> ebb1 + jump ebb4 ; unordered: ebb1:$JUMP1 -> ebb4 + +ebb4: v14 = f64const 0.0 v15 = f64const 0.0 v16 = fdiv v14, v15 diff --git a/filetests/cfg/unused_node.clif b/filetests/cfg/unused_node.clif index 80f2402c0..1a2dd9fb1 100644 --- a/filetests/cfg/unused_node.clif +++ b/filetests/cfg/unused_node.clif @@ -3,10 +3,16 @@ test print-cfg function %not_reached(i32) -> i32 { ; check: digraph "%not_reached" { -; check: ebb0 [shape=record, label="{ebb0 | brnz ebb2}"] -; check: ebb1 [shape=record, label="{ebb1 | jump ebb0}"] -; check: ebb2 [shape=record, label="{ebb2}"] - +; check: ebb0 [shape=record, label="{ebb0(v0: i32): +; check: | brnz v0, ebb2 +; check: | trap user0 +; check: }"] +; check: ebb1 [shape=record, label="{ebb1: +; check: | jump ebb0(v2) +; check: }"] +; check: ebb2 [shape=record, label="{ebb2: +; check: | return v0 +; check: }"] ebb0(v0: i32): brnz v0, ebb2 ; unordered: ebb0:inst0 -> ebb2 trap user0 diff --git a/filetests/domtree/loops.clif b/filetests/domtree/loops.clif index fda2984e9..3f3fafb01 100644 --- a/filetests/domtree/loops.clif +++ b/filetests/domtree/loops.clif @@ -16,6 +16,8 @@ function %test(i32) { jump ebb5 ebb5: brz v0, ebb4 + jump ebb6 ; dominates: ebb6 + ebb6: return } ; Fall-through-first, prune-at-source DFT: @@ -28,7 +30,9 @@ function %test(i32) { ; ebb2:brz v3, ebb1 - ; ebb2:brz v4, ebb4 { ; ebb2: jump ebb5 { -; ebb5 {} +; ebb5: jump ebb6 { +; ebb6 {} +; } ; } ; ebb4 {} ; } @@ -43,6 +47,7 @@ function %test(i32) { ; } ebb0 ; ; check: cfg_postorder: +; sameln: ebb6 ; sameln: ebb5 ; sameln: ebb3 ; sameln: ebb4 @@ -56,7 +61,8 @@ function %test(i32) { ; nextln: ebb2: ; nextln: ebb4: ; nextln: ebb3: -; nextln: ebb5: +; nextln: ebb5: ebb6 +; nextln: ebb6: ; nextln: } function %loop2(i32) system_v { @@ -72,10 +78,14 @@ function %loop2(i32) system_v { jump ebb4 ebb4: brz v0, ebb3 + jump ebb8 ; dominates: ebb8 + ebb8: brnz v0, ebb5 jump ebb6 ; dominates: ebb6 ebb5: brz v0, ebb4 + jump ebb9 ; dominates: ebb9 + ebb9: trap user0 ebb6: jump ebb7 ; dominates: ebb7 @@ -83,9 +93,11 @@ function %loop2(i32) system_v { 
return } ; check: cfg_postorder: +; sameln: ebb9 ; sameln: ebb5 ; sameln: ebb7 ; sameln: ebb6 +; sameln: ebb8 ; sameln: ebb3 ; sameln: ebb4 ; sameln: ebb2 @@ -96,9 +108,11 @@ function %loop2(i32) system_v { ; nextln: ebb0: ebb1 ebb2 ebb4 ebb3 ebb5 ; nextln: ebb1: ; nextln: ebb2: -; nextln: ebb4: ebb6 +; nextln: ebb4: ebb8 +; nextln: ebb8: ebb6 ; nextln: ebb6: ebb7 ; nextln: ebb7: ; nextln: ebb3: -; nextln: ebb5: +; nextln: ebb5: ebb9 +; nextln: ebb9: ; nextln: } diff --git a/filetests/domtree/loops2.clif b/filetests/domtree/loops2.clif index 80e00ca27..84712c112 100644 --- a/filetests/domtree/loops2.clif +++ b/filetests/domtree/loops2.clif @@ -3,6 +3,8 @@ test domtree function %loop1(i32) { ebb0(v0: i32): brz v0, ebb1 ; dominates: ebb1 ebb6 + jump ebb10 ; dominates: ebb10 + ebb10: brnz v0, ebb2 ; dominates: ebb2 ebb9 jump ebb3 ; dominates: ebb3 ebb1: @@ -14,10 +16,14 @@ function %loop1(i32) { jump ebb9 ebb4: brz v0, ebb4 + jump ebb11 ; dominates: ebb11 + ebb11: brnz v0, ebb6 jump ebb7 ebb5: brz v0, ebb7 + jump ebb12 ; dominates: ebb12 + ebb12: brnz v0, ebb8 jump ebb9 ebb6: @@ -31,16 +37,19 @@ function %loop1(i32) { } ; check: domtree_preorder { -; nextln: ebb0: ebb1 ebb2 ebb6 ebb3 ebb9 +; nextln: ebb0: ebb1 ebb10 ebb6 ; nextln: ebb1: +; nextln: ebb10: ebb2 ebb3 ebb9 ; nextln: ebb2: ebb4 ebb5 ebb7 ebb8 -; nextln: ebb4: -; nextln: ebb5: +; nextln: ebb4: ebb11 +; nextln: ebb11: +; nextln: ebb5: ebb12 +; nextln: ebb12: ; nextln: ebb7: ; nextln: ebb8: -; nextln: ebb6: ; nextln: ebb3: ; nextln: ebb9: +; nextln: ebb6: ; nextln: } function %loop2(i32) system_v { @@ -59,9 +68,12 @@ function %loop2(i32) system_v { jump ebb5 ebb5: brz v0, ebb4 + jump ebb6 ; dominates: ebb6 + ebb6: return } ; check: cfg_postorder: +; sameln: ebb6 ; sameln: ebb5 ; sameln: ebb3 ; sameln: ebb4 @@ -75,5 +87,6 @@ function %loop2(i32) system_v { ; nextln: ebb2: ; nextln: ebb4: ; nextln: ebb3: -; nextln: ebb5: +; nextln: ebb5: ebb6 +; nextln: ebb6: ; nextln: } diff --git a/filetests/domtree/tall-tree.clif b/filetests/domtree/tall-tree.clif index cad763fc3..6f93c023e 100644 --- a/filetests/domtree/tall-tree.clif +++ b/filetests/domtree/tall-tree.clif @@ -3,6 +3,8 @@ test domtree function %test(i32) { ebb0(v0: i32): brz v0, ebb1 ; dominates: ebb1 + jump ebb12 ; dominates: ebb12 + ebb12: brnz v0, ebb2 ; dominates: ebb2 ebb5 jump ebb3 ; dominates: ebb3 ebb1: @@ -18,6 +20,8 @@ function %test(i32) { return ebb6: brz v0, ebb8 ; dominates: ebb11 ebb8 + jump ebb13 ; dominates: ebb13 + ebb13: brnz v0, ebb9 ; dominates: ebb9 jump ebb10 ebb7: @@ -33,15 +37,17 @@ function %test(i32) { } ; check: domtree_preorder { -; nextln: ebb0: ebb1 ebb2 ebb3 ebb5 +; nextln: ebb0: ebb1 ebb12 ; nextln: ebb1: ebb4 ; nextln: ebb4: ebb6 ebb7 ebb10 -; nextln: ebb6: ebb8 ebb9 ebb11 +; nextln: ebb6: ebb8 ebb13 ebb11 ; nextln: ebb8: +; nextln: ebb13: ebb9 ; nextln: ebb9: ; nextln: ebb11: ; nextln: ebb7: ; nextln: ebb10: +; nextln: ebb12: ebb2 ebb3 ebb5 ; nextln: ebb2: ; nextln: ebb3: ; nextln: ebb5: diff --git a/filetests/domtree/wide-tree.clif b/filetests/domtree/wide-tree.clif index ae943dd7c..fdfdc169a 100644 --- a/filetests/domtree/wide-tree.clif +++ b/filetests/domtree/wide-tree.clif @@ -6,8 +6,14 @@ function %test(i32) { jump ebb1 ; dominates: ebb1 ebb1: brz v0, ebb2 ; dominates: ebb2 ebb7 + jump ebb20 ; dominates: ebb20 + ebb20: brnz v0, ebb3 ; dominates: ebb3 + jump ebb21 ; dominates: ebb21 + ebb21: brz v0, ebb4 ; dominates: ebb4 + jump ebb22 ; dominates: ebb22 + ebb22: brnz v0, ebb5 ; dominates: ebb5 jump ebb6 ; dominates: ebb6 ebb2: @@ -22,7 +28,11 
@@ function %test(i32) { jump ebb7 ebb7: brnz v0, ebb8 ; dominates: ebb8 ebb12 + jump ebb23 ; dominates: ebb23 + ebb23: brz v0, ebb9 ; dominates: ebb9 + jump ebb24 ; dominates: ebb24 + ebb24: brnz v0, ebb10 ; dominates: ebb10 jump ebb11 ; dominates: ebb11 ebb8: @@ -43,16 +53,21 @@ function %test(i32) { ; check: domtree_preorder { ; nextln: ebb0: ebb13 ebb1 ; nextln: ebb13: -; nextln: ebb1: ebb2 ebb3 ebb4 ebb5 ebb6 ebb7 +; nextln: ebb1: ebb2 ebb20 ebb7 ; nextln: ebb2: +; nextln: ebb20: ebb3 ebb21 ; nextln: ebb3: +; nextln: ebb21: ebb4 ebb22 ; nextln: ebb4: +; nextln: ebb22: ebb5 ebb6 ; nextln: ebb5: ; nextln: ebb6: -; nextln: ebb7: ebb8 ebb9 ebb10 ebb12 ebb11 +; nextln: ebb7: ebb8 ebb23 ebb12 ; nextln: ebb8: +; nextln: ebb23: ebb9 ebb24 ; nextln: ebb9: +; nextln: ebb24: ebb10 ebb11 ; nextln: ebb10: -; nextln: ebb12: ; nextln: ebb11: +; nextln: ebb12: ; nextln: } diff --git a/filetests/isa/riscv/binary32.clif b/filetests/isa/riscv/binary32.clif index 4d63084f9..b76bfcce6 100644 --- a/filetests/isa/riscv/binary32.clif +++ b/filetests/isa/riscv/binary32.clif @@ -95,36 +95,77 @@ ebb0(v9999: i32): call_indirect sig0, v2() ; bin: 000a80e7 brz v1, ebb3 + fallthrough ebb4 + +ebb4: brnz v1, ebb1 + fallthrough ebb5 +ebb5: ; jalr %x0, %x1, 0 return v9999 ; bin: 00008067 ebb1: ; beq 0x000 br_icmp eq v1, v2, ebb1 ; bin: 01550063 + fallthrough ebb100 + +ebb100: ; bne 0xffc br_icmp ne v1, v2, ebb1 ; bin: ff551ee3 + fallthrough ebb101 + +ebb101: ; blt 0xff8 br_icmp slt v1, v2, ebb1 ; bin: ff554ce3 + fallthrough ebb102 + +ebb102: ; bge 0xff4 br_icmp sge v1, v2, ebb1 ; bin: ff555ae3 + fallthrough ebb103 + +ebb103: ; bltu 0xff0 br_icmp ult v1, v2, ebb1 ; bin: ff5568e3 + fallthrough ebb104 + +ebb104: ; bgeu 0xfec br_icmp uge v1, v2, ebb1 ; bin: ff5576e3 + fallthrough ebb105 + +ebb105: ; Forward branches. 
+ fallthrough ebb106 + +ebb106: ; beq 0x018 br_icmp eq v2, v1, ebb2 ; bin: 00aa8c63 + fallthrough ebb107 + +ebb107: ; bne 0x014 br_icmp ne v2, v1, ebb2 ; bin: 00aa9a63 + fallthrough ebb108 + +ebb108: ; blt 0x010 br_icmp slt v2, v1, ebb2 ; bin: 00aac863 + fallthrough ebb109 + +ebb109: ; bge 0x00c br_icmp sge v2, v1, ebb2 ; bin: 00aad663 + fallthrough ebb110 + +ebb110: ; bltu 0x008 br_icmp ult v2, v1, ebb2 ; bin: 00aae463 + fallthrough ebb111 + +ebb111: ; bgeu 0x004 br_icmp uge v2, v1, ebb2 ; bin: 00aaf263 @@ -137,6 +178,9 @@ ebb2: ebb3: ; beq x, %x0 brz v1, ebb3 ; bin: 00050063 + fallthrough ebb6 + +ebb6: ; bne x, %x0 brnz v1, ebb3 ; bin: fe051ee3 diff --git a/filetests/isa/x86/abi64.clif b/filetests/isa/x86/abi64.clif index 209b036ee..ccea6304e 100644 --- a/filetests/isa/x86/abi64.clif +++ b/filetests/isa/x86/abi64.clif @@ -18,8 +18,8 @@ ebb0: return } -function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash { - sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash +function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v { + sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v fn0 = u0:0 sig0 ebb0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64): diff --git a/filetests/isa/x86/allones_funcaddrs32.clif b/filetests/isa/x86/allones_funcaddrs32.clif index b6d7542fc..5a6a5b970 100644 --- a/filetests/isa/x86/allones_funcaddrs32.clif +++ b/filetests/isa/x86/allones_funcaddrs32.clif @@ -1,6 +1,6 @@ ; binary emission of 32-bit code. test binemit -set opt_level=best +set opt_level=speed_and_size set allones_funcaddrs target i686 haswell diff --git a/filetests/isa/x86/allones_funcaddrs64.clif b/filetests/isa/x86/allones_funcaddrs64.clif index 06ab24ed8..617db5a44 100644 --- a/filetests/isa/x86/allones_funcaddrs64.clif +++ b/filetests/isa/x86/allones_funcaddrs64.clif @@ -1,6 +1,6 @@ ; binary emission of 64-bit code. 
test binemit -set opt_level=best +set opt_level=speed_and_size set allones_funcaddrs target x86_64 haswell diff --git a/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif index b2a0c9617..31c4016dc 100644 --- a/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif +++ b/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif @@ -1,5 +1,5 @@ test binemit -set opt_level=best +set opt_level=speed_and_size target x86_64 baseline ; The binary encodings can be verified with the command: diff --git a/filetests/isa/x86/binary32-float.clif b/filetests/isa/x86/binary32-float.clif index 0b171388a..8a4ae5fe7 100644 --- a/filetests/isa/x86/binary32-float.clif +++ b/filetests/isa/x86/binary32-float.clif @@ -481,21 +481,44 @@ ebb0(v0: f32 [%xmm0]): ebb1: ; asm: jnp ebb1 brff ord v1, ebb1 ; bin: 7b fe + jump ebb2 + +ebb2: ; asm: jp ebb1 brff uno v1, ebb1 ; bin: 7a fc + jump ebb3 + +ebb3: ; asm: jne ebb1 brff one v1, ebb1 ; bin: 75 fa + jump ebb4 + +ebb4: ; asm: je ebb1 brff ueq v1, ebb1 ; bin: 74 f8 + jump ebb5 + +ebb5: ; asm: ja ebb1 brff gt v1, ebb1 ; bin: 77 f6 + jump ebb6 + +ebb6: ; asm: jae ebb1 brff ge v1, ebb1 ; bin: 73 f4 + jump ebb7 + +ebb7: ; asm: jb ebb1 brff ult v1, ebb1 ; bin: 72 f2 + jump ebb8 + +ebb8: ; asm: jbe ebb1 brff ule v1, ebb1 ; bin: 76 f0 + jump ebb9 +ebb9: ; asm: jp .+4; ud2 trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b ; asm: jnp .+4; ud2 diff --git a/filetests/isa/x86/binary32.clif b/filetests/isa/x86/binary32.clif index e5af412d5..cc8f63801 100644 --- a/filetests/isa/x86/binary32.clif +++ b/filetests/isa/x86/binary32.clif @@ -1,6 +1,6 @@ ; binary emission of x86-32 code. test binemit -set opt_level=best +set opt_level=speed_and_size target i686 haswell ; The binary encodings can be verified with the command: @@ -469,15 +469,40 @@ ebb0: ; asm: mov %cl,(%eax,%ebx,1) istore8_complex v601, v521+v522 ; bin: heap_oob 88 0c 18 + ; Carry Addition + ; asm: addl %esi, %ecx + [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2 ; bin: 01 f1 + ; asm: adcl %esi, %ecx + [-,%rcx] v703 = iadd_ifcin v1, v2, v702 ; bin: 11 f1 + ; asm: adcl %esi, %ecx + [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1 + + ; Borrow Subtraction + ; asm: subl %esi, %ecx + [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2 ; bin: 29 f1 + ; asm: sbbl %esi, %ecx + [-,%rcx] v708 = isub_ifbin v1, v2, v707 ; bin: 19 f1 + ; asm: sbbl %esi, %ecx + [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1 + ; asm: testl %ecx, %ecx ; asm: je ebb1 brz v1, ebb1 ; bin: 85 c9 74 0e + fallthrough ebb3 + +ebb3: ; asm: testl %esi, %esi ; asm: je ebb1 brz v2, ebb1 ; bin: 85 f6 74 0a + fallthrough ebb4 + +ebb4: ; asm: testl %ecx, %ecx ; asm: jne ebb1 brnz v1, ebb1 ; bin: 85 c9 75 06 + fallthrough ebb5 + +ebb5: ; asm: testl %esi, %esi ; asm: jne ebb1 brnz v2, ebb1 ; bin: 85 f6 75 02 @@ -506,16 +531,27 @@ ebb0: ; asm: testl $0xff, %edi ; asm: je ebb1 brz v3, ebb1 ; bin: f7 c7 000000ff 0f 84 00000015 + fallthrough ebb2 + +ebb2: ; asm: testb %bl, %bl ; asm: je ebb1 brz v4, ebb1 ; bin: 84 db 74 11 + fallthrough ebb3 + +ebb3: ; asm: testl $0xff, %edi ; asm: jne ebb1 brnz v3, ebb1 ; bin: f7 c7 000000ff 0f 85 00000005 + fallthrough ebb4 + +ebb4: ; asm: testb %bl, %bl ; asm: jne ebb1 brnz v4, ebb1 ; bin: 84 db 75 01 + fallthrough ebb5 +ebb5: return ebb1: @@ -537,24 +573,54 @@ ebb1: ; asm: je ebb1 brif eq v11, ebb1 ; bin: 74 fa + jump ebb2 + +ebb2: ; asm: jne ebb1 brif ne v11, ebb1 ; bin: 75 f8 + jump ebb3 + +ebb3: ; asm: jl ebb1 brif slt v11, 
ebb1 ; bin: 7c f6 + jump ebb4 + +ebb4: ; asm: jge ebb1 brif sge v11, ebb1 ; bin: 7d f4 + jump ebb5 + +ebb5: ; asm: jg ebb1 brif sgt v11, ebb1 ; bin: 7f f2 + jump ebb6 + +ebb6: ; asm: jle ebb1 brif sle v11, ebb1 ; bin: 7e f0 + jump ebb7 + +ebb7: ; asm: jb ebb1 brif ult v11, ebb1 ; bin: 72 ee + jump ebb8 + +ebb8: ; asm: jae ebb1 brif uge v11, ebb1 ; bin: 73 ec + jump ebb9 + +ebb9: ; asm: ja ebb1 brif ugt v11, ebb1 ; bin: 77 ea + jump ebb10 + +ebb10: ; asm: jbe ebb1 brif ule v11, ebb1 ; bin: 76 e8 + jump ebb11 + +ebb11: ; asm: sete %bl [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 diff --git a/filetests/isa/x86/binary64-float.clif b/filetests/isa/x86/binary64-float.clif index 582f4e9c4..171a3db7c 100644 --- a/filetests/isa/x86/binary64-float.clif +++ b/filetests/isa/x86/binary64-float.clif @@ -1,6 +1,6 @@ ; Binary emission of 64-bit floating point code. test binemit -set opt_level=best +set opt_level=speed_and_size target x86_64 haswell ; The binary encodings can be verified with the command: @@ -96,6 +96,26 @@ ebb0: ; asm: movaps %xmm5, %xmm10 [-,%xmm10] v39 = copy v10 ; bin: 44 0f 28 d5 + ; Copy to SSA + + ; asm: movsd %xmm0, %xmm15 + [-,%xmm15] v400 = copy_to_ssa.f64 %xmm0 ; bin: f2 44 0f 10 f8 + ; asm: movsd %xmm15, %xmm0 + [-,%xmm0] v401 = copy_to_ssa.f64 %xmm15 ; bin: f2 41 0f 10 c7 + ; asm: movsd %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v402 = copy_to_ssa.f64 %xmm7 ; bin: f2 40 0f 10 f7 + ; asm: movsd %xmm11, %xmm14 + [-,%xmm14] v403 = copy_to_ssa.f64 %xmm11 ; bin: f2 45 0f 10 f3 + + ; asm: movss %xmm0, %xmm15 + [-,%xmm15] v404 = copy_to_ssa.f32 %xmm0 ; bin: f3 44 0f 10 f8 + ; asm: movss %xmm15, %xmm0 + [-,%xmm0] v405 = copy_to_ssa.f32 %xmm15 ; bin: f3 41 0f 10 c7 + ; asm: movss %xmm7, %xmm6. Unfortunately we get a redundant REX prefix. + [-,%xmm6] v406 = copy_to_ssa.f32 %xmm7 ; bin: f3 40 0f 10 f7 + ; asm: movss %xmm11, %xmm14 + [-,%xmm14] v407 = copy_to_ssa.f32 %xmm11 ; bin: f3 45 0f 10 f3 + ; Convert float to int. ; asm: cvttss2si %xmm5, %ecx @@ -542,21 +562,44 @@ ebb0(v0: f32 [%xmm0]): ebb1: ; asm: jnp ebb1 brff ord v1, ebb1 ; bin: 7b fe + jump ebb2 + +ebb2: ; asm: jp ebb1 brff uno v1, ebb1 ; bin: 7a fc + jump ebb3 + +ebb3: ; asm: jne ebb1 brff one v1, ebb1 ; bin: 75 fa + jump ebb4 + +ebb4: ; asm: je ebb1 brff ueq v1, ebb1 ; bin: 74 f8 + jump ebb5 + +ebb5: ; asm: ja ebb1 brff gt v1, ebb1 ; bin: 77 f6 + jump ebb6 + +ebb6: ; asm: jae ebb1 brff ge v1, ebb1 ; bin: 73 f4 + jump ebb7 + +ebb7: ; asm: jb ebb1 brff ult v1, ebb1 ; bin: 72 f2 + jump ebb8 + +ebb8: ; asm: jbe ebb1 brff ule v1, ebb1 ; bin: 76 f0 + jump ebb9 +ebb9: ; asm: jp .+4; ud2 trapff ord v1, user0 ; bin: 7a 02 user0 0f 0b ; asm: jnp .+4; ud2 diff --git a/filetests/isa/x86/binary64-pic.clif b/filetests/isa/x86/binary64-pic.clif index adda09da7..3f3d86288 100644 --- a/filetests/isa/x86/binary64-pic.clif +++ b/filetests/isa/x86/binary64-pic.clif @@ -1,6 +1,6 @@ ; binary emission of 64-bit code. 
test binemit -set opt_level=best +set opt_level=speed_and_size set is_pic target x86_64 haswell diff --git a/filetests/isa/x86/binary64-run.clif b/filetests/isa/x86/binary64-run.clif new file mode 100644 index 000000000..b255770c1 --- /dev/null +++ b/filetests/isa/x86/binary64-run.clif @@ -0,0 +1,10 @@ +test run +target x86_64 + +; this verifies that returning b64 immediates does not result in a segmentation fault, see https://github.com/CraneStation/cranelift/issues/911 +function %test_b64() -> b64 { +ebb0: +[-, %r10] v0 = bconst.b64 true + return v0 +} +; run diff --git a/filetests/isa/x86/binary64.clif b/filetests/isa/x86/binary64.clif index ed1316ad2..0430cd78a 100644 --- a/filetests/isa/x86/binary64.clif +++ b/filetests/isa/x86/binary64.clif @@ -1,6 +1,6 @@ ; binary emission of x86-64 code. test binemit -set opt_level=best +set opt_level=speed_and_size target x86_64 haswell ; The binary encodings can be verified with the command: @@ -179,6 +179,26 @@ ebb0: ; asm: movq %r10, %rsp copy_special %r10 -> %rsp ; bin: 4c 89 d4 + ; Copy to SSA + + ; asm: movq %rax, %r15 + [-,%r15] v700 = copy_to_ssa.i64 %rax ; bin: 49 89 c7 + ; asm: movq %r15, %rax + [-,%rax] v701 = copy_to_ssa.i64 %r15 ; bin: 4c 89 f8 + ; asm: movq %rdi, %rsi + [-,%rsi] v702 = copy_to_ssa.i64 %rdi ; bin: 48 89 fe + ; asm: movq %r11, %r14 + [-,%r14] v703 = copy_to_ssa.i64 %r11 ; bin: 4d 89 de + + ; asm: movl %eax, %r15d + [-,%r15] v704 = copy_to_ssa.i32 %rax ; bin: 41 89 c7 + ; asm: movl %r15d, %eax + [-,%rax] v705 = copy_to_ssa.i32 %r15 ; bin: 44 89 f8 + ; asm: movl %edi, %esi. Unfortunately we get a redundant REX prefix. + [-,%rsi] v706 = copy_to_ssa.i32 %rdi ; bin: 40 89 fe + ; asm: movl %r11, %r14 + [-,%r14] v707 = copy_to_ssa.i32 %r11 ; bin: 45 89 de + ; Load/Store instructions. ; Register indirect addressing with no displacement. @@ -690,18 +710,33 @@ ebb0: ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b + fallthrough ebb3 + +ebb3: ; asm: testq %rsi, %rsi ; asm: je ebb1 brz v2, ebb1 ; bin: 48 85 f6 74 16 + fallthrough ebb4 + +ebb4: ; asm: testq %r10, %r10 ; asm: je ebb1 brz v3, ebb1 ; bin: 4d 85 d2 74 11 + fallthrough ebb5 + +ebb5: ; asm: testq %rcx, %rcx ; asm: jne ebb1 brnz v1, ebb1 ; bin: 48 85 c9 75 0c + fallthrough ebb6 + +ebb6: ; asm: testq %rsi, %rsi ; asm: jne ebb1 brnz v2, ebb1 ; bin: 48 85 f6 75 07 + fallthrough ebb7 + +ebb7: ; asm: testq %r10, %r10 ; asm: jne ebb1 brnz v3, ebb1 ; bin: 4d 85 d2 75 02 @@ -711,11 +746,14 @@ ebb0: ; asm: ebb1: ebb1: - return ; bin: c3 + return ; bin: c3 ; asm: ebb2: ebb2: - jump ebb1 ; bin: eb fd + ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. + ; asm: notq %rcx + [-,%rcx] v5000 = bnot v1 ; bin: 48 f7 d1 + jump ebb1 ; bin: eb fa } ; CPU flag instructions. 
@@ -733,24 +771,54 @@ ebb1: ; asm: je ebb1 brif eq v11, ebb1 ; bin: 74 f8 + jump ebb2 + +ebb2: ; asm: jne ebb1 brif ne v11, ebb1 ; bin: 75 f6 + jump ebb3 + +ebb3: ; asm: jl ebb1 brif slt v11, ebb1 ; bin: 7c f4 + jump ebb4 + +ebb4: ; asm: jge ebb1 brif sge v11, ebb1 ; bin: 7d f2 + jump ebb5 + +ebb5: ; asm: jg ebb1 brif sgt v11, ebb1 ; bin: 7f f0 + jump ebb6 + +ebb6: ; asm: jle ebb1 brif sle v11, ebb1 ; bin: 7e ee + jump ebb7 + +ebb7: ; asm: jb ebb1 brif ult v11, ebb1 ; bin: 72 ec + jump ebb8 + +ebb8: ; asm: jae ebb1 brif uge v11, ebb1 ; bin: 73 ea + jump ebb9 + +ebb9: ; asm: ja ebb1 brif ugt v11, ebb1 ; bin: 77 e8 + jump ebb10 + +ebb10: ; asm: jbe ebb1 brif ule v11, ebb1 ; bin: 76 e6 + jump ebb11 + +ebb11: ; asm: sete %bl [-,%rbx] v20 = trueif eq v11 ; bin: 0f 94 c3 @@ -1248,18 +1316,33 @@ ebb0: ; asm: testl %ecx, %ecx ; asm: je ebb1x brz v1, ebb1 ; bin: 85 c9 74 18 + fallthrough ebb3 + +ebb3: ; asm: testl %esi, %esi ; asm: je ebb1x brz v2, ebb1 ; bin: 85 f6 74 14 + fallthrough ebb4 + +ebb4: ; asm: testl %r10d, %r10d ; asm: je ebb1x brz v3, ebb1 ; bin: 45 85 d2 74 0f + fallthrough ebb5 + +ebb5: ; asm: testl %ecx, %ecx ; asm: jne ebb1x brnz v1, ebb1 ; bin: 85 c9 75 0b + fallthrough ebb6 + +ebb6: ; asm: testl %esi, %esi ; asm: jne ebb1x brnz v2, ebb1 ; bin: 85 f6 75 07 + fallthrough ebb7 + +ebb7: ; asm: testl %r10d, %r10d ; asm: jne ebb1x brnz v3, ebb1 ; bin: 45 85 d2 75 02 @@ -1269,11 +1352,14 @@ ebb0: ; asm: ebb1x: ebb1: - return ; bin: c3 + return ; bin: c3 ; asm: ebb2x: ebb2: - jump ebb1 ; bin: eb fd + ; Add a no-op instruction to prevent fold_redundant_jump from removing this block. + ; asm: notl %ecx + [-,%rcx] v5000 = bnot v1 ; bin: f7 d1 + jump ebb1 ; bin: eb fb } @@ -1582,3 +1668,10 @@ ebb0: return } + +function %B64() { +ebb0: + [-, %rax] v1 = bconst.b64 true ; bin: 40 b8 00000001 + [-, %r10] v0 = bconst.b64 true ; bin: 41 ba 00000001 + return +} diff --git a/filetests/isa/x86/br-i128.clif b/filetests/isa/x86/br-i128.clif new file mode 100644 index 000000000..e3ac6a9d8 --- /dev/null +++ b/filetests/isa/x86/br-i128.clif @@ -0,0 +1,30 @@ +test compile +target x86_64 + +function u0:0(i128) -> i8 fast { +ebb0(v0: i128): + brz v0, ebb2 + jump ebb1 + +ebb1: + v1 = iconst.i8 0 + return v1 + +ebb2: + v2 = iconst.i8 1 + return v2 +} + +function u0:1(i128) -> i8 fast { +ebb0(v0: i128): + brnz v0, ebb2 + jump ebb1 + +ebb1: + v1 = iconst.i8 0 + return v1 + +ebb2: + v2 = iconst.i8 1 + return v2 +} diff --git a/filetests/isa/x86/compile-vconst.clif b/filetests/isa/x86/compile-vconst.clif new file mode 100644 index 000000000..c64c9fc50 --- /dev/null +++ b/filetests/isa/x86/compile-vconst.clif @@ -0,0 +1,16 @@ +test compile +set enable_simd=true +set probestack_enabled=false +target x86_64 haswell + +; use baldrdash calling convention here for simplicity (avoids prologue, epilogue) +function %test_vconst_i32() -> i32x4 baldrdash_system_v { +ebb0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; check: ebb0: +; nextln: v0 = vconst.i32x4 0x1234 +; nextln: return v0 +; nextln: } diff --git a/filetests/isa/x86/extractlane-binemit.clif b/filetests/isa/x86/extractlane-binemit.clif new file mode 100644 index 000000000..86f16315c --- /dev/null +++ b/filetests/isa/x86/extractlane-binemit.clif @@ -0,0 +1,38 @@ +test binemit +set enable_simd +target x86_64 haswell + +; for extractlane, floats are legalized differently than integers and booleans; integers and +; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested + +function %test_extractlane_b8() { +ebb0: +[-, %rax] v0 = 
bconst.b8 true +[-, %xmm0] v1 = splat.b8x16 v0 +[-, %rax] v2 = x86_pextr v1, 10 ; bin: 66 0f 3a 14 c0 0a + return +} + +function %test_extractlane_i16() { +ebb0: +[-, %rax] v0 = iconst.i16 4 +[-, %xmm1] v1 = splat.i16x8 v0 +[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04 + return +} + +function %test_extractlane_i32() { +ebb0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm4] v1 = splat.i32x4 v0 +[-, %rcx] v2 = x86_pextr v1, 2 ; bin: 66 0f 3a 16 e1 02 + return +} + +function %test_extractlane_b64() { +ebb0: +[-, %rax] v0 = bconst.b64 false +[-, %xmm2] v1 = splat.b64x2 v0 +[-, %rbx] v2 = x86_pextr v1, 1 ; bin: 66 48 0f 3a 16 d3 01 + return +} diff --git a/filetests/isa/x86/extractlane-run.clif b/filetests/isa/x86/extractlane-run.clif new file mode 100644 index 000000000..adb2e7b8e --- /dev/null +++ b/filetests/isa/x86/extractlane-run.clif @@ -0,0 +1,68 @@ +test run +set enable_simd + +function %test_extractlane_b8() -> b8 { +ebb0: + v1 = vconst.b8x16 [false false false false false false false false false false true false false + false false false] + v2 = extractlane v1, 10 + return v2 +} +; run + +function %test_extractlane_i16() -> b1 { +ebb0: + v0 = vconst.i16x8 0x00080007000600050004000300020001 + v1 = extractlane v0, 1 + v2 = icmp_imm eq v1, 2 + return v2 +} +; run + +function %test_extractlane_f32() -> b1 { +ebb0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 [0x00.00 0x00.00 0x00.00 0x42.42] + v2 = extractlane v1, 3 + v3 = fcmp eq v2, v0 + return v3 +} +; run + +function %test_extractlane_i32_with_vector_reuse() -> b1 { +ebb0: + v0 = iconst.i32 42 + v1 = iconst.i32 99 + + v2 = splat.i32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = icmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = icmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run + +function %test_extractlane_f32_with_vector_reuse() -> b1 { +ebb0: + v0 = f32const 0x42.42 + v1 = f32const 0x99.99 + + v2 = splat.f32x4 v0 + v3 = insertlane v2, 2, v1 + + v4 = extractlane v3, 3 + v5 = fcmp eq v4, v0 + + v6 = extractlane v3, 2 + v7 = fcmp eq v6, v1 + + v8 = band v5, v7 + return v8 +} +; run diff --git a/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/filetests/isa/x86/floating-point-zero-constants-32bit.clif new file mode 100644 index 000000000..802137555 --- /dev/null +++ b/filetests/isa/x86/floating-point-zero-constants-32bit.clif @@ -0,0 +1,17 @@ +; Check that floating-point and integer constants equal to zero are optimized correctly. +test binemit +target i686 + +function %foo() -> f32 fast { +ebb0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 + return v0 +} + +function %bar() -> f64 fast { +ebb0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v1 = f64const 0.0 ; bin: 66 0f 57 c0 + return v1 +} diff --git a/filetests/isa/x86/floating-point-zero-constants.clif b/filetests/isa/x86/floating-point-zero-constants.clif new file mode 100644 index 000000000..049320870 --- /dev/null +++ b/filetests/isa/x86/floating-point-zero-constants.clif @@ -0,0 +1,31 @@ +; Check that floating-point constants equal to zero are optimized correctly. 
+test binemit +target x86_64 + +function %zero_const_32bit_no_rex() -> f32 fast { +ebb0: + ; asm: xorps %xmm0, %xmm0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 + return v0 +} + +function %zero_const_32bit_rex() -> f32 fast { +ebb0: + ; asm: xorps %xmm8, %xmm8 + [-,%xmm8] v1 = f32const 0.0 ; bin: 45 0f 57 c0 + return v1 +} + +function %zero_const_64bit_no_rex() -> f64 fast { +ebb0: + ; asm: xorpd %xmm0, %xmm0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 + return v0 +} + +function %zero_const_64bit_rex() -> f64 fast { +ebb0: + ; asm: xorpd %xmm8, %xmm8 + [-,%xmm8] v1 = f64const 0.0 ; bin: 66 45 0f 57 c0 + return v1 +} diff --git a/filetests/isa/x86/i128.clif b/filetests/isa/x86/i128.clif new file mode 100644 index 000000000..b710a7430 --- /dev/null +++ b/filetests/isa/x86/i128.clif @@ -0,0 +1,46 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 fast { +ebb0(v0: i64, v1: i64): +;check: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]): + + v2 = iconcat.i64 v0, v1 + ; check: regmove v0, %rdi -> %rax + ; check: regmove v1, %rsi -> %rdx + + return v2 + ; check: v4 = x86_pop.i64 + ; check: return v0, v1, v4 +} + +function u0:1(i128) -> i64, i64 fast { +ebb0(v0: i128): +; check: ebb0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]): + + v1, v2 = isplit v0 + ; check: regmove v3, %rdi -> %rax + ; check: regmove v4, %rsi -> %rdx + + return v1, v2 + ; check: v6 = x86_pop.i64 + ; check: return v3, v4, v6 +} + +function u0:2(i64, i128) fast { +; check: ebb0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]): +ebb0(v0: i64, v1: i128): + ; check: store v2, v0+8 + ; check: store v3, v0+16 + store v1, v0+8 + return +} + +function u0:3(i64) -> i128 fast { +ebb0(v0: i64): + ; check: v2 = load.i64 v0+8 + ; check: v3 = load.i64 v0+16 + v1 = load.i128 v0+8 + ; check: return v2, v3, v5 + return v1 +} diff --git a/filetests/isa/x86/iadd-simd.clif b/filetests/isa/x86/iadd-simd.clif new file mode 100644 index 000000000..0884a97f5 --- /dev/null +++ b/filetests/isa/x86/iadd-simd.clif @@ -0,0 +1,50 @@ +test run +test binemit +set enable_simd +target x86_64 skylake + +function %iadd_i32x4() -> b1 { +ebb0: +[-, %xmm0] v0 = vconst.i32x4 [1 1 1 1] +[-, %xmm1] v1 = vconst.i32x4 [1 2 3 4] +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 2 + + v5 = extractlane v2, 3 + v6 = icmp_imm eq v5, 5 + ; TODO replace extractlanes with vector comparison + + v7 = band v4, v6 + return v7 +} + +; run + +function %iadd_i8x16_with_overflow() -> b1 { +ebb0: +[-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255] +[-, %xmm7] v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] +[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fc c7 + + v3 = extractlane v2, 0 + v4 = icmp_imm eq v3, 1 + ; TODO replace extractlane with vector comparison + + return v4 +} + +; run + +function %iadd_i16x8(i16x8, i16x8) -> i16x8 { +ebb0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]): +[-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca + return v2 +} + +function %iadd_i64x2(i64x2, i64x2) -> i64x2 { +ebb0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]): +[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc + return v2 +} diff --git a/filetests/isa/x86/insertlane-binemit.clif b/filetests/isa/x86/insertlane-binemit.clif new file mode 100644 index 000000000..c388ed6fa --- /dev/null +++ b/filetests/isa/x86/insertlane-binemit.clif @@ -0,0 +1,42 @@ +test binemit +set enable_simd +target x86_64 haswell + +; for insertlane, floats are legalized differently than integers and 
booleans; integers and +; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested + +function %test_insertlane_b8() { +ebb0: +[-, %rax] v0 = bconst.b8 true +[-, %rbx] v1 = bconst.b8 false +[-, %xmm0] v2 = splat.b8x16 v0 +[-, %xmm0] v3 = x86_pinsr v2, 10, v1 ; bin: 66 0f 3a 20 c3 0a + return +} + +function %test_insertlane_i16() { +ebb0: +[-, %rax] v0 = iconst.i16 4 +[-, %rbx] v1 = iconst.i16 5 +[-, %xmm1] v2 = splat.i16x8 v0 +[-, %xmm1] v3 = x86_pinsr v2, 4, v1 ; bin: 66 0f c4 cb 04 + return +} + +function %test_insertlane_i32() { +ebb0: +[-, %rax] v0 = iconst.i32 42 +[-, %rbx] v1 = iconst.i32 99 +[-, %xmm4] v2 = splat.i32x4 v0 +[-, %xmm4] v3 = x86_pinsr v2, 2, v1 ; bin: 66 0f 3a 22 e3 02 + return +} + +function %test_insertlane_b64() { +ebb0: +[-, %rax] v0 = bconst.b64 true +[-, %rbx] v1 = bconst.b64 false +[-, %xmm2] v2 = splat.b64x2 v0 +[-, %xmm2] v3 = x86_pinsr v2, 1, v1 ; bin: 66 48 0f 3a 22 d3 01 + return +} diff --git a/filetests/isa/x86/insertlane-run.clif b/filetests/isa/x86/insertlane-run.clif new file mode 100644 index 000000000..92fb38202 --- /dev/null +++ b/filetests/isa/x86/insertlane-run.clif @@ -0,0 +1,48 @@ +test run +set enable_simd + +; TODO once SIMD vector comparison is implemented, remove use of extractlane below + +function %test_insertlane_b8() -> b8 { +ebb0: + v1 = bconst.b8 true + v2 = vconst.b8x16 [false false false false false false false false false false false false false + false false false] + v3 = insertlane v2, 10, v1 + v4 = extractlane v3, 10 + return v4 +} +; run + +function %test_insertlane_f32() -> b1 { +ebb0: + v0 = f32const 0x42.42 + v1 = vconst.f32x4 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane1() -> b1 { +ebb0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 1, v0 + v3 = extractlane v2, 1 + v4 = fcmp eq v3, v0 + return v4 +} +; run + +function %test_insertlane_f64_lane0() -> b1 { +ebb0: + v0 = f64const 0x42.42 + v1 = vconst.f64x2 0x00 + v2 = insertlane v1, 0, v0 + v3 = extractlane v2, 0 + v4 = fcmp eq v3, v0 + return v4 +} +; run diff --git a/filetests/isa/x86/isplit-not-legalized-twice.clif b/filetests/isa/x86/isplit-not-legalized-twice.clif new file mode 100644 index 000000000..4b81a186d --- /dev/null +++ b/filetests/isa/x86/isplit-not-legalized-twice.clif @@ -0,0 +1,20 @@ +test compile +target x86_64 + +function u0:0(i64, i64) -> i128 system_v { +ebb0(v0: i64, v1: i64): + trap user0 + +ebb30: + v245 = iconst.i64 0 + v246 = iconcat v245, v245 + ; The next instruction used to be legalized twice, causing a panic the second time. 
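+ ; (Roughly: the legalizer rewrites an `isplit` of an `iconcat` to use the
+ ; two i64 halves directly; re-running that rewrite on the already rewritten
+ ; instruction is what used to panic.)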
+ v250, v251 = isplit.i128 v370 + v252, v253 = isplit v246 + trap user0 + +ebb45: + v369 = iconst.i64 0 + v370 = load.i128 v369 + trap user0 +} diff --git a/filetests/isa/x86/isub_imm-i8.clif b/filetests/isa/x86/isub_imm-i8.clif index 8958b1afa..35698c9ab 100644 --- a/filetests/isa/x86/isub_imm-i8.clif +++ b/filetests/isa/x86/isub_imm-i8.clif @@ -1,13 +1,14 @@ test compile +set opt_level=speed_and_size target x86_64 function u0:0(i8) -> i8 fast { ebb0(v0: i8): v1 = iconst.i8 0 v2 = isub v1, v0 - ; check: v4 = uextend.i32 v0 - ; nextln: v6 = iconst.i32 0 - ; nextln = isub v6, v4 - ; nextln = ireduce.i8 v5 + ; check: v3 = uextend.i32 v0 + ; nextln: v5 = iconst.i32 0 + ; nextln = isub v5, v3 + ; nextln = ireduce.i8 v4 return v2 } diff --git a/filetests/isa/x86/jump_i128_param_unused.clif b/filetests/isa/x86/jump_i128_param_unused.clif new file mode 100644 index 000000000..9d96fcbe3 --- /dev/null +++ b/filetests/isa/x86/jump_i128_param_unused.clif @@ -0,0 +1,10 @@ +test compile +target x86_64 + +function u0:0(i128) system_v { +ebb0(v0: i128): + jump ebb1(v0) + +ebb1(v1: i128): + return +} diff --git a/filetests/isa/x86/legalize-br-table-bb.clif b/filetests/isa/x86/legalize-br-table-bb.clif new file mode 100644 index 000000000..69e424076 --- /dev/null +++ b/filetests/isa/x86/legalize-br-table-bb.clif @@ -0,0 +1,31 @@ +test compile +target x86_64 +feature "basic-blocks" +; regex: V=v\d+ +; regex: EBB=ebb\d+ + +function u0:0(i64) system_v { + ss0 = explicit_slot 1 + jt0 = jump_table [ebb1] + +ebb0(v0: i64): + v1 = stack_addr.i64 ss0 + v2 = load.i8 v1 + br_table v2, ebb2, jt0 +; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 +; ebb2 is replaced by ebb1 by fold_redundant_jump +; nextln: brif uge $oob, ebb1 +; nextln: fallthrough $(inb=$EBB) +; check: $inb: +; nextln: $(final_idx=$V) = uextend.i64 $idx +; nextln: $(base=$V) = jump_table_base.i64 jt0 +; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 +; nextln: $(addr=$V) = iadd $base, $rel_addr +; nextln: indirect_jump_table_br $addr, jt0 + +ebb2: + jump ebb1 + +ebb1: + return +} diff --git a/filetests/isa/x86/legalize-br-table.clif b/filetests/isa/x86/legalize-br-table.clif index 959d3028b..51ab2d08d 100644 --- a/filetests/isa/x86/legalize-br-table.clif +++ b/filetests/isa/x86/legalize-br-table.clif @@ -1,6 +1,9 @@ test compile - +set opt_level=speed_and_size target x86_64 +feature !"basic-blocks" +; regex: V=v\d+ +; regex: EBB=ebb\d+ function u0:0(i64) system_v { ss0 = explicit_slot 1 @@ -10,6 +13,15 @@ ebb0(v0: i64): v1 = stack_addr.i64 ss0 v2 = load.i8 v1 br_table v2, ebb2, jt0 +; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 +; nextln: brif uge $oob, ebb2 +; nextln: fallthrough $(inb=$EBB) +; check: $inb: +; nextln: $(final_idx=$V) = uextend.i64 $idx +; nextln: $(base=$V) = jump_table_base.i64 jt0 +; nextln: $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0 +; nextln: $(addr=$V) = iadd $base, $rel_addr +; nextln: indirect_jump_table_br $addr, jt0 ebb2: jump ebb1 diff --git a/filetests/isa/x86/legalize-call.clif b/filetests/isa/x86/legalize-call.clif index b66e7e3a5..240b07537 100644 --- a/filetests/isa/x86/legalize-call.clif +++ b/filetests/isa/x86/legalize-call.clif @@ -1,6 +1,6 @@ ; Test legalization of a non-colocated call in 64-bit non-PIC mode. 
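; (A non-colocated callee cannot be assumed to sit within rel32 range, so the
; expected expansion materializes the callee address with `func_addr` and then
; uses `call_indirect`.)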
test legalizer -set opt_level=best +set opt_level=speed_and_size target x86_64 haswell function %call() { diff --git a/filetests/isa/x86/legalize-custom.clif b/filetests/isa/x86/legalize-custom.clif index 667c08804..2657bfd49 100644 --- a/filetests/isa/x86/legalize-custom.clif +++ b/filetests/isa/x86/legalize-custom.clif @@ -33,6 +33,8 @@ ebb0(v1: i32): return ; check: ebb0(v1: i32 ; check: brnz v2, $(new=$EBB) + ; check: jump $(trap=$EBB) + ; check: $trap: ; nextln: trap user7 ; check: $new: ; nextln: return @@ -45,6 +47,8 @@ ebb0(v1: i32): return ; check: ebb0(v1: i32 ; check: brz v2, $(new=$EBB) + ; check: jump $(trap=$EBB) + ; check: $trap: ; nextln: trap user9 ; check: $new: ; nextln: return @@ -58,14 +62,6 @@ ebb0: return v1 } -function %f64const() -> f64 { -ebb0: - v1 = f64const 0x1.0p1 - ; check: $(tmp=$V) = iconst.i64 - ; check: v1 = bitcast.f64 $tmp - return v1 -} - function %select_f64(f64, f64, i32) -> f64 { ebb0(v0: f64, v1: f64, v2: i32): v3 = select v2, v0, v1 @@ -80,7 +76,7 @@ function %f32_min(f32, f32) -> f32 { ebb0(v0: f32, v1: f32): v2 = fmin v0, v1 return v2 - ; check: $(vnat=$V) = x86_fmin v0, v1 + ; check: $(vnat=$V) = x86_fmin.f32 v0, v1 ; nextln: jump $(done=$EBB)($vnat) ; check: $(uno=$EBB): diff --git a/filetests/isa/x86/legalize-f64const-x64.clif b/filetests/isa/x86/legalize-f64const-x64.clif new file mode 100644 index 000000000..addafe90a --- /dev/null +++ b/filetests/isa/x86/legalize-f64const-x64.clif @@ -0,0 +1,13 @@ +; Test the legalization of f64const. +test legalizer +target x86_64 + +; regex: V=v\d+ + +function %f64const() -> f64 { +ebb0: + v1 = f64const 0x1.0p1 + ; check: $(tmp=$V) = iconst.i64 + ; check: v1 = bitcast.f64 $tmp + return v1 +} diff --git a/filetests/isa/x86/legalize-heaps.clif b/filetests/isa/x86/legalize-heaps.clif index 5f60c7b43..c2f1ccb2d 100644 --- a/filetests/isa/x86/legalize-heaps.clif +++ b/filetests/isa/x86/legalize-heaps.clif @@ -2,6 +2,7 @@ test legalizer target x86_64 ; Test legalization for various forms of heap addresses. 
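+; Every dynamic bounds check below now takes the same shape; a sketch with
+; illustrative names:
+;   brz oob, resume ; in bounds: continue to the address arithmetic
+;   jump trap_block
+; trap_block:
+;   trap heap_oob
+; resume:
+;   ... base + index address computation ...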
+; regex: EBB=ebb\d+ function %heap_addrs(i32, i64, i64 vmctx) { gv4 = vmctx @@ -37,9 +38,11 @@ ebb0(v0: i32, v1: i64, v3: i64): v5 = heap_addr.i64 heap1, v0, 0 ; check: v14 = icmp_imm ugt v0, 0x0001_0000 - ; check: brz v14, ebb1 - ; check: trap heap_oob - ; check: ebb1: + ; check: brz v14, $(resume_1=$EBB) + ; nextln: jump $(trap_1=$EBB) + ; check: $trap_1: + ; nextln: trap heap_oob + ; check: $resume_1: ; check: v15 = uextend.i64 v0 ; check: v16 = iadd_imm.i64 v3, 64 ; check: v5 = iadd v16, v15 @@ -47,17 +50,21 @@ ebb0(v0: i32, v1: i64, v3: i64): v6 = heap_addr.i64 heap2, v1, 0 ; check: v19 = iconst.i64 0x0001_0000_0000 ; check: v17 = icmp.i64 ugt v1, v19 - ; check: brz v17, ebb2 - ; check: trap heap_oob - ; check: ebb2: + ; check: brz v17, $(resume_2=$EBB) + ; nextln: jump $(trap_2=$EBB) + ; check: $trap_2: + ; nextln: trap heap_oob + ; check: $resume_2: ; check: v18 = iadd_imm.i64 v3, 64 ; check: v6 = iadd v18, v1 v7 = heap_addr.i64 heap3, v1, 0 ; check: v20 = icmp_imm.i64 ugt v1, 0x0001_0000 - ; check: brz v20, ebb3 - ; check: trap heap_oob - ; check: ebb3: + ; check: brz v20, $(resume_3=$EBB) + ; nextln: jump $(trap_3=$EBB) + ; check: $trap_3: + ; nextln: trap heap_oob + ; check: $resume_3: ; check: v21 = iadd_imm.i64 v3, 64 ; check: v7 = iadd v21, v1 @@ -65,9 +72,11 @@ ebb0(v0: i32, v1: i64, v3: i64): ; check: v22 = load.i32 notrap aligned v3+88 ; check: v23 = iadd_imm v22, 0 ; check: v24 = icmp.i32 ugt v0, v23 - ; check: brz v24, ebb4 - ; check: trap heap_oob - ; check: ebb4: + ; check: brz v24, $(resume_4=$EBB) + ; nextln: jump $(trap_4=$EBB) + ; check: $trap_4: + ; nextln: trap heap_oob + ; check: $resume_4: ; check: v25 = uextend.i64 v0 ; check: v26 = iadd_imm.i64 v3, 72 ; check: v8 = iadd v26, v25 @@ -76,9 +85,11 @@ ebb0(v0: i32, v1: i64, v3: i64): ; check: v27 = load.i32 notrap aligned v3+88 ; check: v28 = iadd_imm v27, 0 ; check: v29 = icmp.i32 ugt v0, v28 - ; check: brz v29, ebb5 - ; check: trap heap_oob - ; check: ebb5: + ; check: brz v29, $(resume_5=$EBB) + ; nextln: jump $(trap_5=$EBB) + ; check: $trap_5: + ; nextln: trap heap_oob + ; check: $resume_5: ; check: v30 = uextend.i64 v0 ; check: v31 = iadd_imm.i64 v3, 72 ; check: v9 = iadd v31, v30 @@ -87,9 +98,11 @@ ebb0(v0: i32, v1: i64, v3: i64): ; check: v32 = iadd_imm.i64 v3, 80 ; check: v33 = iadd_imm v32, 0 ; check: v34 = icmp.i64 ugt v1, v33 - ; check: brz v34, ebb6 - ; check: trap heap_oob - ; check: ebb6: + ; check: brz v34, $(resume_6=$EBB) + ; nextln: jump $(trap_6=$EBB) + ; check: $trap_6: + ; nextln: trap heap_oob + ; check: $resume_6: ; check: v35 = iadd_imm.i64 v3, 72 ; check: v10 = iadd v35, v1 @@ -97,9 +110,11 @@ ebb0(v0: i32, v1: i64, v3: i64): ; check: v36 = iadd_imm.i64 v3, 80 ; check: v37 = iadd_imm v36, 0 ; check: v38 = icmp.i64 ugt v1, v37 - ; check: brz v38, ebb7 - ; check: trap heap_oob - ; check: ebb7: + ; check: brz v38, $(resume_7=$EBB) + ; nextln: jump $(trap_7=$EBB) + ; check: $trap_7: + ; nextln: trap heap_oob + ; check: $resume_7: ; check: v39 = iadd_imm.i64 v3, 72 ; check: v11 = iadd v39, v1 diff --git a/filetests/isa/x86/legalize-i64.clif b/filetests/isa/x86/legalize-i64.clif new file mode 100644 index 000000000..8ae88f736 --- /dev/null +++ b/filetests/isa/x86/legalize-i64.clif @@ -0,0 +1,27 @@ +; Test the legalization of i64 instructions on x86_32. 
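+; On i686 an i64 value is modelled as two i32 halves, so 64-bit add/sub
+; legalize into a carry/borrow chain; a sketch with illustrative names:
+;   lo, c = iadd_ifcout lhs_lo, rhs_lo
+;   hi = iadd_ifcin lhs_hi, rhs_hi, c
+;   result = iconcat lo, hi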
+test legalizer +target i686 haswell + +; regex: V=v\d+ + +function %iadd(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = iadd v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} + +function %isub(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = isub v1, v2 + ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V) + ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V) + ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb + ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow + ; nextln: v10 = iconcat $v10_lsb, $v10_msb + return v10 +} diff --git a/filetests/isa/x86/legalize-isplit-backwards.clif new file mode 100644 index 000000000..43881fe09 --- /dev/null +++ b/filetests/isa/x86/legalize-isplit-backwards.clif @@ -0,0 +1,24 @@ +test compile +target x86_64 + +function u0:0(i128) -> i64, i64 fast { +; check: ebb0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]): +ebb0(v0: i128): + jump ebb2 + +ebb1: + ; When this `isplit` is legalized, the bnot below is not yet legalized, + ; so there isn't a corresponding `iconcat` yet. We should try legalization + ; for this `isplit` again once all instructions have been legalized. + v2, v3 = isplit.i128 v1 + ; return v6, v7 + return v2, v3 + +ebb2: + ; check: v6 = bnot.i64 v4 + ; check: v2 -> v6 + ; check: v7 = bnot.i64 v5 + ; check: v3 -> v7 + v1 = bnot.i128 v0 + jump ebb1 +} diff --git a/filetests/isa/x86/legalize-memory.clif index eb24523b1..348d76371 100644 --- a/filetests/isa/x86/legalize-memory.clif +++ b/filetests/isa/x86/legalize-memory.clif @@ -44,7 +44,7 @@ ebb1: ; SpiderMonkey VM-style static 4+2 GB heap. ; This eliminates bounds checks completely for offsets < 2GB. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash { +function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = iadd_imm.i64 gv0, 64 heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 @@ -65,7 +65,7 @@ ebb0(v0: i32, v999: i64): return v4 } -function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash { +function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = iadd_imm.i64 gv0, 64 heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000 @@ -89,7 +89,7 @@ ebb0(v0: i32, v999: i64): ; SpiderMonkey VM-style static 4+2 GB heap. ; Offsets >= 2 GB do require a boundscheck. -function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash { +function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v { gv0 = vmctx gv1 = iadd_imm.i64 gv0, 64 heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000 @@ -100,6 +100,8 @@ ebb0(v0: i32, v999: i64): ; Boundscheck code ; check: $(oob=$V) = icmp ; nextln: brz $oob, $(ok=$EBB) + ; nextln: jump $(trap_oob=$EBB) + ; check: $trap_oob: ; nextln: trap heap_oob ; check: $ok: ; Checks here are assuming that no peephole opts fold the load offsets.
diff --git a/filetests/isa/x86/legalize-splat.clif b/filetests/isa/x86/legalize-splat.clif new file mode 100644 index 000000000..19d61d529 --- /dev/null +++ b/filetests/isa/x86/legalize-splat.clif @@ -0,0 +1,73 @@ +test compile +set enable_simd=true +set probestack_enabled=false +target x86_64 haswell + +; use baldrdash_system_v calling convention here for simplicity (avoids prologue, epilogue) +function %test_splat_i32() -> i32x4 baldrdash_system_v { +ebb0: + v0 = iconst.i32 42 + v1 = splat.i32x4 v0 + return v1 +} + +; sameln: function %test_splat_i32() -> i32x4 [%xmm0] baldrdash_system_v { +; nextln: ss0 = incoming_arg 0, offset 0 +; nextln: +; nextln: ebb0: +; nextln: v0 = iconst.i32 42 +; nextln: v2 = scalar_to_vector.i32x4 v0 +; nextln: v1 = x86_pshufd v2, 0 +; nextln: return v1 +; nextln: } + + + +function %test_splat_i64() -> i64x2 baldrdash_system_v { +ebb0: + v0 = iconst.i64 42 + v1 = splat.i64x2 v0 + return v1 +} + +; check: ebb0: +; nextln: v0 = iconst.i64 42 +; nextln: v2 = scalar_to_vector.i64x2 v0 +; nextln: v1 = x86_pinsr v2, 1, v0 +; nextln: return v1 + + + +function %test_splat_b16() -> b16x8 baldrdash_system_v { +ebb0: + v0 = bconst.b16 true + v1 = splat.b16x8 v0 + return v1 +} + +; check: ebb0: +; nextln: v0 = bconst.b16 true +; nextln: v2 = scalar_to_vector.b16x8 v0 +; nextln: v3 = x86_pinsr v2, 1, v0 +; nextln: v4 = raw_bitcast.i32x4 v3 +; nextln: v5 = x86_pshufd v4, 0 +; nextln: v1 = raw_bitcast.b16x8 v5 +; nextln: return v1 + + + +function %test_splat_i8() -> i8x16 baldrdash_system_v { +ebb0: + v0 = iconst.i8 42 + v1 = splat.i8x16 v0 + return v1 +} + +; check: ebb0: +; nextln: v2 = iconst.i32 42 +; nextln: v0 = ireduce.i8 v2 +; nextln: v3 = scalar_to_vector.i8x16 v0 +; nextln: v4 = f64const 0.0 +; nextln: v5 = raw_bitcast.i8x16 v4 +; nextln: v1 = x86_pshufb v3, v5 +; nextln: return v1 diff --git a/filetests/isa/x86/legalize-tables.clif b/filetests/isa/x86/legalize-tables.clif index 5995c230e..762f8a103 100644 --- a/filetests/isa/x86/legalize-tables.clif +++ b/filetests/isa/x86/legalize-tables.clif @@ -2,6 +2,7 @@ test legalizer target x86_64 ; Test legalization for various forms of table addresses. 
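+; As in the heap tests above, each `table_addr` bounds check is expected to
+; jump to a dedicated `trap table_oob` block and resume at a fresh EBB; the
+; checks below match that shape.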
+; regex: EBB=ebb\d+ function %table_addrs(i32, i64, i64 vmctx) { gv4 = vmctx @@ -23,9 +24,11 @@ ebb0(v0: i32, v1: i64, v3: i64): v4 = table_addr.i64 table0, v0, +0 ; check: v8 = load.i32 notrap aligned v3+88 ; check: v9 = icmp uge v0, v8 - ; check: brz v9, ebb1 - ; check: trap table_oob - ; check: ebb1: + ; check: brz v9, $(resume_1=$EBB) + ; nextln: jump $(trap_1=$EBB) + ; check: $trap_1: + ; nextln: trap table_oob + ; check: $resume_1: ; check: v10 = uextend.i64 v0 ; check: v11 = iadd_imm.i64 v3, 72 ; check: v4 = iadd v11, v10 @@ -33,9 +36,11 @@ ebb0(v0: i32, v1: i64, v3: i64): v5 = table_addr.i64 table1, v0, +0 ; check: v12 = load.i32 notrap aligned v3+88 ; check: v13 = icmp.i32 uge v0, v12 - ; check: brz v13, ebb2 - ; check: trap table_oob - ; check: ebb2: + ; check: brz v13, $(resume_2=$EBB) + ; nextln: jump $(trap_2=$EBB) + ; check: $trap_2: + ; nextln: trap table_oob + ; check: $resume_2: ; check: v14 = uextend.i64 v0 ; check: v15 = iadd_imm.i64 v3, 72 ; check: v16 = ishl_imm v14, 4 @@ -44,18 +49,22 @@ ebb0(v0: i32, v1: i64, v3: i64): v6 = table_addr.i64 table2, v1, +0 ; check: v17 = iadd_imm.i64 v3, 80 ; check: v18 = icmp.i64 uge v1, v17 - ; check: brz v18, ebb3 - ; check: trap table_oob - ; check: ebb3: + ; check: brz v18, $(resume_3=$EBB) + ; nextln: jump $(trap_3=$EBB) + ; check: $trap_3: + ; nextln: trap table_oob + ; check: $resume_3: ; check: v19 = iadd_imm.i64 v3, 72 ; check: v6 = iadd v19, v1 v7 = table_addr.i64 table3, v1, +0 ; check: v20 = iadd_imm.i64 v3, 80 ; check: v21 = icmp.i64 uge v1, v20 - ; check: brz v21, ebb4 - ; check: trap table_oob - ; check: ebb4: + ; check: brz v21, $(resume_4=$EBB) + ; nextln: jump $(trap_4=$EBB) + ; check: $trap_4: + ; nextln: trap table_oob + ; check: $resume_4: ; check: v22 = iadd_imm.i64 v3, 72 ; check: v23 = ishl_imm.i64 v1, 4 ; check: v7 = iadd v22, v23 diff --git a/filetests/isa/x86/load-store-narrow.clif b/filetests/isa/x86/load-store-narrow.clif new file mode 100644 index 000000000..5f95b92fc --- /dev/null +++ b/filetests/isa/x86/load-store-narrow.clif @@ -0,0 +1,16 @@ +test compile +target i686 + +function u0:0(i64, i32) system_v { +ebb0(v0: i64, v1: i32): + v2 = bor v0, v0 + store v2, v1 + return +} + +function u0:1(i32) -> i64 system_v { +ebb0(v1: i32): + v0 = load.i64 v1 + v2 = bor v0, v0 + return v2 +} diff --git a/filetests/isa/x86/optimized-zero-constants-32bit.clif b/filetests/isa/x86/optimized-zero-constants-32bit.clif index c90e45552..7dbbcc86e 100644 --- a/filetests/isa/x86/optimized-zero-constants-32bit.clif +++ b/filetests/isa/x86/optimized-zero-constants-32bit.clif @@ -1,5 +1,6 @@ -; Check that floating-point constants equal to zero are optimized correctly. +; Check that floating-point and integer constants equal to zero are optimized correctly. test binemit +set opt_level=speed_and_size target i686 function %foo() -> f32 fast { @@ -16,3 +17,36 @@ ebb0: return v1 } +function %zero_dword() -> i32 fast { +ebb0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i32 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i32 0 ; bin: 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +ebb0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. 
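+ ; for example: 66 31 c0 is xor %ax, %ax (3 bytes), while the prefix-less
+ ; 31 c0 is xor %eax, %eax (2 bytes) with the same effect on %ax.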
+ ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +ebb0: + ; asm: xor %al, %al + [-,%rax] v0 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v1 = iconst.i8 0 ; bin: 30 ff + return v0 +} diff --git a/filetests/isa/x86/optimized-zero-constants.clif index 44060e9b9..807466e84 100644 --- a/filetests/isa/x86/optimized-zero-constants.clif +++ b/filetests/isa/x86/optimized-zero-constants.clif @@ -1,11 +1,12 @@ ; Check that floating-point constants equal to zero are optimized correctly. test binemit +set opt_level=speed_and_size target x86_64 function %zero_const_32bit_no_rex() -> f32 fast { ebb0: ; asm: xorps %xmm0, %xmm0 - [-,%xmm0] v0 = f32const 0.0 ; bin: 40 0f 57 c0 + [-,%xmm0] v0 = f32const 0.0 ; bin: 0f 57 c0 return v0 } @@ -19,7 +20,7 @@ ebb0: function %zero_const_64bit_no_rex() -> f64 fast { ebb0: ; asm: xorpd %xmm0, %xmm0 - [-,%xmm0] v0 = f64const 0.0 ; bin: 66 40 0f 57 c0 + [-,%xmm0] v0 = f64const 0.0 ; bin: 66 0f 57 c0 return v0 } @@ -30,3 +31,42 @@ ebb0: return v1 } +function %imm_zero_register() -> i64 fast { +ebb0: + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i64 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i64 0 ; bin: 31 ff + ; asm: xor %r8, %r8 + [-,%r8] v2 = iconst.i64 0 ; bin: 45 31 c0 + ; asm: xor %r15, %r15 + [-,%r15] v4 = iconst.i64 0 ; bin: 45 31 ff + return v0 +} + +function %zero_word() -> i16 fast { +ebb0: + ; while you may expect this to be encoded like 6631c0, aka + ; xor %ax, %ax, the upper 16 bits of the register used for + ; i16 are left undefined, so it's not wrong to clear them. + ; + ; discarding the 66 prefix is shorter, so this test expects + ; that we do so. + ; + ; asm: xor %eax, %eax + [-,%rax] v0 = iconst.i16 0 ; bin: 31 c0 + ; asm: xor %edi, %edi + [-,%rdi] v1 = iconst.i16 0 ; bin: 31 ff + return v0 +} + +function %zero_byte() -> i8 fast { +ebb0: + ; asm: xor %r15b, %r15b + [-,%r15] v0 = iconst.i8 0 ; bin: 45 30 ff + ; asm: xor %al, %al + [-,%rax] v1 = iconst.i8 0 ; bin: 30 c0 + ; asm: xor %bh, %bh + [-,%rdi] v2 = iconst.i8 0 ; bin: 30 ff + return v0 +} diff --git a/filetests/isa/x86/pinned-reg.clif new file mode 100644 index 000000000..2a447a6d9 --- /dev/null +++ b/filetests/isa/x86/pinned-reg.clif @@ -0,0 +1,74 @@ +test compile + +set enable_pinned_reg=true +set use_pinned_reg_as_heap_base=true +set opt_level=speed_and_size + +target x86_64 + +; regex: V=v\d+ + +; r15 is the pinned heap register. It must not be rewritten, so it must not be +; used as a tied output register. +function %tied_input() -> i64 system_v { +ebb0: + v1 = get_pinned_reg.i64 + v2 = iadd_imm v1, 42 + return v2 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd_imm v1, 42 + +;; It mustn't be used even if this is a tied input used twice.
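+;; (On a two-operand ISA like x86, a tied instruction such as `add` writes its
+;; result over its first input register, so emitting it directly against %r15
+;; would clobber the pinned register; hence the regmove to %rax checked below.)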
+function %tied_twice() -> i64 system_v { +ebb0: + v1 = get_pinned_reg.i64 + v2 = iadd v1, v1 + return v2 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd v1, v1 + +function %uses() -> i64 system_v { +ebb0: + v1 = get_pinned_reg.i64 + v2 = iadd_imm v1, 42 + v3 = get_pinned_reg.i64 + v4 = iadd v2, v3 + return v4 +} + +; check: ,%r15] +; sameln: v1 = get_pinned_reg.i64 +; nextln: regmove v1, %r15 -> %rax +; nextln: ,%rax] +; sameln: iadd_imm v1, 42 +; nextln: ,%r15 +; sameln: v3 = get_pinned_reg.i64 +; nextln: ,%rax] +; sameln: iadd v2, v3 + +; When the pinned register is used as the heap base, the final load instruction +; must use the %r15 register, since x86 implements the complex addressing mode. +function u0:1(i64 vmctx) -> i64 system_v { + gv0 = vmctx + heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v42: i64): + v5 = iconst.i32 42 + v6 = heap_addr.i64 heap0, v5, 0 + v7 = load.i64 v6 + return v7 +} + +; check: ,%r15] +; sameln: $(heap_base=$V) = get_pinned_reg.i64 +; nextln: load_complex.i64 $heap_base+ diff --git a/filetests/isa/x86/prologue-epilogue.clif b/filetests/isa/x86/prologue-epilogue.clif index f408d1a19..f2fd3c68e 100644 --- a/filetests/isa/x86/prologue-epilogue.clif +++ b/filetests/isa/x86/prologue-epilogue.clif @@ -1,5 +1,5 @@ test compile -set opt_level=best +set opt_level=speed_and_size set is_pic target x86_64 haswell @@ -213,20 +213,23 @@ function %divert(i32) -> i32 system_v { ebb0(v0: i32): v2 = iconst.i32 0 v3 = iconst.i32 1 - jump ebb3(v0, v3, v2) + jump ebb1(v0, v3, v2) -ebb3(v4: i32, v5: i32, v6: i32): - brz v4, ebb4 +ebb1(v4: i32, v5: i32, v6: i32): + brz v4, ebb3 + jump ebb2 + +ebb2: v7 = iadd v5, v6 v8 = iadd_imm v4, -1 - jump ebb3(v8, v7, v5) + jump ebb1(v8, v7, v5) -ebb4: +ebb3: return v5 } ; check: function %divert -; check: regmove v5, %rcx -> %rbx +; check: regmove.i32 v5, %rcx -> %rbx ; check: [Op1popq#58,%rbx] v15 = x86_pop.i64 ; Stack limit checking diff --git a/filetests/isa/x86/pshufb.clif b/filetests/isa/x86/pshufb.clif new file mode 100644 index 000000000..6fb31b198 --- /dev/null +++ b/filetests/isa/x86/pshufb.clif @@ -0,0 +1,13 @@ +test binemit +set enable_simd +target x86_64 has_ssse3=true + +function %test_pshufb() { +ebb0: +[-, %rax] v0 = iconst.i8 42 +[-, %xmm0] v1 = scalar_to_vector.i8x16 v0 ; bin: 66 40 0f 6e c0 +[-, %rbx] v2 = iconst.i8 43 +[-, %xmm4] v3 = scalar_to_vector.i8x16 v2 ; bin: 66 40 0f 6e e3 +[-, %xmm0] v4 = x86_pshufb v1, v3 ; bin: 66 0f 38 00 c4 + return +} diff --git a/filetests/isa/x86/pshufd.clif b/filetests/isa/x86/pshufd.clif new file mode 100644 index 000000000..6f4896d0d --- /dev/null +++ b/filetests/isa/x86/pshufd.clif @@ -0,0 +1,11 @@ +test binemit +set enable_simd +target x86_64 + +function %test_pshuf() { +ebb0: +[-, %rax] v0 = iconst.i32 42 +[-, %xmm0] v1 = scalar_to_vector.i32x4 v0 ; bin: 66 40 0f 6e c0 +[-, %xmm0] v2 = x86_pshufd v1, 0 ; bin: 66 0f 70 c0 00 + return +} diff --git a/filetests/isa/x86/raw_bitcast.clif b/filetests/isa/x86/raw_bitcast.clif new file mode 100644 index 000000000..5c1c2ea32 --- /dev/null +++ b/filetests/isa/x86/raw_bitcast.clif @@ -0,0 +1,10 @@ +test binemit +target x86_64 + +function %test_raw_bitcast_i16x8_to_b32x4() { +ebb0: +[-, %rbx] v0 = bconst.b16 true +[-, %xmm2] v1 = scalar_to_vector.b16x8 v0 +[-, %xmm2] v2 = raw_bitcast.i32x4 v1 ; bin: + return +} diff --git a/filetests/isa/x86/relax_branch.clif b/filetests/isa/x86/relax_branch.clif index 
c735224c0..15c7e876a 100644 --- a/filetests/isa/x86/relax_branch.clif +++ b/filetests/isa/x86/relax_branch.clif @@ -1,5 +1,5 @@ test binemit -set opt_level=best +set opt_level=speed_and_size set avoid_div_traps set baldrdash_prologue_words=3 set allones_funcaddrs @@ -10,7 +10,7 @@ target x86_64 haswell ; particular, the first block has to be non-empty but its encoding size must be ; zero (i.e. not generate any code). See also issue #666 for more details. -function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash { +function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v { ss0 = incoming_arg 24, offset -24 gv0 = vmctx gv1 = iadd_imm.i64 gv0, 48 @@ -55,6 +55,9 @@ function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] ba @004c [Op1r_id#4081,%rcx] v37 = band_imm v35, 255 [Op1rcmp_ib#7083,%rflags] v97 = ifcmp_imm v37, 26 @0050 [Op1brib#70] brif sge v97, ebb6 +@0050 [-] fallthrough ebb10 + + ebb10: [Op1umr#89,%rcx] v101 = copy v18 @0054 [Op1jmpb#eb] jump ebb5(v18, v101) @@ -82,6 +85,9 @@ function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] ba [Op1rcmp_ib#7083,%rflags] v98 = ifcmp_imm v63, 26 @0084 [RexOp1rmov#89] regmove v47, %rdi -> %rbx @0084 [Op1brib#70] brif sge v98, ebb8 +@0084 [-] fallthrough ebb11 + + ebb11: [RexOp1umr#89,%rdx] v103 = copy.i32 v29 @0088 [Op1jmpb#eb] jump ebb7(v29, v10, v21, v103) @@ -99,6 +105,9 @@ function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] ba @0098 [Op2urm_noflags_abcd#4b6,%rbx] v77 = bint.i32 v76 @0099 [RexOp1rr#21,%r10] v78 = band.i32 v50, v77 @009a [RexOp1tjccb#74] brz v78, ebb9 +@009a [-] fallthrough ebb12 + + ebb12: [RexOp1umr#89,%rcx] v99 = copy v81 [Op1umr#89,%rdx] v100 = copy v79 @00a4 [RexOp1rmov#89] regmove v100, %rdx -> %rdi diff --git a/filetests/isa/x86/run-const.clif b/filetests/isa/x86/run-const.clif new file mode 100644 index 000000000..1ac5062e4 --- /dev/null +++ b/filetests/isa/x86/run-const.clif @@ -0,0 +1,11 @@ +test run + +function %test_compare_i32() -> b1 { +ebb0: + v0 = iconst.i32 42 + v1 = iconst.i32 42 + v2 = icmp eq v0, v1 + return v2 +} + +; run diff --git a/filetests/isa/x86/run-i64.clif b/filetests/isa/x86/run-i64.clif new file mode 100644 index 000000000..6fae71966 --- /dev/null +++ b/filetests/isa/x86/run-i64.clif @@ -0,0 +1,17 @@ +; Test i64 instructions on x86_32. 
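+; (Note: despite the file name this is a `test compile` test; it only checks
+; that the carry/borrow expansion, see legalize-i64.clif, also compiles.)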
+test compile +target i686 haswell + +function %iadd(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = iadd v1, v2 + ; check: iadd_ifcout + return v10 +} + +function %isub(i64, i64) -> i64 { +ebb0(v1: i64, v2: i64): + v10 = isub v1, v2 + ; check: isub_ifbout + return v10 +} diff --git a/filetests/isa/x86/saturating-float-cast.clif b/filetests/isa/x86/saturating-float-cast.clif new file mode 100644 index 000000000..5986e1f86 --- /dev/null +++ b/filetests/isa/x86/saturating-float-cast.clif @@ -0,0 +1,13 @@ +test compile +target x86_64 + +function u0:0() -> f32 system_v { +ebb0: + v0 = iconst.i8 255 +; check: v2 = iconst.i32 255 +; nextln: v0 = ireduce.i8 v2 + v1 = fcvt_from_uint.f32 v0 +; nextln: v3 = uextend.i64 v0 +; nextln: v1 = fcvt_from_sint.f32 v3 + return v1 +} diff --git a/filetests/isa/x86/scalar_to_vector-binemit.clif b/filetests/isa/x86/scalar_to_vector-binemit.clif new file mode 100644 index 000000000..149d54b03 --- /dev/null +++ b/filetests/isa/x86/scalar_to_vector-binemit.clif @@ -0,0 +1,32 @@ +test binemit +set opt_level=speed_and_size +set enable_simd +target x86_64 + +function %test_scalar_to_vector_b8() { +ebb0: +[-, %rax] v0 = bconst.b8 true +[-, %xmm0] v1 = scalar_to_vector.b8x16 v0 ; bin: 66 0f 6e c0 + return +} + +function %test_scalar_to_vector_i16() { +ebb0: +[-, %rbx] v0 = iconst.i16 42 +[-, %xmm2] v1 = scalar_to_vector.i16x8 v0 ; bin: 66 0f 6e d3 + return +} + +function %test_scalar_to_vector_b32() { +ebb0: +[-, %rcx] v0 = bconst.b32 false +[-, %xmm3] v1 = scalar_to_vector.b32x4 v0 ; bin: 66 0f 6e d9 + return +} + +function %test_scalar_to_vector_i64() { +ebb0: +[-, %rdx] v0 = iconst.i64 42 +[-, %xmm7] v1 = scalar_to_vector.i64x2 v0 ; bin: 66 0f 6e fa + return +} diff --git a/filetests/isa/x86/scalar_to_vector-compile.clif b/filetests/isa/x86/scalar_to_vector-compile.clif new file mode 100644 index 000000000..6db3d12f4 --- /dev/null +++ b/filetests/isa/x86/scalar_to_vector-compile.clif @@ -0,0 +1,19 @@ +test compile +set opt_level=speed_and_size +set probestack_enabled=false +set enable_simd +target x86_64 + +; ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register) +function %test_scalar_to_vector_f32() -> f32x4 baldrdash_system_v { +ebb0: + v0 = f32const 0x0.42 + v1 = scalar_to_vector.f32x4 v0 + return v1 +} + +; check: ebb0 +; nextln: v2 = iconst.i32 0x3e84_0000 +; nextln: v0 = bitcast.f32 v2 +; nextln: [null_fpr#00,%xmm0] v1 = scalar_to_vector.f32x4 v0 +; nextln: return v1 diff --git a/filetests/isa/x86/shrink-multiple-uses.clif b/filetests/isa/x86/shrink-multiple-uses.clif index d20cc7806..358d098a6 100644 --- a/filetests/isa/x86/shrink-multiple-uses.clif +++ b/filetests/isa/x86/shrink-multiple-uses.clif @@ -1,5 +1,5 @@ test shrink -set opt_level=best +set opt_level=speed_and_size target x86_64 function %test_multiple_uses(i32 [%rdi]) -> i32 { @@ -8,6 +8,9 @@ ebb0(v0: i32 [%rdi]): [Op2seti_abcd#490,%rax] v1 = trueif eq v3 [RexOp2urm_noflags#4b6,%rax] v2 = bint.i32 v1 [Op1brib#70] brif eq v3, ebb1 +[Op1jmpb#eb] jump ebb2 + +ebb2: [Op1ret#c3] return v2 ebb1: diff --git a/filetests/isa/x86/shrink.clif b/filetests/isa/x86/shrink.clif index f0d78af22..b0d3174ec 100644 --- a/filetests/isa/x86/shrink.clif +++ b/filetests/isa/x86/shrink.clif @@ -1,5 +1,5 @@ test binemit -set opt_level=best +set opt_level=speed_and_size target x86_64 ; Test that instruction shrinking eliminates REX prefixes when possible. 
diff --git a/filetests/isa/x86/shuffle-legalize.clif b/filetests/isa/x86/shuffle-legalize.clif new file mode 100644 index 000000000..d19248944 --- /dev/null +++ b/filetests/isa/x86/shuffle-legalize.clif @@ -0,0 +1,31 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %test_shuffle_different_ssa_values() -> i8x16 { +ebb0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 0x01 + v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 + return v2 +} + +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000 +; nextln: v4 = x86_pshufb v0, v3 +; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080 +; nextln: v6 = x86_pshufb v1, v5 +; nextln: v2 = bor v4, v6 + + + +function %test_shuffle_same_ssa_value() -> i8x16 { +ebb0: + v1 = vconst.i8x16 0x01 + v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 + return v2 +} + +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000 +; nextln: v2 = x86_pshufb v1, v3 diff --git a/filetests/isa/x86/shuffle-run.clif b/filetests/isa/x86/shuffle-run.clif new file mode 100644 index 000000000..bc9eecb68 --- /dev/null +++ b/filetests/isa/x86/shuffle-run.clif @@ -0,0 +1,60 @@ +test run +set enable_simd + +function %test_shuffle_different_ssa_values() -> b1 { +ebb0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] + v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 + v3 = extractlane.i8x16 v2, 15 + v4 = iconst.i8 42 + v5 = icmp eq v3, v4 + return v5 +} + +; run + +function %test_shuffle_same_ssa_value() -> b1 { +ebb0: + v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax + v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes + v2 = extractlane.i8x16 v1, 4 + v3 = iconst.i8 0x01 + v4 = icmp eq v2, v3 + return v4 +} + +; run + +function %compare_shuffle() -> b1 { +ebb0: + v1 = vconst.i32x4 [0 1 2 3] + v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; keep each lane in place from the first vector + v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] + v4 = raw_bitcast.i32x4 v3 + v5 = extractlane.i32x4 v4, 3 + v6 = icmp_imm eq v5, 3 + v7 = extractlane.i32x4 v4, 0 + v8 = icmp_imm eq v7, 0 + v9 = band v6, v8 + return v9 +} + +; run + + +function %compare_shuffle() -> b32 { +ebb0: + v1 = vconst.b32x4 [true false true false] + v2 = raw_bitcast.b8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; pair up the true values to make the entire vector true + v3 = shuffle v2, v2, [0 1 2 3 0 1 2 3 8 9 10 11 8 9 10 11] + v4 = raw_bitcast.b32x4 v3 + v5 = extractlane v4, 3 + v6 = extractlane v4, 0 + v7 = band v5, v6 + return v7 +} + +; run diff --git a/filetests/isa/x86/stack-addr64.clif b/filetests/isa/x86/stack-addr64.clif index a333f2cd5..c80d19090 100644 --- a/filetests/isa/x86/stack-addr64.clif +++ b/filetests/isa/x86/stack-addr64.clif @@ -1,6 +1,6 @@ ; binary emission of stack address instructions on x86-64. 
test binemit -set opt_level=fastest +set opt_level=none target x86_64 haswell ; The binary encodings can be verified with the command: diff --git a/filetests/isa/x86/stack-load-store64.clif index c1854e623..3c0e2c8c0 100644 --- a/filetests/isa/x86/stack-load-store64.clif +++ b/filetests/isa/x86/stack-load-store64.clif @@ -1,6 +1,6 @@ ; legalization of stack load and store instructions on x86-64. test legalizer -set opt_level=fastest +set opt_level=none target x86_64 haswell function %stack_load_and_store() { diff --git a/filetests/isa/x86/vconst-binemit.clif new file mode 100644 index 000000000..b07dc0fd4 --- /dev/null +++ b/filetests/isa/x86/vconst-binemit.clif @@ -0,0 +1,11 @@ +test binemit +set opt_level=speed_and_size +set enable_simd +target x86_64 + +function %test_vconst_b8() { +ebb0: +[-, %xmm2] v0 = vconst.b8x16 0x00 ; bin: 0f 10 15 00000008 PCRelRodata4(15) +[-, %xmm3] v1 = vconst.b8x16 0x01 ; bin: 0f 10 1d 00000011 PCRelRodata4(31) + return +} diff --git a/filetests/isa/x86/vconst-rodata.clif new file mode 100644 index 000000000..34c203dce --- /dev/null +++ b/filetests/isa/x86/vconst-rodata.clif @@ -0,0 +1,19 @@ +test rodata +set enable_simd=true +target x86_64 haswell + +function %test_vconst_i32() -> i32x4 { +ebb0: + v0 = vconst.i32x4 0x1234 + return v0 +} + +; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + +function %test_vconst_b16() -> b16x8 { +ebb0: + v0 = vconst.b16x8 [true false true false true false true true] + return v0 +} + +; sameln: [1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0] diff --git a/filetests/isa/x86/vconst-run.clif new file mode 100644 index 000000000..9ec160c2e --- /dev/null +++ b/filetests/isa/x86/vconst-run.clif @@ -0,0 +1,21 @@ +test run +set enable_simd + +function %test_vconst_syntax() -> b1 { +ebb0: + v0 = vconst.i32x4 0x00000004_00000003_00000002_00000001 ; build constant using hexadecimal syntax + v1 = vconst.i32x4 [1 2 3 4] ; build constant using literal list syntax + + ; verify lane 1 matches + v2 = extractlane v0, 1 + v3 = extractlane v1, 1 + v4 = icmp eq v3, v2 + + ; verify lane 1 has the correct value + v5 = icmp_imm eq v3, 2 + + v6 = band v4, v5 + return v6 +} + +; run diff --git a/filetests/isa/x86/windows_fastcall_x64.clif index 14b46e579..a621abfe9 100644 --- a/filetests/isa/x86/windows_fastcall_x64.clif +++ b/filetests/isa/x86/windows_fastcall_x64.clif @@ -1,5 +1,5 @@ test compile -set opt_level=best +set opt_level=speed_and_size set is_pic target x86_64 haswell diff --git a/filetests/legalizer/br_table_cond.clif index 5b7beb325..bd823d3ca 100644 --- a/filetests/legalizer/br_table_cond.clif +++ b/filetests/legalizer/br_table_cond.clif @@ -4,8 +4,10 @@ set jump_tables_enabled=false target x86_64 ; Test that when jump_tables_enabled is false, all jump tables are eliminated.
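+; With jump tables disabled, each `br_table` is expected to lower into a chain
+; of `icmp_imm eq` / `brnz` comparisons, one per table entry, ending with a
+; jump to the default block, as the checks below spell out.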
+; regex: V=v\d+ +; regex: EBB=ebb\d+ -function u0:0(i64 vmctx) baldrdash { +function u0:0(i64 vmctx) baldrdash_system_v { gv0 = vmctx gv1 = iadd_imm.i64 gv0, 48 jt0 = jump_table [ebb2, ebb2, ebb7] @@ -20,10 +22,32 @@ ebb5: trapnz v2, interrupt v3 = iconst.i32 0 br_table v3, ebb3, jt0 +; check: ebb5: +; check: $(val0=$V) = iconst.i32 0 +; nextln: $(cmp0=$V) = icmp_imm eq $val0, 0 +; nextln: brnz $cmp0, ebb2 +; nextln: jump $(fail0=$EBB) +; check: $fail0: +; nextln: $(cmp1=$V) = icmp_imm.i32 eq $val0, 1 +; nextln: brnz $cmp1, ebb2 +; nextln: jump $(fail1=$EBB) +; check: $fail1: +; nextln: $(cmp2=$V) = icmp_imm.i32 eq $val0, 2 +; nextln: brnz $cmp2, ebb7 +; nextln: jump ebb3 ebb7: v4 = iconst.i32 0 br_table v4, ebb3, jt1 +; check: ebb7: +; check: $(val1=$V) = iconst.i32 0 +; nextln: $(cmp3=$V) = icmp_imm eq $val1, 0 +; nextln: brnz $cmp3, ebb8 +; nextln: jump $(fail3=$EBB) +; check: $fail3: +; nextln: $(cmp4=$V) = icmp_imm.i32 eq $val1, 1 +; nextln: brnz $cmp4, ebb8 +; nextln: jump ebb3 ebb8: jump ebb5 diff --git a/filetests/legalizer/empty_br_table.clif b/filetests/legalizer/empty_br_table.clif new file mode 100644 index 000000000..6dfceb5a6 --- /dev/null +++ b/filetests/legalizer/empty_br_table.clif @@ -0,0 +1,17 @@ +test legalizer +set probestack_enabled=false +set jump_tables_enabled=false +target x86_64 + +function u0:0(i64) { + jt0 = jump_table [] + +ebb0(v0: i64): + br_table v0, ebb1, jt0 +; check: ebb0(v0: i64): +; nextln: jump ebb1 + +ebb1: + return +} +; not: jump_table diff --git a/filetests/legalizer/iconst-i64.clif b/filetests/legalizer/iconst-i64.clif new file mode 100644 index 000000000..a3c916841 --- /dev/null +++ b/filetests/legalizer/iconst-i64.clif @@ -0,0 +1,12 @@ +test legalizer +target i686 + +function %foo() -> i64 { +ebb0: + v1 = iconst.i64 0x6400000042 + return v1 +} + +; check: v2 = iconst.i32 66 +; check: v3 = iconst.i32 100 +; check: v1 = iconcat v2, v3 diff --git a/filetests/licm/basic.clif b/filetests/licm/basic.clif index f409523c7..3f5dfbbe1 100644 --- a/filetests/licm/basic.clif +++ b/filetests/licm/basic.clif @@ -10,11 +10,14 @@ ebb1(v1: i32): v2 = iconst.i32 1 v3 = iconst.i32 2 v4 = iadd v2, v3 - brz v1, ebb2(v1) + brz v1, ebb3(v1) + jump ebb2 + +ebb2: v5 = isub v1, v2 jump ebb1(v5) -ebb2(v6: i32): +ebb3(v6: i32): return v6 } @@ -26,10 +29,13 @@ ebb2(v6: i32): ; nextln: jump ebb1(v0) ; nextln: ; nextln: ebb1(v1: i32): -; nextln: brz v1, ebb2(v1) -; nextln: v5 = isub v1, v2 +; nextln: brz v1, ebb3(v1) +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: v5 = isub.i32 v1, v2 ; nextln: jump ebb1(v5) ; nextln: -; nextln: ebb2(v6: i32): +; nextln: ebb3(v6: i32): ; nextln: return v6 ; nextln: } diff --git a/filetests/licm/complex.clif b/filetests/licm/complex.clif index c7d26f9a3..2774cde5b 100644 --- a/filetests/licm/complex.clif +++ b/filetests/licm/complex.clif @@ -39,6 +39,9 @@ ebb0(v0: i32): v19 = iadd v18, v2 v20 = iadd.i32 v2, v3 [SBzero#18] brz.i32 v1, ebb1(v20) +[UJ#1b] jump ebb7 + + ebb7: [Iret#19] return v19 } @@ -53,10 +56,10 @@ ebb0(v0: i32): ; nextln: ; nextln: ebb1(v1: i32): ; nextln: v4 = iadd.i32 v2, v1 -; nextln: brz v1, ebb7(v2) -; nextln: jump ebb8(v4) +; nextln: brz v1, ebb8(v2) +; nextln: jump ebb9(v4) ; nextln: -; nextln: ebb7(v21: i32): +; nextln: ebb8(v21: i32): ; nextln: v8 = iadd.i32 v6, v1 ; nextln: v11 = iadd.i32 v1, v4 ; nextln: jump ebb2(v21) @@ -70,7 +73,7 @@ ebb0(v0: i32): ; nextln: brz.i32 v1, ebb2(v9) ; nextln: jump ebb6(v10) ; nextln: -; nextln: ebb8(v22: i32): +; nextln: ebb9(v22: i32): ; nextln: v15 = iadd.i32 v4, v13 
; nextln: jump ebb4(v22) ; nextln: @@ -86,5 +89,8 @@ ebb0(v0: i32): ; nextln: ebb6(v18: i32): ; nextln: v19 = iadd v18, v2 ; nextln: brz.i32 v1, ebb1(v20) +; nextln: jump ebb7 +; nextln: +; nextln: ebb7: ; nextln: return v19 ; nextln: } diff --git a/filetests/licm/critical-edge.clif b/filetests/licm/critical-edge.clif index 1c69f6364..89beb387c 100644 --- a/filetests/licm/critical-edge.clif +++ b/filetests/licm/critical-edge.clif @@ -6,37 +6,49 @@ target riscv32 function %critical_edge(i32, i32) -> i32 { ebb0(v0: i32, v7: i32): -[SBzero#38] brnz v7, ebb1(v0) +[SBzero#38] brnz v7, ebb2(v0) +[UJ#1b] jump ebb1 + + ebb1: [Iret#19] return v0 - ebb1(v1: i32): + ebb2(v1: i32): v2 = iconst.i32 1 v3 = iconst.i32 2 v4 = iadd v2, v3 -[SBzero#18] brz v1, ebb2(v1) +[SBzero#18] brz v1, ebb4(v1) +[UJ#1b] jump ebb3 + + ebb3: v5 = isub v1, v2 -[UJ#1b] jump ebb1(v5) +[UJ#1b] jump ebb2(v5) - ebb2(v6: i32): + ebb4(v6: i32): [Iret#19] return v6 } ; sameln: function %critical_edge ; nextln: ebb0(v0: i32, v7: i32): -; nextln: brnz v7, ebb3(v0) +; nextln: brnz v7, ebb5(v0) +; nextln: jump ebb1 +; nextln: +; nextln: ebb1: ; nextln: return v0 ; nextln: -; nextln: ebb3(v8: i32): +; nextln: ebb5(v8: i32): ; nextln: v2 = iconst.i32 1 ; nextln: v3 = iconst.i32 2 ; nextln: v4 = iadd v2, v3 -; nextln: jump ebb1(v8) +; nextln: jump ebb2(v8) +; nextln: +; nextln: ebb2(v1: i32): +; nextln: brz v1, ebb4(v1) +; nextln: jump ebb3 ; nextln: -; nextln: ebb1(v1: i32): -; nextln: brz v1, ebb2(v1) -; nextln: v5 = isub v1, v2 -; nextln: jump ebb1(v5) +; nextln: ebb3: +; nextln: v5 = isub.i32 v1, v2 +; nextln: jump ebb2(v5) ; nextln: -; nextln: ebb2(v6: i32): +; nextln: ebb4(v6: i32): ; nextln: return v6 ; nextln: } diff --git a/filetests/licm/encoding.clif b/filetests/licm/encoding.clif index 27a965f55..b029a51c1 100644 --- a/filetests/licm/encoding.clif +++ b/filetests/licm/encoding.clif @@ -11,11 +11,14 @@ function %simple_loop(i32) -> i32 { [Iz#04,%x0] v2 = iconst.i32 1 [Iz#04,%x1] v3 = iconst.i32 2 [R#0c,%x2] v4 = iadd v2, v3 -[SBzero#18] brz v1, ebb2(v1) +[SBzero#18] brz v1, ebb3(v1) +[UJ#1b] jump ebb2 + + ebb2: [R#200c,%x5] v5 = isub v1, v2 [UJ#1b] jump ebb1(v5) - ebb2(v6: i32): + ebb3(v6: i32): [Iret#19] return v6 } @@ -27,10 +30,13 @@ function %simple_loop(i32) -> i32 { ; nextln: [UJ#1b] jump ebb1(v0) ; nextln: ; nextln: ebb1(v1: i32): -; nextln: [SBzero#18] brz v1, ebb2(v1) -; nextln: [R#200c,%x5] v5 = isub v1, v2 +; nextln: [SBzero#18] brz v1, ebb3(v1) +; nextln: [UJ#1b] jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: [R#200c,%x5] v5 = isub.i32 v1, v2 ; nextln: [UJ#1b] jump ebb1(v5) ; nextln: -; nextln: ebb2(v6: i32): +; nextln: ebb3(v6: i32): ; nextln: [Iret#19] return v6 ; nextln: } diff --git a/filetests/licm/jump-table-entry.clif b/filetests/licm/jump-table-entry.clif index 00a6b3fd1..cbf51cd08 100644 --- a/filetests/licm/jump-table-entry.clif +++ b/filetests/licm/jump-table-entry.clif @@ -1,5 +1,4 @@ test licm - target x86_64 function %dont_hoist_jump_table_entry_during_licm() { @@ -15,15 +14,20 @@ ebb1: ; the loop! 
fallthrough ebb2 ebb2: - v1 = iconst.i32 -14 + v1 = iconst.i64 -14 v8 = ifcmp_imm v1, 2 brif uge v8, ebb1 + jump ebb3 + +ebb3: v5 = jump_table_base.i64 jt0 v6 = jump_table_entry.i64 v1, v5, 4, jt0 v7 = iadd v5, v6 indirect_jump_table_br v7, jt0 ; check: ebb2: -; nextln: v8 = ifcmp_imm.i32 v1, 2 +; nextln: v8 = ifcmp_imm.i64 v1, 2 ; nextln: brif uge v8, ebb1 +; nextln: jump ebb3 +; check: ebb3: ; nextln: jump_table_entry.i64 } diff --git a/filetests/licm/load_readonly_notrap.clif b/filetests/licm/load_readonly_notrap.clif index 9a9d2dcbf..4731bd664 100644 --- a/filetests/licm/load_readonly_notrap.clif +++ b/filetests/licm/load_readonly_notrap.clif @@ -18,11 +18,14 @@ ebb1(v2: i32, v3: i64): v5 = heap_addr.i64 heap0, v4, 1 v6 = load.i32 notrap aligned readonly v5 v7 = iadd v2, v6 - brz v2, ebb2(v2) + brz v2, ebb3(v2) + jump ebb2 + +ebb2: v8 = isub v2, v4 jump ebb1(v8, v3) -ebb2(v9: i32): +ebb3(v9: i32): return v9 } @@ -39,10 +42,13 @@ ebb2(v9: i32): ; nextln: ; nextln: ebb1(v2: i32, v3: i64): ; nextln: v7 = iadd v2, v6 -; nextln: brz v2, ebb2(v2) -; nextln: v8 = isub v2, v4 +; nextln: brz v2, ebb3(v2) +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: v8 = isub.i32 v2, v4 ; nextln: jump ebb1(v8, v3) ; nextln: -; nextln: ebb2(v9: i32): +; nextln: ebb3(v9: i32): ; nextln: return v9 ; nextln: } diff --git a/filetests/licm/multiple-blocks.clif b/filetests/licm/multiple-blocks.clif index de5af884e..ea23505ef 100644 --- a/filetests/licm/multiple-blocks.clif +++ b/filetests/licm/multiple-blocks.clif @@ -10,16 +10,22 @@ ebb1(v10: i32): v11 = iconst.i32 1 v12 = iconst.i32 2 v13 = iadd v11, v12 - brz v10, ebb2(v10) + brz v10, ebb4(v10) + jump ebb2 + +ebb2: v15 = isub v10, v11 - brz v15, ebb3(v15) + brz v15, ebb5(v15) + jump ebb3 + +ebb3: v14 = isub v10, v11 jump ebb1(v14) -ebb2(v20: i32): +ebb4(v20: i32): return v20 -ebb3(v30: i32): +ebb5(v30: i32): v31 = iadd v11, v13 jump ebb1(v30) @@ -33,15 +39,21 @@ ebb3(v30: i32): ; nextln: jump ebb1(v0) ; nextln: ; nextln: ebb1(v10: i32): -; nextln: brz v10, ebb2(v10) -; nextln: v15 = isub v10, v11 -; nextln: brz v15, ebb3(v15) -; nextln: v14 = isub v10, v11 +; nextln: brz v10, ebb4(v10) +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: v15 = isub.i32 v10, v11 +; nextln: brz v15, ebb5(v15) +; nextln: jump ebb3 +; nextln: +; nextln: ebb3: +; nextln: v14 = isub.i32 v10, v11 ; nextln: jump ebb1(v14) ; nextln: -; nextln: ebb2(v20: i32): +; nextln: ebb4(v20: i32): ; nextln: return v20 ; nextln: -; nextln: ebb3(v30: i32): +; nextln: ebb5(v30: i32): ; nextln: jump ebb1(v30) ; nextln: } diff --git a/filetests/licm/nested_loops.clif b/filetests/licm/nested_loops.clif index f9ade38cf..423b24d33 100644 --- a/filetests/licm/nested_loops.clif +++ b/filetests/licm/nested_loops.clif @@ -14,17 +14,20 @@ ebb1(v1: i32): jump ebb2(v5, v5) ebb2(v10: i32, v11: i32): - brz v11, ebb3(v10) + brz v11, ebb4(v10) + jump ebb3 + +ebb3: v12 = iconst.i32 1 v15 = iadd v12, v5 v13 = isub v11, v12 jump ebb2(v10,v13) -ebb3(v20: i32): - brz v20, ebb4(v20) +ebb4(v20: i32): + brz v20, ebb5(v20) jump ebb1(v20) -ebb4(v30: i32): +ebb5(v30: i32): return v30 } @@ -43,14 +46,17 @@ ebb4(v30: i32): ; nextln: jump ebb2(v5, v5) ; nextln: ; nextln: ebb2(v10: i32, v11: i32): -; nextln: brz v11, ebb3(v10) -; nextln: v13 = isub v11, v12 +; nextln: brz v11, ebb4(v10) +; nextln: jump ebb3 +; nextln: +; nextln: ebb3: +; nextln: v13 = isub.i32 v11, v12 ; nextln: jump ebb2(v10, v13) ; nextln: -; nextln: ebb3(v20: i32): -; nextln: brz v20, ebb4(v20) +; nextln: ebb4(v20: i32): +; nextln: brz v20, 
ebb5(v20) ; nextln: jump ebb1(v20) ; nextln: -; nextln: ebb4(v30: i32): +; nextln: ebb5(v30: i32): ; nextln: return v30 ; nextln: } diff --git a/filetests/licm/reject.clif b/filetests/licm/reject.clif index a23decb1e..43823c129 100644 --- a/filetests/licm/reject.clif +++ b/filetests/licm/reject.clif @@ -11,11 +11,14 @@ ebb1(v1: i32): ; check: ebb1(v1: i32): ; check: regmove.i32 v0, %x10 -> %x20 v2 = iconst.i32 1 - brz v1, ebb2(v1) + brz v1, ebb3(v1) + jump ebb2 + +ebb2: v5 = isub v1, v2 jump ebb1(v5) -ebb2(v6: i32): +ebb3(v6: i32): return v6 } @@ -31,12 +34,15 @@ ebb1(v2: i32, v3: i32): ; check: ifcmp.i32 v0, v1 ; check: v5 = selectif.i32 eq v4, v2, v3 v8 = iconst.i32 1 - brz v1, ebb2(v1) + brz v1, ebb3(v1) + jump ebb2 + +ebb2: v9 = isub v1, v8 v10 = iadd v1, v8 jump ebb1(v9, v10) -ebb2(v6: i32): +ebb3(v6: i32): return v6 } @@ -53,11 +59,14 @@ ebb1(v3: i32, v4: i32): ; check: v5 = spill.i32 v1 ; check: v6 = fill.i32 v2 ; check: v7 = fill v5 - brz v1, ebb2(v1) + brz v1, ebb3(v1) + jump ebb2 + +ebb2: v9 = isub v1, v4 jump ebb1(v9, v3) -ebb2(v10: i32): +ebb3(v10: i32): return v10 } @@ -72,11 +81,14 @@ ebb1(v1: i32): v2 = iadd v8, v9 ; check: ebb1(v1: i32): ; check: v2 = iadd v8, v9 - brz v1, ebb2(v1) + brz v1, ebb3(v1) + jump ebb2 + +ebb2: v5 = isub v1, v2 jump ebb1(v5) -ebb2(v6: i32): +ebb3(v6: i32): return v6 } diff --git a/filetests/licm/reject_load_notrap.clif b/filetests/licm/reject_load_notrap.clif index 1d26faa71..71385807e 100644 --- a/filetests/licm/reject_load_notrap.clif +++ b/filetests/licm/reject_load_notrap.clif @@ -19,11 +19,14 @@ ebb0(v0: i32, v1: i64): ebb1(v2: i32, v3: i64): v6 = load.i32 notrap aligned v5 v7 = iadd v2, v6 - brz v2, ebb2(v2) + brz v2, ebb3(v2) + jump ebb2 + +ebb2: v8 = isub v2, v4 jump ebb1(v8, v3) -ebb2(v9: i32): +ebb3(v9: i32): return v9 } @@ -40,10 +43,13 @@ ebb2(v9: i32): ; nextln: ebb1(v2: i32, v3: i64): ; nextln: v6 = load.i32 notrap aligned v5 ; nextln: v7 = iadd v2, v6 -; nextln: brz v2, ebb2(v2) -; nextln: v8 = isub v2, v4 +; nextln: brz v2, ebb3(v2) +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: v8 = isub.i32 v2, v4 ; nextln: jump ebb1(v8, v3) ; nextln: -; nextln: ebb2(v9: i32): +; nextln: ebb3(v9: i32): ; nextln: return v9 ; nextln: } diff --git a/filetests/licm/reject_load_readonly.clif b/filetests/licm/reject_load_readonly.clif index 5b6a41171..ea7b72469 100644 --- a/filetests/licm/reject_load_readonly.clif +++ b/filetests/licm/reject_load_readonly.clif @@ -19,11 +19,14 @@ ebb1(v2: i32, v3: i64): v5 = heap_addr.i64 heap0, v4, 1 v6 = load.i32 aligned readonly v5 v7 = iadd v2, v6 - brz v2, ebb2(v2) + brz v2, ebb3(v2) + jump ebb2 + +ebb2: v8 = isub v2, v4 jump ebb1(v8, v3) -ebb2(v9: i32): +ebb3(v9: i32): return v9 } @@ -40,10 +43,13 @@ ebb2(v9: i32): ; nextln: ebb1(v2: i32, v3: i64): ; nextln: v6 = load.i32 aligned readonly v5 ; nextln: v7 = iadd v2, v6 -; nextln: brz v2, ebb2(v2) -; nextln: v8 = isub v2, v4 +; nextln: brz v2, ebb3(v2) +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: +; nextln: v8 = isub.i32 v2, v4 ; nextln: jump ebb1(v8, v3) ; nextln: -; nextln: ebb2(v9: i32): +; nextln: ebb3(v9: i32): ; nextln: return v9 ; nextln: } diff --git a/filetests/parser/call.clif b/filetests/parser/call.clif index c75fa0239..28e3011f3 100644 --- a/filetests/parser/call.clif +++ b/filetests/parser/call.clif @@ -10,13 +10,13 @@ ebb1: ; nextln: return ; nextln: } -function %r1() -> i32, f32 baldrdash { +function %r1() -> i32, f32 baldrdash_system_v { ebb1: v1 = iconst.i32 3 v2 = f32const 0.0 return v1, v2 } -; sameln: function %r1() -> i32, f32 
baldrdash { +; sameln: function %r1() -> i32, f32 baldrdash_system_v { ; nextln: ebb1: ; nextln: v1 = iconst.i32 3 ; nextln: v2 = f32const 0.0 @@ -25,13 +25,13 @@ ebb1: function %signatures() { sig10 = () - sig11 = (i32, f64) -> i32, b1 baldrdash + sig11 = (i32, f64) -> i32, b1 baldrdash_system_v fn5 = %foo sig11 fn8 = %bar(i32) -> b1 } ; sameln: function %signatures() fast { ; check: sig10 = () fast -; check: sig11 = (i32, f64) -> i32, b1 baldrdash +; check: sig11 = (i32, f64) -> i32, b1 baldrdash_system_v ; check: sig12 = (i32) -> b1 fast ; not: fn0 ; check: fn5 = %foo sig11 diff --git a/filetests/parser/flags.clif b/filetests/parser/flags.clif index d3c096f1c..aac8017e8 100644 --- a/filetests/parser/flags.clif +++ b/filetests/parser/flags.clif @@ -5,11 +5,20 @@ function %iflags(i32) { ebb200(v0: i32): v1 = ifcmp_imm v0, 17 brif eq v1, ebb201 + jump ebb400 + +ebb400: brif ugt v1, ebb202 + jump ebb401 + +ebb401: v2 = iconst.i32 34 v3 = ifcmp v0, v2 v4 = trueif eq v3 brnz v4, ebb202 + jump ebb402 + +ebb402: return ebb201: @@ -21,7 +30,7 @@ ebb202: ; check: v1 = ifcmp_imm v0, 17 ; check: brif eq v1, ebb201 ; check: brif ugt v1, ebb202 -; check: v3 = ifcmp v0, v2 +; check: v3 = ifcmp.i32 v0, v2 ; check: v4 = trueif eq v3 function %fflags(f32) { @@ -29,9 +38,18 @@ ebb200(v0: f32): v1 = f32const 0x34.0p0 v2 = ffcmp v0, v1 brff eq v2, ebb201 + jump ebb400 + +ebb400: brff ord v2, ebb202 + jump ebb401 + +ebb401: v3 = trueff gt v2 brnz v3, ebb202 + jump ebb402 + +ebb402: return ebb201: diff --git a/filetests/parser/instruction_encoding.clif b/filetests/parser/instruction_encoding.clif index 9a405030a..5f7ae26af 100644 --- a/filetests/parser/instruction_encoding.clif +++ b/filetests/parser/instruction_encoding.clif @@ -8,7 +8,7 @@ function %foo(i32, i32) { ebb1(v0: i32 [%x8], v1: i32): [-,-] v2 = iadd v0, v1 [-] trap heap_oob - [R#1234, %x5, %x11] v6, v7 = iadd_cout v2, v0 + [R#1234, %x5, %x11] v6, v7 = iadd_ifcout v2, v0 [Rshamt#beef, %x25] v8 = ishl_imm v6, 2 @55 v9 = iadd v8, v7 @a5 [Iret#5] return v0, v8 @@ -17,7 +17,7 @@ ebb1(v0: i32 [%x8], v1: i32): ; nextln: ebb1(v0: i32 [%x8], v1: i32): ; nextln: [-,-]$WS v2 = iadd v0, v1 ; nextln: [-]$WS trap heap_oob -; nextln: [R#1234,%x5,%x11]$WS v6, v7 = iadd_cout v2, v0 +; nextln: [R#1234,%x5,%x11]$WS v6, v7 = iadd_ifcout v2, v0 ; nextln: [Rshamt#beef,%x25]$WS v8 = ishl_imm v6, 2 ; nextln: @0055 [-,-]$WS v9 = iadd v8, v7 ; nextln: @00a5 [Iret#05]$WS return v0, v8 diff --git a/filetests/parser/ternary.clif b/filetests/parser/ternary.clif index 75f09bb22..d17e5592c 100644 --- a/filetests/parser/ternary.clif +++ b/filetests/parser/ternary.clif @@ -3,22 +3,22 @@ test verifier function %add_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 { ebb1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): - v10, v11 = iadd_cout v1, v4 - ;check: v10, v11 = iadd_cout v1, v4 - v20, v21 = iadd_carry v2, v5, v11 - ; check: v20, v21 = iadd_carry v2, v5, v11 - v30 = iadd_cin v3, v6, v21 - ; check: v30 = iadd_cin v3, v6, v21 + v10, v11 = iadd_ifcout v1, v4 + ;check: v10, v11 = iadd_ifcout v1, v4 + v20, v21 = iadd_ifcarry v2, v5, v11 + ; check: v20, v21 = iadd_ifcarry v2, v5, v11 + v30 = iadd_ifcin v3, v6, v21 + ; check: v30 = iadd_ifcin v3, v6, v21 return v10, v20, v30 } function %sub_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 { ebb1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32): - v10, v11 = isub_bout v1, v4 - ;check: v10, v11 = isub_bout v1, v4 - v20, v21 = isub_borrow v2, v5, v11 - ; check: v20, v21 = isub_borrow v2, v5, v11 - v30 = isub_bin v3, v6, 
v21 - ; check: v30 = isub_bin v3, v6, v21 + v10, v11 = isub_ifbout v1, v4 + ;check: v10, v11 = isub_ifbout v1, v4 + v20, v21 = isub_ifborrow v2, v5, v11 + ; check: v20, v21 = isub_ifborrow v2, v5, v11 + v30 = isub_ifbin v3, v6, v21 + ; check: v30 = isub_ifbin v3, v6, v21 return v10, v20, v30 } diff --git a/filetests/postopt/basic.clif b/filetests/postopt/basic.clif index 5b6540700..442d47de8 100644 --- a/filetests/postopt/basic.clif +++ b/filetests/postopt/basic.clif @@ -7,6 +7,9 @@ function %br_icmp(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): [Op1icscc#39,%rdx] v2 = icmp slt v0, v1 [Op1t8jccd_long#85] brnz v2, ebb1 +[Op1jmpb#eb] jump ebb2 + +ebb2: [Op1ret#c3] return v1 ebb1: @@ -18,6 +21,9 @@ ebb1: ; nextln: v9 = ifcmp v0, v1 ; nextln: v2 = trueif slt v9 ; nextln: brif slt v9, ebb1 +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: ; nextln: return v1 ; nextln: ; nextln: ebb1: @@ -31,6 +37,9 @@ function %br_icmp_inverse(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): [Op1icscc#39,%rdx] v2 = icmp slt v0, v1 [Op1t8jccd_long#84] brz v2, ebb1 +[Op1jmpb#eb] jump ebb2 + +ebb2: [Op1ret#c3] return v1 ebb1: @@ -42,6 +51,9 @@ ebb1: ; nextln: v9 = ifcmp v0, v1 ; nextln: v2 = trueif slt v9 ; nextln: brif sge v9, ebb1 +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: ; nextln: return v1 ; nextln: ; nextln: ebb1: @@ -55,6 +67,9 @@ function %br_icmp_imm(i32, i32) -> i32 { ebb0(v0: i32, v1: i32): [Op1icscc_ib#7083] v2 = icmp_imm slt v0, 2 [Op1t8jccd_long#84] brz v2, ebb1 +[Op1jmpb#eb] jump ebb2 + +ebb2: [Op1ret#c3] return v1 ebb1: @@ -66,6 +81,9 @@ ebb1: ; nextln: v9 = ifcmp_imm v0, 2 ; nextln: v2 = trueif slt v9 ; nextln: brif sge v9, ebb1 +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: ; nextln: return v1 ; nextln: ; nextln: ebb1: @@ -79,6 +97,9 @@ function %br_fcmp(f32, f32) -> f32 { ebb0(v0: f32, v1: f32): [Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1 [Op1t8jccd_long#84] brz v2, ebb1 +[Op1jmpb#eb] jump ebb2 + +ebb2: [Op1ret#c3] return v1 ebb1: @@ -91,6 +112,9 @@ ebb1: ; nextln: v19 = ffcmp v0, v1 ; nextln: v2 = trueff gt v19 ; nextln: brff ule v19, ebb1 +; nextln: jump ebb2 +; nextln: +; nextln: ebb2: ; nextln: return v1 ; nextln: ; nextln: ebb1: diff --git a/filetests/regalloc/aliases.clif b/filetests/regalloc/aliases.clif index 2a9419241..7e6d5c602 100644 --- a/filetests/regalloc/aliases.clif +++ b/filetests/regalloc/aliases.clif @@ -1,7 +1,7 @@ test regalloc target x86_64 haswell -function %value_aliases(i32, f32, i64 vmctx) baldrdash { +function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v { gv0 = vmctx heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000 diff --git a/filetests/regalloc/coalesce-bb.clif b/filetests/regalloc/coalesce-bb.clif new file mode 100644 index 000000000..384ef2ca4 --- /dev/null +++ b/filetests/regalloc/coalesce-bb.clif @@ -0,0 +1,158 @@ +test regalloc +target riscv32 +feature "basic-blocks" + +; Test the coalescer. +; regex: V=v\d+ +; regex: WS=\s+ +; regex: LOC=%\w+ +; regex: EBB=ebb\d+ + +; This function is already CSSA, so no copies should be inserted. +function %cssa(i32) -> i32 { +ebb0(v0: i32): + ; not: copy + ; v0 is used by the branch and passed as an arg - that's no conflict. + brnz v0, ebb1(v0) + jump ebb2 + +ebb2: + ; v0 is live across the branch above. That's no conflict. 
+ v1 = iadd_imm v0, 7 + jump ebb1(v1) + +ebb1(v10: i32): + v11 = iadd_imm v10, 7 + return v11 +} + +function %trivial(i32) -> i32 { +ebb0(v0: i32): + ; check: brnz v0, $(splitEdge=$EBB) + brnz v0, ebb1(v0) + jump ebb2 + +ebb2: + ; not: copy + v1 = iadd_imm v0, 7 + jump ebb1(v1) + + ; check: $splitEdge: + ; nextln: $(cp1=$V) = copy.i32 v0 + ; nextln: jump ebb1($cp1) + +ebb1(v10: i32): + ; Use of v0 in the destination EBB causes a conflict. + v11 = iadd v10, v0 + return v11 +} + +; A value is used as an SSA argument twice in the same branch. +function %dualuse(i32) -> i32 { +ebb0(v0: i32): + ; check: brnz v0, $(splitEdge=$EBB) + brnz v0, ebb1(v0, v0) + jump ebb2 + +ebb2: + v1 = iadd_imm v0, 7 + v2 = iadd_imm v1, 56 + jump ebb1(v1, v2) + + ; check: $splitEdge: + ; check: $(cp1=$V) = copy.i32 v0 + ; nextln: jump ebb1($cp1, v0) + +ebb1(v10: i32, v11: i32): + v12 = iadd v10, v11 + return v12 +} + +; Interference away from the branch +; The interference can be broken with a copy at either branch. +function %interference(i32) -> i32 { +ebb0(v0: i32): + ; not: copy + ; check: brnz v0, $(splitEdge=$EBB) + ; not: copy + brnz v0, ebb1(v0) + jump ebb2 + +ebb2: + v1 = iadd_imm v0, 7 + ; v1 and v0 interfere here: + v2 = iadd_imm v0, 8 + ; check: $(cp0=$V) = copy v1 + ; check: jump ebb1($cp0) + jump ebb1(v1) + + ; check: $splitEdge: + ; not: copy + ; nextln: jump ebb1(v0) + +ebb1(v10: i32): + ; not: copy + v11 = iadd_imm v10, 7 + return v11 +} + +; A loop where one induction variable is used as a backedge argument. +function %fibonacci(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 1 + v2 = iconst.i32 2 + jump ebb1(v1, v2) + +ebb1(v10: i32, v11: i32): + ; v11 needs to be isolated because it interferes with v10. + ; check: ebb1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC]) + ; check: v11 = copy $nv11a + v12 = iadd v10, v11 + v13 = icmp ult v12, v0 + ; check: brnz v13, $(splitEdge=$EBB) + brnz v13, ebb1(v11, v12) + jump ebb2 + + ; check: $splitEdge: + ; check: $(nv11b=$V) = copy.i32 v11 + ; not: copy + ; check: jump ebb1($nv11b, v12) + +ebb2: + return v12 +} + +; Function arguments passed on the stack aren't allowed to be part of a virtual +; register, at least for now. This is because the other values in the virtual +; register would need to be spilled to the incoming_arg stack slot which we treat +; as belonging to the caller. +function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { +; check: ss0 = incoming_arg 4 +; not: incoming_arg +ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): + ; check: fill v8 + ; not: v8 + jump ebb1(v8) + +ebb1(v10: i32): + v11 = iadd_imm v10, 1 + return v11 +} + +function %gvn_unremovable_phi(i32) system_v { +ebb0(v0: i32): + v2 = iconst.i32 0 + jump ebb2(v2, v0) + +ebb2(v3: i32, v4: i32): + brnz v3, ebb2(v3, v4) + jump ebb3 + +ebb3: + v5 = iconst.i32 1 + brnz v3, ebb2(v2, v5) + jump ebb4 + +ebb4: + return +} diff --git a/filetests/regalloc/coalesce.clif b/filetests/regalloc/coalesce.clif index 0085ce0fd..c78219ea5 100644 --- a/filetests/regalloc/coalesce.clif +++ b/filetests/regalloc/coalesce.clif @@ -1,5 +1,6 @@ test regalloc target riscv32 +feature !"basic-blocks" ; Test the coalescer. ; regex: V=v\d+ @@ -12,6 +13,9 @@ ebb0(v0: i32): ; not: copy ; v0 is used by the branch and passed as an arg - that's no conflict. brnz v0, ebb1(v0) + jump ebb2 + +ebb2: ; v0 is live across the branch above. That's no conflict.
v1 = iadd_imm v0, 7 jump ebb1(v1) @@ -26,6 +30,9 @@ ebb0(v0: i32): ; check: $(cp1=$V) = copy v0 ; nextln: brnz v0, ebb1($cp1) brnz v0, ebb1(v0) + jump ebb2 + +ebb2: ; not: copy v1 = iadd_imm v0, 7 jump ebb1(v1) @@ -42,6 +49,9 @@ ebb0(v0: i32): ; check: $(cp1=$V) = copy v0 ; nextln: brnz v0, ebb1($cp1, v0) brnz v0, ebb1(v0, v0) + jump ebb2 + +ebb2: v1 = iadd_imm v0, 7 v2 = iadd_imm v1, 56 jump ebb1(v1, v2) @@ -59,6 +69,9 @@ ebb0(v0: i32): ; not: copy ; check: brnz v0, ebb1($cp0) brnz v0, ebb1(v0) + jump ebb2 + +ebb2: v1 = iadd_imm v0, 7 ; v1 and v0 interfere here: v2 = iadd_imm v0, 8 @@ -89,6 +102,9 @@ ebb1(v10: i32, v11: i32): ; not: copy ; check: brnz v13, ebb1($nv11b, v12) brnz v13, ebb1(v11, v12) + jump ebb2 + +ebb2: return v12 } @@ -116,7 +132,13 @@ ebb0(v0: i32): ebb2(v3: i32, v4: i32): brnz v3, ebb2(v3, v4) + jump ebb3 + +ebb3: v5 = iconst.i32 1 brnz v3, ebb2(v2, v5) + jump ebb4 + +ebb4: return } diff --git a/filetests/regalloc/coalescing-207.clif b/filetests/regalloc/coalescing-207.clif index 30b33ee44..17d33d33b 100644 --- a/filetests/regalloc/coalescing-207.clif +++ b/filetests/regalloc/coalescing-207.clif @@ -22,6 +22,9 @@ ebb0(v0: i64, v1: i32, v2: i32): v6 = iconst.i32 0x4ffe v7 = icmp uge v5, v6 brz v7, ebb1 + jump ebb100 + +ebb100: trap heap_oob ebb1: @@ -35,6 +38,9 @@ ebb1: v15 = iconst.i32 0x4ffe v16 = icmp.i32 uge v4, v15 brz v16, ebb2 + jump ebb101 + +ebb101: trap heap_oob ebb2: @@ -46,6 +52,9 @@ ebb2: v21 = iconst.i32 0x4ffe v22 = icmp.i32 uge v2, v21 brz v22, ebb3 + jump ebb102 + +ebb102: trap heap_oob ebb3: @@ -60,15 +69,24 @@ ebb3: v31 = icmp eq v29, v30 v32 = bint.i32 v31 brnz v32, ebb90(v14, v1) + jump ebb103 + +ebb103: v33 = call fn0(v0, v1, v27) v34 = iconst.i32 0 v35 = iconst.i32 0 v36 = icmp eq v33, v35 v37 = bint.i32 v36 brnz v37, ebb90(v14, v34) + jump ebb104 + +ebb104: v38 = iconst.i32 0x4ffe v39 = icmp.i32 uge v2, v38 brz v39, ebb4 + jump ebb105 + +ebb105: trap heap_oob ebb4: @@ -81,9 +99,15 @@ ebb4: v46 = icmp eq v44, v45 v47 = bint.i32 v46 brnz v47, ebb56(v33, v14) + jump ebb106 + +ebb106: v48 = iconst.i32 0x4ffe v49 = icmp.i32 uge v33, v48 brz v49, ebb5 + jump ebb107 + +ebb107: trap heap_oob ebb5: @@ -96,9 +120,15 @@ ebb5: v56 = icmp eq v54, v55 v57 = bint.i32 v56 brnz v57, ebb90(v14, v34) + jump ebb108 + +ebb108: v58 = iconst.i32 0x4ffe v59 = icmp.i32 uge v2, v58 brz v59, ebb6 + jump ebb109 + +ebb109: trap heap_oob ebb6: @@ -111,9 +141,15 @@ ebb6: v66 = icmp eq v64, v65 v67 = bint.i32 v66 brnz v67, ebb42 + jump ebb110 + +ebb110: v68 = iconst.i32 0x4ffe v69 = icmp.i32 uge v33, v68 brz v69, ebb7 + jump ebb111 + +ebb111: trap heap_oob ebb7: @@ -126,9 +162,15 @@ ebb7: v76 = icmp eq v74, v75 v77 = bint.i32 v76 brnz v77, ebb90(v14, v34) + jump ebb112 + +ebb112: v78 = iconst.i32 0x4ffe v79 = icmp.i32 uge v2, v78 brz v79, ebb8 + jump ebb113 + +ebb113: trap heap_oob ebb8: @@ -141,9 +183,15 @@ ebb8: v86 = icmp eq v84, v85 v87 = bint.i32 v86 brnz v87, ebb46 + jump ebb114 + +ebb114: v88 = iconst.i32 0x4ffe v89 = icmp.i32 uge v33, v88 brz v89, ebb9 + jump ebb115 + +ebb115: trap heap_oob ebb9: @@ -156,9 +204,15 @@ ebb9: v96 = icmp eq v94, v95 v97 = bint.i32 v96 brnz v97, ebb90(v14, v34) + jump ebb116 + +ebb116: v98 = iconst.i32 0x4ffe v99 = icmp.i32 uge v2, v98 brz v99, ebb10 + jump ebb117 + +ebb117: trap heap_oob ebb10: @@ -171,6 +225,9 @@ ebb10: v106 = icmp eq v104, v105 v107 = bint.i32 v106 brnz v107, ebb54 + jump ebb118 + +ebb118: v108 = iconst.i32 1 v109 = iadd.i32 v2, v108 v110 = iconst.i32 1048 @@ -179,6 +236,9 @@ ebb10: v113 = iconst.i32 0x4ffe v114 = icmp uge 
v111, v113 brz v114, ebb11 + jump ebb119 + +ebb119: trap heap_oob ebb11: @@ -193,6 +253,9 @@ ebb11: v122 = iconst.i32 0x4ffe v123 = icmp uge v120, v122 brz v123, ebb12 + jump ebb120 + +ebb120: trap heap_oob ebb12: @@ -205,6 +268,9 @@ ebb12: v129 = iconst.i32 0x4ffe v130 = icmp.i32 uge v14, v129 brz v130, ebb13 + jump ebb121 + +ebb121: trap heap_oob ebb13: @@ -217,6 +283,9 @@ ebb13: v136 = iconst.i32 0x4ffe v137 = icmp.i32 uge v14, v136 brz v137, ebb14 + jump ebb122 + +ebb122: trap heap_oob ebb14: @@ -235,6 +304,9 @@ ebb15(v143: i32, v144: i32): v148 = iconst.i32 0x4ffe v149 = icmp uge v147, v148 brz v149, ebb16 + jump ebb123 + +ebb123: trap heap_oob ebb16: @@ -247,6 +319,9 @@ ebb16: v156 = icmp eq v154, v155 v157 = bint.i32 v156 brnz v157, ebb89(v14) + jump ebb124 + +ebb124: v158 = iconst.i32 255 v159 = band.i32 v144, v158 v160 = iconst.i32 2 @@ -257,6 +332,9 @@ ebb16: v165 = iconst.i32 0x4ffe v166 = icmp uge v162, v165 brz v166, ebb17 + jump ebb125 + +ebb125: trap heap_oob ebb17: @@ -275,6 +353,9 @@ ebb17: v178 = iconst.i32 0x4ffe v179 = icmp uge v177, v178 brz v179, ebb18 + jump ebb126 + +ebb126: trap heap_oob ebb18: @@ -291,6 +372,9 @@ ebb18: v190 = iconst.i32 0x4ffe v191 = icmp.i32 uge v177, v190 brz v191, ebb19 + jump ebb127 + +ebb127: trap heap_oob ebb19: @@ -307,6 +391,9 @@ ebb19: v201 = iconst.i32 0x4ffe v202 = icmp uge v200, v201 brz v202, ebb20 + jump ebb128 + +ebb128: trap heap_oob ebb20: @@ -329,6 +416,9 @@ ebb21: v215 = icmp ult v213, v214 v216 = bint.i32 v215 brnz v216, ebb38(v2, v211, v209, v210, v208, v198, v213, v33, v14) + jump ebb129 + +ebb129: v217 = iconst.i32 -1 v218 = iconst.i32 0 v219 = iconst.i32 1 @@ -344,6 +434,9 @@ ebb22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i3 v237 = iconst.i32 0x4ffe v238 = icmp uge v236, v237 brz v238, ebb23 + jump ebb130 + +ebb130: trap heap_oob ebb23: @@ -357,9 +450,15 @@ ebb23: v246 = icmp ne v243, v245 v247 = bint.i32 v246 brnz v247, ebb24 + jump ebb131 + +ebb131: v248 = icmp.i32 ne v224, v226 v249 = bint.i32 v248 brnz v249, ebb25 + jump ebb132 + +ebb132: v250 = iadd.i32 v227, v226 v251 = iconst.i32 1 jump ebb27(v251, v250, v223, v226) @@ -368,6 +467,9 @@ ebb24: v252 = icmp.i32 ule v243, v245 v253 = bint.i32 v252 brnz v253, ebb26 + jump ebb133 + +ebb133: v254 = isub.i32 v234, v223 v255 = iconst.i32 1 jump ebb27(v255, v234, v223, v254) @@ -391,10 +493,16 @@ ebb27(v264: i32, v265: i32, v266: i32, v267: i32): v269 = icmp uge v268, v229 v270 = bint.i32 v269 brnz v270, ebb29 + jump ebb134 + +ebb134: v271 = iadd.i32 v2, v268 v272 = iconst.i32 0x4ffe v273 = icmp uge v271, v272 brz v273, ebb28 + jump ebb135 + +ebb135: trap heap_oob ebb28: @@ -424,6 +532,9 @@ ebb31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i3 v300 = iconst.i32 0x4ffe v301 = icmp uge v299, v300 brz v301, ebb32 + jump ebb136 + +ebb136: trap heap_oob ebb32: @@ -437,9 +548,15 @@ ebb32: v309 = icmp ne v306, v308 v310 = bint.i32 v309 brnz v310, ebb33 + jump ebb137 + +ebb137: v311 = icmp.i32 ne v286, v288 v312 = bint.i32 v311 brnz v312, ebb34 + jump ebb138 + +ebb138: v313 = iadd.i32 v289, v288 v314 = iconst.i32 1 jump ebb36(v314, v313, v285, v288) @@ -448,6 +565,9 @@ ebb33: v315 = icmp.i32 uge v306, v308 v316 = bint.i32 v315 brnz v316, ebb35 + jump ebb139 + +ebb139: v317 = isub.i32 v297, v285 v318 = iconst.i32 1 jump ebb36(v318, v297, v285, v317) @@ -471,10 +591,16 @@ ebb36(v327: i32, v328: i32, v329: i32, v330: i32): v332 = icmp uge v331, v291 v333 = bint.i32 v332 brnz v333, ebb38(v2, v330, v292, v329, v293, v294, v291, 
v295, v296) + jump ebb140 + +ebb140: v334 = iadd.i32 v2, v331 v335 = iconst.i32 0x4ffe v336 = icmp uge v334, v335 brz v336, ebb37 + jump ebb141 + +ebb141: trap heap_oob ebb37: @@ -494,12 +620,18 @@ ebb38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i3 v356 = icmp ugt v353, v355 v357 = bint.i32 v356 brnz v357, ebb39(v344) + jump ebb142 + +ebb142: v358 = copy v345 jump ebb39(v358) ebb39(v359: i32): v360 = iadd.i32 v343, v359 brnz.i32 v357, ebb40(v346) + jump ebb143 + +ebb143: v361 = copy.i32 v347 jump ebb40(v361) @@ -511,6 +643,9 @@ ebb40(v362: i32): v367 = icmp eq v365, v366 v368 = bint.i32 v367 brnz v368, ebb63 + jump ebb144 + +ebb144: v369 = iconst.i32 1 v370 = iadd v362, v369 v371 = isub.i32 v348, v370 @@ -520,6 +655,9 @@ ebb40(v362: i32): v375 = bint.i32 v374 v376 = copy v362 brnz v375, ebb41(v376) + jump ebb145 + +ebb145: v377 = copy v373 jump ebb41(v377) @@ -536,6 +674,9 @@ ebb42: v385 = iconst.i32 0x4ffe v386 = icmp.i32 uge v33, v385 brz v386, ebb43 + jump ebb146 + +ebb146: trap heap_oob ebb43: @@ -557,6 +698,9 @@ ebb44(v392: i32, v393: i32, v394: i32): v402 = icmp eq v401, v384 v403 = bint.i32 v402 brnz v403, ebb56(v394, v14) + jump ebb147 + +ebb147: v404 = iconst.i32 2 v405 = iadd v394, v404 v406 = iconst.i32 1 @@ -564,6 +708,9 @@ ebb44(v392: i32, v393: i32, v394: i32): v408 = iconst.i32 0x4ffe v409 = icmp uge v405, v408 brz v409, ebb45 + jump ebb148 + +ebb148: trap heap_oob ebb45: @@ -584,6 +731,9 @@ ebb46: v420 = iconst.i32 0x4ffe v421 = icmp.i32 uge v33, v420 brz v421, ebb47 + jump ebb149 + +ebb149: trap heap_oob ebb47: @@ -616,6 +766,9 @@ ebb48(v440: i32, v441: i32): v446 = iconst.i32 0x4ffe v447 = icmp uge v445, v446 brz v447, ebb49 + jump ebb150 + +ebb150: trap heap_oob ebb49: @@ -628,6 +781,9 @@ ebb49: v454 = icmp eq v452, v453 v455 = bint.i32 v454 brnz v455, ebb51(v14) + jump ebb151 + +ebb151: v456 = bor.i32 v441, v452 v457 = iconst.i32 8 v458 = ishl v456, v457 @@ -647,6 +803,9 @@ ebb51(v462: i32): v466 = iconst.i32 0x4ffe v467 = icmp uge v463, v466 brz v467, ebb52 + jump ebb152 + +ebb152: trap heap_oob ebb52: @@ -657,6 +816,9 @@ ebb52: store.i32 v465, v471+4 v472 = iconst.i32 0 brnz.i32 v452, ebb53(v443) + jump ebb153 + +ebb153: v473 = copy v472 jump ebb53(v473) @@ -673,6 +835,9 @@ ebb54: v481 = iconst.i32 0x4ffe v482 = icmp.i32 uge v33, v481 brz v482, ebb55 + jump ebb154 + +ebb154: trap heap_oob ebb55: @@ -713,6 +878,9 @@ ebb58(v505: i32, v506: i32): v511 = iconst.i32 0x4ffe v512 = icmp uge v508, v511 brz v512, ebb59 + jump ebb155 + +ebb155: trap heap_oob ebb59: @@ -725,6 +893,9 @@ ebb59: v519 = icmp eq v517, v518 v520 = bint.i32 v519 brnz v520, ebb61(v14) + jump ebb156 + +ebb156: v521 = iconst.i32 8 v522 = ishl.i32 v506, v521 v523 = bor v522, v517 @@ -739,6 +910,9 @@ ebb60: ebb61(v526: i32): v527 = iconst.i32 0 brnz.i32 v517, ebb62(v510) + jump ebb157 + +ebb157: v528 = copy v527 jump ebb62(v528) @@ -772,9 +946,15 @@ ebb65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i3 v564 = icmp uge v563, v549 v565 = bint.i32 v564 brnz v565, ebb67(v547) + jump ebb158 + +ebb158: v566 = iconst.i32 0 v567 = call fn2(v0, v547, v566, v550) brnz v567, ebb66 + jump ebb159 + +ebb159: v568 = iadd v547, v550 jump ebb67(v568) @@ -783,6 +963,9 @@ ebb66: v570 = icmp ult v569, v549 v571 = bint.i32 v570 brnz v571, ebb89(v552) + jump ebb160 + +ebb160: v572 = copy.i32 v567 jump ebb67(v572) @@ -792,6 +975,9 @@ ebb67(v573: i32): v576 = iconst.i32 0x4ffe v577 = icmp uge v575, v576 brz v577, ebb68 + jump ebb161 + +ebb161: trap heap_oob ebb68: @@ -813,6 
+999,9 @@ ebb68: v593 = iconst.i32 0x4ffe v594 = icmp uge v592, v593 brz v594, ebb69 + jump ebb162 + +ebb162: trap heap_oob ebb69: @@ -826,12 +1015,18 @@ ebb69: v602 = icmp eq v600, v601 v603 = bint.i32 v602 brnz v603, ebb74 + jump ebb163 + +ebb163: v604 = iconst.i32 2 v605 = ishl.i32 v582, v604 v606 = iadd.i32 v552, v605 v607 = iconst.i32 0x4ffe v608 = icmp uge v606, v607 brz v608, ebb70 + jump ebb164 + +ebb164: trap heap_oob ebb70: @@ -845,12 +1040,18 @@ ebb70: v616 = icmp eq v614, v615 v617 = bint.i32 v616 brnz v617, ebb75 + jump ebb165 + +ebb165: v618 = iconst.i32 1 v619 = iadd v614, v618 v620 = icmp ult v619, v554 v621 = bint.i32 v620 v622 = copy.i32 v553 brnz v621, ebb71(v622) + jump ebb166 + +ebb166: v623 = copy v619 jump ebb71(v623) @@ -879,6 +1080,9 @@ ebb75: v634 = bint.i32 v633 v635 = copy.i32 v558 brnz v634, ebb76(v635) + jump ebb167 + +ebb167: v636 = copy.i32 v555 jump ebb76(v636) @@ -887,6 +1091,9 @@ ebb76(v637: i32): v639 = iconst.i32 0x4ffe v640 = icmp uge v638, v639 brz v640, ebb77 + jump ebb168 + +ebb168: trap heap_oob ebb77: @@ -899,6 +1106,9 @@ ebb77: v647 = icmp eq v645, v646 v648 = bint.i32 v647 brnz v648, ebb82(v548, v549, v551, v552) + jump ebb169 + +ebb169: v649 = iadd.i32 v548, v637 v650 = iadd.i32 v559, v637 v651 = iadd.i32 v560, v637 @@ -910,6 +1120,9 @@ ebb78(v652: i32, v653: i32, v654: i32, v655: i32): v658 = iconst.i32 0x4ffe v659 = icmp uge v653, v658 brz v659, ebb79 + jump ebb170 + +ebb170: trap heap_oob ebb79: @@ -923,6 +1136,9 @@ ebb79: v667 = copy.i32 v554 v668 = copy.i32 v562 brnz v666, ebb87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561) + jump ebb171 + +ebb171: v669 = iconst.i32 1 v670 = iadd.i32 v653, v669 v671 = iconst.i32 1 @@ -930,6 +1146,9 @@ ebb79: v673 = iconst.i32 0x4ffe v674 = icmp.i32 uge v655, v673 brz v674, ebb80 + jump ebb172 + +ebb172: trap heap_oob ebb80: @@ -950,6 +1169,9 @@ ebb82(v682: i32, v683: i32, v684: i32, v685: i32): v686 = icmp.i32 ule v558, v555 v687 = bint.i32 v686 brnz v687, ebb90(v685, v682) + jump ebb173 + +ebb173: v688 = copy.i32 v561 jump ebb83(v688) @@ -958,6 +1180,9 @@ ebb83(v689: i32): v691 = iconst.i32 0x4ffe v692 = icmp uge v690, v691 brz v692, ebb84 + jump ebb174 + +ebb174: trap heap_oob ebb84: @@ -970,6 +1195,9 @@ ebb84: v699 = iconst.i32 0x4ffe v700 = icmp uge v698, v699 brz v700, ebb85 + jump ebb175 + +ebb175: trap heap_oob ebb85: @@ -981,6 +1209,9 @@ ebb85: v706 = icmp.i32 ne v697, v705 v707 = bint.i32 v706 brnz v707, ebb86 + jump ebb176 + +ebb176: v708 = icmp.i32 ule v689, v555 v709 = bint.i32 v708 v710 = iconst.i32 -1 @@ -1019,6 +1250,9 @@ ebb90(v756: i32, v757: i32): v761 = iconst.i32 0x4ffe v762 = icmp uge v758, v761 brz v762, ebb91 + jump ebb177 + +ebb177: trap heap_oob ebb91: @@ -1073,6 +1307,9 @@ ebb0(v0: f64, v1: i64): v24 = icmp ult v22, v23 v25 = bint.i32 v24 brnz v25, ebb10 + jump ebb178 + +ebb178: v26 = iconst.i64 0x7fff_ffff_ffff_ffff v27 = band v14, v26 v28 = iconst.i64 0x7ff0_0000_0000_0000 @@ -1086,10 +1323,16 @@ ebb10: v32 = icmp.i32 ult v22, v31 v33 = bint.i32 v32 brnz v33, ebb8 + jump ebb179 + +ebb179: v34 = iconst.i32 0x3ff0_a2b2 v35 = icmp.i32 uge v22, v34 v36 = bint.i32 v35 brnz v36, ebb6 + jump ebb180 + +ebb180: v37 = iconst.i32 1 v38 = bxor.i32 v17, v37 v39 = isub v38, v17 @@ -1106,6 +1349,9 @@ ebb9: v44 = bint.i32 v43 v45 = bor v42, v44 brnz v45, ebb7 + jump ebb181 + +ebb181: v141 = iconst.i64 0x7fe0_0000_0000_0000 v46 = bitcast.f64 v141 v47 = fmul.f64 v0, v46 @@ -1116,6 +1362,9 @@ ebb8: v49 = icmp.i32 ule v22, v48 v50 = bint.i32 v49 brnz v50, 
ebb3 + jump ebb182 + +ebb182: v51 = iconst.i32 0 v142 = iconst.i64 0 v52 = bitcast.f64 v142 @@ -1129,6 +1378,9 @@ ebb7: v55 = bint.i32 v54 v56 = bor v55, v44 brnz v56, ebb6 + jump ebb183 + +ebb183: v144 = iconst.i64 0xb6a0_0000_0000_0000 v57 = bitcast.f64 v144 v58 = fdiv v57, v0 @@ -1165,8 +1417,14 @@ ebb6: v158 = iconst.i32 0x8000_0000 v154 = icmp ne v76, v158 brnz v154, ebb11 + jump ebb184 + +ebb184: v155 = fcmp uno v75, v75 brz v155, ebb12 + jump ebb185 + +ebb185: trap bad_toint ebb12: @@ -1174,6 +1432,9 @@ ebb12: v156 = bitcast.f64 v159 v157 = fcmp ge v156, v75 brz v157, ebb13 + jump ebb186 + +ebb186: trap int_ovf ebb13: @@ -1230,6 +1491,9 @@ ebb4(v86: f64, v87: f64, v108: f64, v113: i32): v114 = icmp eq v113, v169 v115 = bint.i32 v114 brnz v115, ebb2(v12, v112) + jump ebb187 + +ebb187: v116 = call fn0(v112, v113, v1) jump ebb2(v12, v116) diff --git a/filetests/regalloc/coalescing-216.clif b/filetests/regalloc/coalescing-216.clif index b4d6e6393..a6a73cd2e 100644 --- a/filetests/regalloc/coalescing-216.clif +++ b/filetests/regalloc/coalescing-216.clif @@ -14,15 +14,27 @@ ebb0(v0: i32, v1: i64): ebb4(v11: i64, v29: i64): v6 = iconst.i32 0 brz v6, ebb14 + jump ebb15 + +ebb15: v9 = iconst.i32 -17 v12 = iconst.i32 0xffff_ffff_ffff_8000 jump ebb9(v12) ebb9(v10: i32): brnz v10, ebb8(v9, v11, v11) + jump ebb16 + +ebb16: brz.i32 v9, ebb13 + jump ebb17 + +ebb17: v13 = iconst.i32 0 brnz v13, ebb6(v11, v11) + jump ebb18 + +ebb18: v14 = iconst.i32 0 brz v14, ebb12 jump ebb11 @@ -40,6 +52,9 @@ ebb13: ebb10(v21: i64): v16 = iconst.i32 0 brnz v16, ebb6(v21, v11) + jump ebb19 + +ebb19: v17 = iconst.i32 0xffff_ffff_ffff_9f35 jump ebb8(v17, v21, v11) diff --git a/filetests/regalloc/coloring-227.clif b/filetests/regalloc/coloring-227.clif index 7f14ca4e2..a469230b5 100644 --- a/filetests/regalloc/coloring-227.clif +++ b/filetests/regalloc/coloring-227.clif @@ -17,6 +17,9 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb6: [RexOp1pu_id#b8] v8 = iconst.i32 0 [RexOp1tjccb#75] brnz v8, ebb5 +[Op1jmpb#eb] jump ebb20 + + ebb20: [RexOp1pu_id#b8] v9 = iconst.i32 0 [RexOp1pu_id#b8] v11 = iconst.i32 0 [RexOp1icscc#39] v12 = icmp.i32 eq v15, v11 @@ -27,8 +30,14 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb7: [RexOp1tjccb#74] brz.i32 v17, ebb8 +[Op1jmpb#eb] jump ebb17 + + ebb17: [RexOp1pu_id#b8] v18 = iconst.i32 0 [RexOp1tjccb#74] brz v18, ebb9 +[Op1jmpb#eb] jump ebb16 + + ebb16: [RexOp1pu_id#b8] v21 = iconst.i32 0 [RexOp1umr#89] v79 = uextend.i64 v5 [RexOp1r_ib#8083] v80 = iadd_imm.i64 v4, 0 @@ -63,8 +72,14 @@ function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) ebb2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32): [RexOp1pu_id#b8] v44 = iconst.i32 0 [RexOp1tjccb#74] brz v44, ebb12 +[Op1jmpb#eb] jump ebb18 + + ebb18: [RexOp1pu_id#b8] v50 = iconst.i32 11 [RexOp1tjccb#74] brz v50, ebb14 +[Op1jmpb#eb] jump ebb19 + + ebb19: [RexOp1umr#89] v82 = uextend.i64 v52 [RexOp1r_ib#8083] v83 = iadd_imm.i64 v4, 0 [RexOp1ld#808b] v84 = load.i64 v83 diff --git a/filetests/regalloc/ghost-param.clif b/filetests/regalloc/ghost-param.clif index 8a022b4af..f2a1883a0 100644 --- a/filetests/regalloc/ghost-param.clif +++ b/filetests/regalloc/ghost-param.clif @@ -20,6 +20,9 @@ ebb5(v9: f64): v6 = iconst.i32 0 v7 = iconst.i32 1 brnz v7, ebb4(v6) + jump ebb8 + +ebb8: v8 = iconst.i32 0 jump ebb7(v8) diff --git a/filetests/regalloc/global-constraints.clif b/filetests/regalloc/global-constraints.clif index e3e59dd47..11a3dbef2 
100644 --- a/filetests/regalloc/global-constraints.clif +++ b/filetests/regalloc/global-constraints.clif @@ -14,6 +14,9 @@ ebb0(v0: i32): v4 = icmp_imm ne v0, 4 v5 = icmp_imm sge v0, 5 brnz v5, ebb1 + jump ebb2 + +ebb2: return ebb1: diff --git a/filetests/regalloc/iterate.clif b/filetests/regalloc/iterate.clif index 424ecfa74..347dbc5f2 100644 --- a/filetests/regalloc/iterate.clif +++ b/filetests/regalloc/iterate.clif @@ -1,7 +1,7 @@ test regalloc target x86_64 haswell -function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash { +function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { ebb0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): v32 = iconst.i32 0 v6 = bitcast.f32 v32 @@ -20,6 +20,9 @@ ebb0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): v44 = iconst.i64 0 v37 = icmp slt v0, v44 brnz v37, ebb2 + jump ebb11 + +ebb11: v38 = fcvt_from_sint.f64 v0 jump ebb3(v38) @@ -41,6 +44,9 @@ ebb3(v15: f64): v54 = iconst.i64 0 v47 = icmp.i64 slt v13, v54 brnz v47, ebb4 + jump ebb12 + +ebb12: v48 = fcvt_from_sint.f64 v13 jump ebb5(v48) @@ -57,6 +63,9 @@ ebb5(v20: f64): v63 = iconst.i64 0 v56 = icmp.i64 slt v7, v63 brnz v56, ebb6 + jump ebb13 + +ebb13: v57 = fcvt_from_sint.f64 v7 jump ebb7(v57) @@ -82,8 +91,14 @@ ebb7(v21: f64): v69 = iconst.i64 0x8000_0000_0000_0000 v65 = icmp ne v30, v69 brnz v65, ebb8 + jump ebb15 + +ebb15: v66 = fcmp uno v29, v29 brz v66, ebb9 + jump ebb16 + +ebb16: trap bad_toint ebb9: @@ -91,6 +106,9 @@ ebb9: v67 = bitcast.f64 v70 v68 = fcmp gt v67, v29 brz v68, ebb10 + jump ebb17 + +ebb17: trap int_ovf ebb10: @@ -103,10 +121,10 @@ ebb1(v31: i64): return v31 } -function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash { +function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v { gv1 = vmctx gv0 = iadd_imm.i64 gv1, 48 - sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash + sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v ebb0(v0: i64): v1 = iconst.i32 32 @@ -118,6 +136,9 @@ ebb0(v0: i64): v7 = icmp uge v3, v6 ; If we're unlucky, there are no ABCD registers available for v7 at this branch. brz v7, ebb2 + jump ebb4 + +ebb4: trap oob ebb2: @@ -128,6 +149,9 @@ ebb2: v11 = iadd v8, v10 v12 = load.i64 v11 brnz v12, ebb3 + jump ebb5 + +ebb5: trap icall_null ebb3: diff --git a/filetests/regalloc/reload-208-bb.clif b/filetests/regalloc/reload-208-bb.clif new file mode 100644 index 000000000..47438d7d6 --- /dev/null +++ b/filetests/regalloc/reload-208-bb.clif @@ -0,0 +1,111 @@ +test regalloc +target x86_64 haswell +feature "basic-blocks" + +; regex: V=v\d+ +; regex: EBB=ebb\d+ + +; Filed as https://github.com/CraneStation/cranelift/issues/208 +; +; The verifier complains about a branch argument that is not in the same virtual register as the +; corresponding EBB argument. 
+; +; The problem was the reload pass rewriting EBB arguments on "brnz v9, ebb3(v9)" + +function %pr208(i64 vmctx [%rdi]) system_v { + gv1 = vmctx + gv0 = iadd_imm.i64 gv1, -8 + heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000 + sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v + sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v + fn0 = u0:1 sig0 + fn1 = u0:3 sig1 + +ebb0(v0: i64): + v1 = iconst.i32 0 + v2 = call fn0(v0) + v20 = iconst.i32 0x4ffe + v16 = icmp uge v2, v20 + brz v16, ebb5 + jump ebb9 + +ebb9: + trap heap_oob + +ebb5: + v17 = uextend.i64 v2 + v18 = iadd_imm.i64 v0, -8 + v19 = load.i64 v18 + v3 = iadd v19, v17 + v4 = load.i32 v3 + v21 = iconst.i32 0 + v5 = icmp eq v4, v21 + v6 = bint.i32 v5 + brnz v6, ebb2 + jump ebb3(v4) + +ebb3(v7: i32): + call fn1(v0, v7) + v26 = iconst.i32 0x4ffe + v22 = icmp uge v7, v26 + brz v22, ebb6 + jump ebb10 + +ebb10: + trap heap_oob + +ebb6: + v23 = uextend.i64 v7 + v24 = iadd_imm.i64 v0, -8 + v25 = load.i64 v24 + v8 = iadd v25, v23 + v9 = load.i32 v8+56 + ; check: v9 = spill + ; check: brnz $V, $(splitEdge=$EBB) + brnz v9, ebb3(v9) + jump ebb4 + + ; check: $splitEdge: + ; nextln: jump ebb3(v9) + +ebb4: + jump ebb2 + +ebb2: + v10 = iconst.i32 0 + v31 = iconst.i32 0x4ffe + v27 = icmp uge v10, v31 + brz v27, ebb7 + jump ebb11 + +ebb11: + trap heap_oob + +ebb7: + v28 = uextend.i64 v10 + v29 = iadd_imm.i64 v0, -8 + v30 = load.i64 v29 + v11 = iadd v30, v28 + v12 = load.i32 v11+12 + call fn1(v0, v12) + v13 = iconst.i32 0 + v36 = iconst.i32 0x4ffe + v32 = icmp uge v13, v36 + brz v32, ebb8 + jump ebb12 + +ebb12: + trap heap_oob + +ebb8: + v33 = uextend.i64 v13 + v34 = iadd_imm.i64 v0, -8 + v35 = load.i64 v34 + v14 = iadd v35, v33 + v15 = load.i32 v14+12 + call fn1(v0, v15) + jump ebb1 + +ebb1: + return +} diff --git a/filetests/regalloc/reload-208.clif b/filetests/regalloc/reload-208.clif index 116e5b719..c76767025 100644 --- a/filetests/regalloc/reload-208.clif +++ b/filetests/regalloc/reload-208.clif @@ -1,5 +1,6 @@ test regalloc target x86_64 haswell +feature !"basic-blocks" ; regex: V=v\d+ @@ -25,6 +26,9 @@ ebb0(v0: i64): v20 = iconst.i32 0x4ffe v16 = icmp uge v2, v20 brz v16, ebb5 + jump ebb9 + +ebb9: trap heap_oob ebb5: @@ -44,6 +48,9 @@ ebb3(v7: i32): v26 = iconst.i32 0x4ffe v22 = icmp uge v7, v26 brz v22, ebb6 + jump ebb10 + +ebb10: trap heap_oob ebb6: @@ -65,6 +72,9 @@ ebb2: v31 = iconst.i32 0x4ffe v27 = icmp uge v10, v31 brz v27, ebb7 + jump ebb11 + +ebb11: trap heap_oob ebb7: @@ -78,6 +88,9 @@ ebb7: v36 = iconst.i32 0x4ffe v32 = icmp uge v13, v36 brz v32, ebb8 + jump ebb12 + +ebb12: trap heap_oob ebb8: diff --git a/filetests/regalloc/spill.clif b/filetests/regalloc/spill.clif index 21731f321..525921a37 100644 --- a/filetests/regalloc/spill.clif +++ b/filetests/regalloc/spill.clif @@ -151,6 +151,9 @@ ebb0(v1: i32): ; check: v1 = spill v2 = iconst.i32 1 brnz v1, ebb1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2) + jump ebb2 + +ebb2: return v1 ebb1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32): diff --git a/filetests/regalloc/unreachable_code.clif b/filetests/regalloc/unreachable_code.clif index 9d11ded01..79e95ab9b 100644 --- a/filetests/regalloc/unreachable_code.clif +++ b/filetests/regalloc/unreachable_code.clif @@ -6,7 +6,7 @@ target x86_64 haswell ; This function contains unreachable blocks which trip up the register ; allocator if they don't get cleared out. 
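(The cleanup that comment describes is ordinary reachability pruning: walk the CFG from the entry block and drop every block the walk never visits. A toy Rust sketch of the idea follows — an illustrative adjacency-map CFG, not Cranelift's actual pass or API; the regression input itself is the function just below.)

    use std::collections::{HashMap, HashSet, VecDeque};

    /// Breadth-first walk from `entry`; any block not returned is
    /// unreachable and can be deleted before register allocation.
    fn reachable(cfg: &HashMap<u32, Vec<u32>>, entry: u32) -> HashSet<u32> {
        let mut seen = HashSet::new();
        let mut work = VecDeque::from([entry]);
        while let Some(b) = work.pop_front() {
            if seen.insert(b) {
                for &s in cfg.get(&b).into_iter().flatten() {
                    work.push_back(s);
                }
            }
        }
        seen
    }

    fn main() {
        // ebb0 -> ebb1; ebb2 branches to ebb1 but nothing reaches ebb2.
        let cfg = HashMap::from([(0, vec![1]), (1, vec![]), (2, vec![1])]);
        let keep = reachable(&cfg, 0);
        assert!(keep.contains(&0) && keep.contains(&1) && !keep.contains(&2));
    }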
-function %unreachable_blocks(i64 vmctx) -> i32 baldrdash { +function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v { ebb0(v0: i64): v1 = iconst.i32 0 v2 = iconst.i32 0 diff --git a/filetests/regalloc/x86-regres-bb.clif b/filetests/regalloc/x86-regres-bb.clif new file mode 100644 index 000000000..28eba22a6 --- /dev/null +++ b/filetests/regalloc/x86-regres-bb.clif @@ -0,0 +1,49 @@ +test regalloc +target i686 +feature "basic-blocks" + +; regex: V=v\d+ +; regex: EBB=ebb\d+ + +; The value v9 appears both as the branch control and one of the EBB arguments +; in the brnz instruction in ebb2. It also happens that v7 and v9 are assigned +; to the same register, so v9 doesn't need to be moved before the brnz. +; +; This ended up confusing the constraint solver which had not made a record of +; the fixed register assignment for v9 since it was already in the correct +; register. +function %pr147(i32) -> i32 system_v { +ebb0(v0: i32): + v1 = iconst.i32 0 + v2 = iconst.i32 1 + v3 = iconst.i32 0 + jump ebb2(v3, v2, v0) + +ebb2(v4: i32, v5: i32, v7: i32): + ; check: ebb2 + v6 = iadd v4, v5 + v8 = iconst.i32 -1 + ; v7 is killed here and v9 gets the same register. + v9 = iadd v7, v8 + ; check: v9 = iadd v7, v8 + ; Here v9 the brnz control appears to interfere with v9 the EBB argument, + ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The + ; add_var sanity checks got confused when no fixed assignment could be + ; found for v9. + ; + ; We should be able to handle this situation without making copies of v9. + brnz v9, ebb2(v5, v6, v9) + ; check: brnz v9, $(splitEdge=$EBB) + jump ebb3 + + ; check: $splitEdge: + ; check: jump ebb2($V, $V, v9) +ebb3: + return v5 +} + +function %select_i64(i64, i64, i32) -> i64 { +ebb0(v0: i64, v1: i64, v2: i32): + v3 = select v2, v0, v1 + return v3 +} diff --git a/filetests/regalloc/x86-regres.clif b/filetests/regalloc/x86-regres.clif index 66c3d45c2..ac6e82d66 100644 --- a/filetests/regalloc/x86-regres.clif +++ b/filetests/regalloc/x86-regres.clif @@ -1,6 +1,6 @@ test regalloc - target i686 +feature !"basic-blocks" ; regex: V=v\d+ diff --git a/filetests/regress/allow-relaxation-shrink.clif b/filetests/regress/allow-relaxation-shrink.clif index 21c0f847b..3dd825065 100644 --- a/filetests/regress/allow-relaxation-shrink.clif +++ b/filetests/regress/allow-relaxation-shrink.clif @@ -30,6 +30,9 @@ ebb4: v19 = bint.i8 v18 v20 = uextend.i32 v19 brz v20, ebb6 + jump ebb7 + +ebb7: trap user0 ebb5: diff --git a/filetests/safepoint/basic-bb.clif b/filetests/safepoint/basic-bb.clif new file mode 100644 index 000000000..b2ccedf55 --- /dev/null +++ b/filetests/safepoint/basic-bb.clif @@ -0,0 +1,72 @@ +test safepoint +set enable_safepoints=true +target x86_64 +feature "basic-blocks" + +function %test(i32, r64, r64) -> r64 { + ebb0(v0: i32, v1:r64, v2:r64): + jump ebb1(v0) + ebb1(v3: i32): + v4 = irsub_imm v3, 1 + jump ebb2(v4) + ebb2(v5: i32): + resumable_trap interrupt + brz v5, ebb1(v5) + jump ebb3 + ebb3: + v6 = null.r64 + v7 = is_null v6 + brnz v7, ebb2(v0) + jump ebb4 + ebb4: + brnz v0, ebb5 + jump ebb6 + ebb5: + return v1 + ebb6: + return v2 +} + +; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { +; nextln: ebb0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): +; nextln: v10 = copy v0 +; nextln: jump ebb1(v10) +; nextln: +; nextln: ebb1(v3: i32 [%rax]): +; nextln: v8 = iconst.i32 1 +; nextln: v4 = isub v8, v3 +; nextln: jump ebb2(v4) +; nextln: +; nextln: ebb2(v5: i32 [%rcx]): +; nextln: safepoint v1, v2 +; nextln:
resumable_trap interrupt +; nextln: brz v5, ebb7 +; nextln: jump ebb3 +; nextln: +; nextln: ebb3: +; nextln: v6 = null.r64 +; nextln: v7 = is_null v6 +; nextln: brnz v7, ebb8 +; nextln: jump ebb4 +; nextln: +; nextln: ebb4: +; nextln: brnz.i32 v0, ebb5 +; nextln: jump ebb6 +; nextln: +; nextln: ebb5: +; nextln: regmove.r64 v1, %rsi -> %rax +; nextln: return v1 +; nextln: +; nextln: ebb6: +; nextln: regmove.r64 v2, %rdx -> %rax +; nextln: return v2 +; nextln: +; nextln: ebb7: +; nextln: regmove.i32 v5, %rcx -> %rax +; nextln: jump ebb1(v5) +; nextln: +; nextln: ebb8: +; nextln: v9 = copy.i32 v0 +; nextln: regmove v9, %rax -> %rcx +; nextln: jump ebb2(v9) +; nextln: } diff --git a/filetests/safepoint/basic.clif b/filetests/safepoint/basic.clif new file mode 100644 index 000000000..820594e85 --- /dev/null +++ b/filetests/safepoint/basic.clif @@ -0,0 +1,55 @@ +test safepoint +set enable_safepoints=true +target x86_64 +feature !"basic-blocks" + +function %test(i32, r64, r64) -> r64 { + ebb0(v0: i32, v1:r64, v2:r64): + jump ebb1(v0) + ebb1(v3: i32): + v4 = irsub_imm v3, 1 + jump ebb2(v4) + ebb2(v5: i32): + resumable_trap interrupt + brz v5, ebb1(v5) + v6 = null.r64 + v7 = is_null v6 + brnz v7, ebb2(v0) + brnz v0, ebb3 + jump ebb4 + ebb3: + return v1 + ebb4: + return v2 +} + +; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast { +; nextln: ebb0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]): +; nextln: v10 = copy v0 +; nextln: jump ebb1(v10) +; nextln: +; nextln: ebb1(v3: i32 [%rax]): +; nextln: v8 = iconst.i32 1 +; nextln: v4 = isub v8, v3 +; nextln: jump ebb2(v4) +; nextln: +; nextln: ebb2(v5: i32 [%rcx]): +; nextln: safepoint v1, v2 +; nextln: resumable_trap interrupt +; nextln: regmove v5, %rcx -> %rax +; nextln: brz v5, ebb1(v5) +; nextln: v6 = null.r64 +; nextln: v7 = is_null v6 +; nextln: v9 = copy.i32 v0 +; nextln: brnz v7, ebb2(v9) +; nextln: brnz.i32 v0, ebb3 +; nextln: jump ebb4 +; nextln: +; nextln: ebb3: +; nextln: regmove.r64 v1, %rsi -> %rax +; nextln: return v1 +; nextln: +; nextln: ebb4: +; nextln: regmove.r64 v2, %rdx -> %rax +; nextln: return v2 +; nextln: } diff --git a/filetests/safepoint/call.clif b/filetests/safepoint/call.clif new file mode 100644 index 000000000..9e9583093 --- /dev/null +++ b/filetests/safepoint/call.clif @@ -0,0 +1,58 @@ +test safepoint +set enable_safepoints=true +target x86_64 + +function %direct() -> r64 { + fn0 = %none() + fn1 = %one() -> r64 + fn2 = %two() -> i32, r64 + +ebb0: + call fn0() + v1 = call fn1() + v2, v3 = call fn2() + brz v2, ebb2 + jump ebb1 +ebb1: + return v1 +ebb2: + v4 = call fn1() + return v3 +} + +; sameln: function %direct() -> r64 [%rax] fast { +; nextln: ss0 = spill_slot 8 +; nextln: ss1 = spill_slot 8 +; nextln: sig0 = () fast +; nextln: sig1 = () -> r64 [%rax] fast +; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast +; nextln: fn0 = %none sig0 +; nextln: fn1 = %one sig1 +; nextln: fn2 = %two sig2 +; nextln: +; nextln: ebb0: +; nextln: v5 = func_addr.i64 fn0 +; nextln: call_indirect sig0, v5() +; nextln: v6 = func_addr.i64 fn1 +; nextln: v9 = call_indirect sig1, v6() +; nextln: v1 = spill v9 +; nextln: v7 = func_addr.i64 fn2 +; nextln: safepoint v1 +; nextln: v2, v10 = call_indirect sig2, v7() +; nextln: v3 = spill v10 +; nextln: brz v2, ebb2 +; nextln: jump ebb1 +; nextln: +; nextln: ebb1: +; nextln: v11 = fill.r64 v1 +; nextln: regmove v11, %r15 -> %rax +; nextln: return v11 +; nextln: +; nextln: ebb2: +; nextln: v8 = func_addr.i64 fn1 +; nextln: safepoint v3 +; nextln: v4 = call_indirect sig1, 
v8() +; nextln: v12 = fill.r64 v3 +; nextln: regmove v12, %r15 -> %rax +; nextln: return v12 +; nextln: } diff --git a/filetests/simple_gvn/basic.clif b/filetests/simple_gvn/basic.clif index df8fd495c..72155a324 100644 --- a/filetests/simple_gvn/basic.clif +++ b/filetests/simple_gvn/basic.clif @@ -24,6 +24,9 @@ function %redundancies_on_some_paths(i32, i32, i32) -> i32 { ebb0(v0: i32, v1: i32, v2: i32): v3 = iadd v0, v1 brz v3, ebb1 + jump ebb3 + +ebb3: v4 = iadd v0, v1 jump ebb2(v4) ; check: jump ebb2(v3) diff --git a/filetests/simple_gvn/scopes.clif b/filetests/simple_gvn/scopes.clif index 85ea5583c..bf4e7fac9 100644 --- a/filetests/simple_gvn/scopes.clif +++ b/filetests/simple_gvn/scopes.clif @@ -5,6 +5,9 @@ ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32): v5 = iconst.i32 16 ; check: v5 = iconst.i32 16 brz v0, ebb1 + jump ebb5 + +ebb5: v6 = iconst.i32 17 ; check: v6 = iconst.i32 17 v7 = iconst.i32 16 @@ -30,6 +33,9 @@ ebb2: v14 = iconst.i32 16 ; not: v14 = iconst.i32 16 brz v1, ebb3 + jump ebb6 + +ebb6: v15 = iconst.i32 20 ; check: v15 = iconst.i32 20 v16 = iconst.i32 19 diff --git a/filetests/simple_preopt/div_by_const_indirect.clif b/filetests/simple_preopt/div_by_const_indirect.clif index fa66337fc..c11111319 100644 --- a/filetests/simple_preopt/div_by_const_indirect.clif +++ b/filetests/simple_preopt/div_by_const_indirect.clif @@ -1,5 +1,5 @@ test simple_preopt -target i686 baseline +target x86_64 baseline ; Cases where the denominator is created by an iconst @@ -13,8 +13,8 @@ ebb0(v0: i32): ; check: isub v0, v4 ; check: ushr_imm v5, 1 ; check: iadd v6, v4 - ; check: ushr_imm v7, 2 - ; check: copy v8 + ; check: v8 = ushr_imm v7, 2 + ; check: v2 -> v8 return v2 } @@ -27,8 +27,8 @@ ebb0(v0: i32): ; check: smulhi v0, v3 ; check: sshr_imm v4, 3 ; check: ushr_imm v5, 31 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v2 -> v7 return v2 } @@ -39,8 +39,8 @@ ebb0(v0: i64): ; check: iconst.i64 1337 ; check: iconst.i64 0xc411_9d95_2866_a139 ; check: umulhi v0, v3 - ; check: ushr_imm v4, 10 - ; check: copy v5 + ; check: v5 = ushr_imm v4, 10 + ; check: v2 -> v5 return v2 } @@ -53,7 +53,7 @@ ebb0(v0: i64): ; check: smulhi v0, v3 ; check: sshr_imm v4, 14 ; check: ushr_imm v5, 63 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v2 -> v7 return v2 } diff --git a/filetests/simple_preopt/div_by_const_non_power_of_2.clif b/filetests/simple_preopt/div_by_const_non_power_of_2.clif index fa0ac41bf..2a16699aa 100644 --- a/filetests/simple_preopt/div_by_const_non_power_of_2.clif +++ b/filetests/simple_preopt/div_by_const_non_power_of_2.clif @@ -12,8 +12,8 @@ ebb0(v0: i32): ; check: isub v0, v3 ; check: ushr_imm v4, 1 ; check: iadd v5, v3 - ; check: ushr_imm v6, 2 - ; check: copy v7 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 return v1 } @@ -23,8 +23,8 @@ ebb0(v0: i32): v1 = udiv_imm v0, 125 ; check: iconst.i32 0x1062_4dd3 ; check: umulhi v0, v2 - ; check: ushr_imm v3, 3 - ; check: copy v4 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 return v1 } @@ -33,8 +33,8 @@ function %t_udiv32_p641(i32) -> i32 { ebb0(v0: i32): v1 = udiv_imm v0, 641 ; check: iconst.i32 0x0066_3d81 - ; check: umulhi v0, v2 - ; check: copy v3 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 return v1 } @@ -48,8 +48,8 @@ ebb0(v0: i32): ; check: iconst.i32 0xffff_ffff_d555_5555 ; check: smulhi v0, v2 ; check: ushr_imm v3, 31 - ; check: iadd v3, v4 - ; check: copy v5 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 return v1 } @@ -61,8 +61,8 @@ ebb0(v0: i32): ; 
check: smulhi v0, v2 ; check: sshr_imm v3, 1 ; check: ushr_imm v4, 31 - ; check: iadd v4, v5 - ; check: copy v6 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 return v1 } @@ -75,8 +75,8 @@ ebb0(v0: i32): ; check: isub v3, v0 ; check: sshr_imm v4, 1 ; check: ushr_imm v5, 31 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 return v1 } @@ -87,8 +87,8 @@ ebb0(v0: i32): ; check: iconst.i32 0x2aaa_aaab ; check: smulhi v0, v2 ; check: ushr_imm v3, 31 - ; check: iadd v3, v4 - ; check: copy v5 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 return v1 } @@ -101,8 +101,8 @@ ebb0(v0: i32): ; check: iadd v3, v0 ; check: sshr_imm v4, 2 ; check: ushr_imm v5, 31 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 return v1 } @@ -114,8 +114,8 @@ ebb0(v0: i32): ; check: smulhi v0, v2 ; check: sshr_imm v3, 8 ; check: ushr_imm v4, 31 - ; check: iadd v4, v5 - ; check: copy v6 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 return v1 } @@ -131,8 +131,8 @@ ebb0(v0: i64): ; check: isub v0, v3 ; check: ushr_imm v4, 1 ; check: iadd v5, v3 - ; check: ushr_imm v6, 2 - ; check: copy v7 + ; check: v7 = ushr_imm v6, 2 + ; check: v1 -> v7 return v1 } @@ -142,8 +142,8 @@ ebb0(v0: i64): v1 = udiv_imm v0, 9 ; check: iconst.i64 0xe38e_38e3_8e38_e38f ; check: umulhi v0, v2 - ; check: ushr_imm v3, 3 - ; check: copy v4 + ; check: v4 = ushr_imm v3, 3 + ; check: v1 -> v4 return v1 } @@ -156,8 +156,8 @@ ebb0(v0: i64): ; check: isub v0, v3 ; check: ushr_imm v4, 1 ; check: iadd v5, v3 - ; check: ushr_imm v6, 6 - ; check: copy v7 + ; check: v7 = ushr_imm v6, 6 + ; check: v1 -> v7 return v1 } @@ -166,8 +166,8 @@ function %t_udiv64_p274177(i64) -> i64 { ebb0(v0: i64): v1 = udiv_imm v0, 274177 ; check: iconst.i64 0x3d30_f19c_d101 - ; check: umulhi v0, v2 - ; check: copy v3 + ; check: v3 = umulhi v0, v2 + ; check: v1 -> v3 return v1 } @@ -182,8 +182,8 @@ ebb0(v0: i64): ; check: smulhi v0, v2 ; check: sshr_imm v3, 7 ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: copy v6 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 return v1 } @@ -194,8 +194,8 @@ ebb0(v0: i64): ; check: iconst.i64 0xd555_5555_5555_5555 ; check: smulhi v0, v2 ; check: ushr_imm v3, 63 - ; check: iadd v3, v4 - ; check: copy v5 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 return v1 } @@ -207,8 +207,8 @@ ebb0(v0: i64): ; check: smulhi v0, v2 ; check: sshr_imm v3, 1 ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: copy v6 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 return v1 } @@ -221,8 +221,8 @@ ebb0(v0: i64): ; check: isub v3, v0 ; check: sshr_imm v4, 1 ; check: ushr_imm v5, 63 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 return v1 } @@ -233,8 +233,8 @@ ebb0(v0: i64): ; check: iconst.i64 0x2aaa_aaaa_aaaa_aaab ; check: smulhi v0, v2 ; check: ushr_imm v3, 63 - ; check: iadd v3, v4 - ; check: copy v5 + ; check: v5 = iadd v3, v4 + ; check: v1 -> v5 return v1 } @@ -247,8 +247,8 @@ ebb0(v0: i64): ; check: iadd v3, v0 ; check: sshr_imm v4, 3 ; check: ushr_imm v5, 63 - ; check: iadd v5, v6 - ; check: copy v7 + ; check: v7 = iadd v5, v6 + ; check: v1 -> v7 return v1 } @@ -260,7 +260,7 @@ ebb0(v0: i64): ; check: smulhi v0, v2 ; check: sshr_imm v3, 7 ; check: ushr_imm v4, 63 - ; check: iadd v4, v5 - ; check: copy v6 + ; check: v6 = iadd v4, v5 + ; check: v1 -> v6 return v1 } diff --git a/filetests/simple_preopt/div_by_const_power_of_2.clif b/filetests/simple_preopt/div_by_const_power_of_2.clif index 5a959750b..fb9c1744f 100644 --- 
a/filetests/simple_preopt/div_by_const_power_of_2.clif +++ b/filetests/simple_preopt/div_by_const_power_of_2.clif @@ -11,11 +11,11 @@ ebb0(v0: i32): return v1 } -; converted to a copy +; converted to a nop function %t_udiv32_p1(i32) -> i32 { ebb0(v0: i32): v1 = udiv_imm v0, 1 - ; check: copy v0 + ; check: nop return v1 } @@ -46,11 +46,11 @@ ebb0(v0: i64): return v1 } -; converted to a copy +; converted to a nop function %t_udiv64_p1(i64) -> i64 { ebb0(v0: i64): v1 = udiv_imm v0, 1 - ; check: copy v0 + ; check: nop return v1 } @@ -81,11 +81,11 @@ ebb0(v0: i32): return v1 } -; converted to a copy +; converted to a nop function %t_sdiv32_p1(i32) -> i32 { ebb0(v0: i32): v1 = sdiv_imm v0, 1 - ; check: copy v0 + ; check: nop return v1 } @@ -104,7 +104,7 @@ ebb0(v0: i32): ; check: ushr_imm v0, 31 ; check: iadd v0, v2 ; check: sshr_imm v3, 1 - ; check: copy v4 + ; check: v1 -> v4 return v1 } @@ -126,8 +126,8 @@ ebb0(v0: i32): ; check: v2 = sshr_imm v0, 1 ; check: ushr_imm v2, 30 ; check: iadd v0, v3 - ; check: sshr_imm v4, 2 - ; check: copy v5 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 return v1 } @@ -151,8 +151,8 @@ ebb0(v0: i32): ; check: sshr_imm v0, 29 ; check: ushr_imm v2, 2 ; check: iadd v0, v3 - ; check: sshr_imm v4, 30 - ; check: copy v5 + ; check: v5 = sshr_imm v4, 30 + ; check: v1 -> v5 return v1 } @@ -192,11 +192,11 @@ ebb0(v0: i64): return v1 } -; converted to a copy +; converted to a nop function %t_sdiv64_p1(i64) -> i64 { ebb0(v0: i64): v1 = sdiv_imm v0, 1 - ; check: copy v0 + ; check: nop return v1 } @@ -214,8 +214,8 @@ ebb0(v0: i64): v1 = sdiv_imm v0, 2 ; check: ushr_imm v0, 63 ; check: iadd v0, v2 - ; check: sshr_imm v3, 1 - ; check: copy v4 + ; check: v4 = sshr_imm v3, 1 + ; check: v1 -> v4 return v1 } @@ -237,8 +237,8 @@ ebb0(v0: i64): ; check: sshr_imm v0, 1 ; check: ushr_imm v2, 62 ; check: iadd v0, v3 - ; check: sshr_imm v4, 2 - ; check: copy v5 + ; check: v5 = sshr_imm v4, 2 + ; check: v1 -> v5 return v1 } @@ -261,8 +261,8 @@ ebb0(v0: i64): ; check: sshr_imm v0, 61 ; check: ushr_imm v2, 2 ; check: iadd v0, v3 - ; check: sshr_imm v4, 62 - ; check: copy v5 + ; check: v5 = sshr_imm v4, 62 + ; check: v1 -> v5 return v1 } diff --git a/filetests/simple_preopt/simplify.clif b/filetests/simple_preopt/simplify32.clif similarity index 59% rename from filetests/simple_preopt/simplify.clif rename to filetests/simple_preopt/simplify32.clif index 1592defe2..45add1b7a 100644 --- a/filetests/simple_preopt/simplify.clif +++ b/filetests/simple_preopt/simplify32.clif @@ -1,6 +1,8 @@ test simple_preopt target i686 +;; 32-bit platforms. + function %iadd_imm(i32) -> i32 { ebb0(v0: i32): v1 = iconst.i32 2 @@ -42,39 +44,18 @@ ebb0(v0: i32): ; nextln: return v3 ; nextln: } -function %brz_bint(i32) { -ebb0(v0: i32): - v3 = icmp_imm slt v0, 0 - v1 = bint.i32 v3 - v2 = select v1, v1, v1 - trapz v1, user0 - brz v1, ebb1 - jump ebb2 - -ebb1: - return - -ebb2: - return -} -; sameln: function %brz_bint -; nextln: (v0: i32): -; nextln: v3 = icmp_imm slt v0, 0 -; nextln: v1 = bint.i32 v3 -; nextln: v2 = select v3, v1, v1 -; nextln: trapz v3, user0 -; nextln: brnz v3, ebb2 -; nextln: jump ebb1 - -function %irsub_imm(i32) -> i32 { -ebb0(v0: i32): - v1 = iconst.i32 2 - v2 = isub v1, v0 +;; Don't simplify operations that would become illegal due to lack of native +;; support.
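Stepping back to the div_by_const checks a few hunks above before the 64-bit cases continue: those expectations encode the classic multiply-high strength reduction. For unsigned division by 7 the checked shape is umulhi, isub, ushr_imm 1, iadd, ushr_imm 2, and the identity is easy to confirm in plain Rust. This sketch assumes the standard Hacker's Delight constant ceil(2^32 / 7) = 0x2492_4925; the literal in the elided context lines of the test is not visible here, so treat the constant as an assumption:

    /// Unsigned division by 7 without a divide instruction, mirroring the
    /// umulhi/isub/ushr/iadd pattern the tests check for.
    fn udiv7(n: u32) -> u32 {
        const MAGIC: u32 = 0x2492_4925; // ceil(2^32 / 7)
        let q = ((u64::from(n) * u64::from(MAGIC)) >> 32) as u32; // umulhi
        let t = (n - q) >> 1; // isub, then ushr_imm 1
        (t + q) >> 2 // iadd, then ushr_imm 2
    }

    fn main() {
        for n in (0..=u32::MAX).step_by(9973) {
            assert_eq!(udiv7(n), n / 7);
        }
    }

The sub/shift/add fix-up exists because no 32-bit constant approximates 1/7 exactly; powers of two, by contrast, reduce to plain shifts, as the power-of-2 file above shows.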
+function %iadd_imm(i64) -> i64 { +ebb0(v0: i64): + v1 = iconst.i64 2 + v2 = iadd v0, v1 + return v2 +} -; sameln: function %irsub_imm -; nextln: ebb0(v0: i32): -; nextln: v1 = iconst.i32 2 -; nextln: v2 = irsub_imm v0, 2 +; sameln: function %iadd_imm +; nextln: ebb0(v0: i64): +; nextln: v1 = iconst.i64 2 +; nextln: v2 = iadd v0, v1 ; nextln: return v2 ; nextln: } + diff --git a/filetests/simple_preopt/simplify64.clif b/filetests/simple_preopt/simplify64.clif new file mode 100644 index 000000000..db485ce77 --- /dev/null +++ b/filetests/simple_preopt/simplify64.clif @@ -0,0 +1,295 @@ +test simple_preopt +target x86_64 + +;; 64-bit platforms. + +function %iadd_imm(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 2 + v2 = iadd v0, v1 + return v2 +} +; sameln: function %iadd_imm +; nextln: ebb0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, 2 +; nextln: return v2 +; nextln: } + +function %isub_imm(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v0, v1 + return v2 +} +; sameln: function %isub_imm +; nextln: ebb0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = iadd_imm v0, -2 +; nextln: return v2 +; nextln: } + +function %icmp_imm(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 2 + v2 = icmp slt v0, v1 + v3 = bint.i32 v2 + return v3 +} +; sameln: function %icmp_imm +; nextln: ebb0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = icmp_imm slt v0, 2 +; nextln: v3 = bint.i32 v2 +; nextln: return v3 +; nextln: } + +function %brz_bint(i32) { +ebb0(v0: i32): + v3 = icmp_imm slt v0, 0 + v1 = bint.i32 v3 + v2 = select v1, v1, v1 + trapz v1, user0 + brz v1, ebb1 + jump ebb2 + +ebb1: + return + +ebb2: + return +} +; sameln: function %brz_bint +; nextln: (v0: i32): +; nextln: v3 = icmp_imm slt v0, 0 +; nextln: v1 = bint.i32 v3 +; nextln: v2 = select v3, v1, v1 +; nextln: trapz v3, user0 +; nextln: brnz v3, ebb2 +; nextln: jump ebb1 + +function %irsub_imm(i32) -> i32 { +ebb0(v0: i32): + v1 = iconst.i32 2 + v2 = isub v1, v0 + return v2 +} +; sameln: function %irsub_imm +; nextln: ebb0(v0: i32): +; nextln: v1 = iconst.i32 2 +; nextln: v2 = irsub_imm v0, 2 +; nextln: return v2 +; nextln: } + +;; Sign- and zero-extensions.
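The extension tests that follow all exercise one rewrite: a left shift followed by a right shift of the same amount becomes ireduce into the narrow type plus uextend (for a logical shift) or sextend (for an arithmetic shift) back to the wide type. The identity itself can be spot-checked in plain Rust; this is an illustration of the arithmetic for the 8 -> 32 case, not Cranelift code:

    fn main() {
        for x in [0u32, 37, 0x80, 0xff, 0x1234_5678, u32::MAX] {
            // ishl_imm 24 then ushr_imm 24 == uextend.i32(ireduce.i8 x)
            assert_eq!((x << 24) >> 24, u32::from(x as u8));
            // ishl_imm 24 then sshr_imm 24 == sextend.i32(ireduce.i8 x)
            assert_eq!((((x << 24) as i32) >> 24) as u32, x as u8 as i8 as u32);
        }
    }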
+ +;; 8 -> 16 +function %uextend_8_16() -> i16 { +ebb0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = ushr_imm v1, 8 + return v2 +} +; sameln: function %uextend_8_16 +; nextln: ebb0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i16 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_16() -> i16 { +ebb0: + v0 = iconst.i16 37 + v1 = ishl_imm v0, 8 + v2 = sshr_imm v1, 8 + return v2 +} +; sameln: function %sextend_8_16 +; nextln: ebb0: +; nextln: v0 = iconst.i16 37 +; nextln: v1 = ishl_imm v0, 8 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i16 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 32 +function %uextend_8_32() -> i32 { +ebb0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = ushr_imm v1, 24 + return v2 +} +; sameln: function %uextend_8_32 +; nextln: ebb0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_32() -> i32 { +ebb0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 24 + v2 = sshr_imm v1, 24 + return v2 +} +; sameln: function %sextend_8_32 +; nextln: ebb0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 24 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 32 +function %uextend_16_32() -> i32 { +ebb0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = ushr_imm v1, 16 + return v2 +} +; sameln: function %uextend_16_32 +; nextln: ebb0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i32 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_32() -> i32 { +ebb0: + v0 = iconst.i32 37 + v1 = ishl_imm v0, 16 + v2 = sshr_imm v1, 16 + return v2 +} +; sameln: function %sextend_16_32 +; nextln: ebb0: +; nextln: v0 = iconst.i32 37 +; nextln: v1 = ishl_imm v0, 16 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i32 v3 +; nextln: return v2 +; nextln: } + +;; 8 -> 64 +function %uextend_8_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = ushr_imm v1, 56 + return v2 +} +; sameln: function %uextend_8_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_8_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 56 + v2 = sshr_imm v1, 56 + return v2 +} +; sameln: function %sextend_8_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 56 +; nextln: v3 = ireduce.i8 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 16 -> 64 +function %uextend_16_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = ushr_imm v1, 48 + return v2 +} +; sameln: function %uextend_16_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_16_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 48 + v2 = sshr_imm v1, 48 + return v2 +} +; sameln: function %sextend_16_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 48 +; nextln: v3 = ireduce.i16 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +;; 32 -> 64 +function %uextend_32_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + v2 = ushr_imm v1, 32 + return v2 +} +; 
sameln: function %uextend_32_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 32 +; nextln: v3 = ireduce.i32 v0 +; nextln: v2 = uextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %sextend_32_64() -> i64 { +ebb0: + v0 = iconst.i64 37 + v1 = ishl_imm v0, 32 + v2 = sshr_imm v1, 32 + return v2 +} +; sameln: function %sextend_32_64 +; nextln: ebb0: +; nextln: v0 = iconst.i64 37 +; nextln: v1 = ishl_imm v0, 32 +; nextln: v3 = ireduce.i32 v0 +; nextln: v2 = sextend.i64 v3 +; nextln: return v2 +; nextln: } + +function %add_imm_fold(i32) -> i32 { +ebb0(v0: i32): + v1 = iadd_imm v0, 42 + v2 = iadd_imm v1, -42 + return v2 +} +; sameln: function %add_imm_fold(i32) +; nextln: ebb0(v0: i32): +; nextln: v2 -> v0 +; nextln: v1 = iadd_imm v0, 42 +; nextln: nop +; nextln: return v2 diff --git a/filetests/verifier/bad_layout.clif b/filetests/verifier/bad_layout.clif index fd597359b..034dd7843 100644 --- a/filetests/verifier/bad_layout.clif +++ b/filetests/verifier/bad_layout.clif @@ -1,11 +1,13 @@ test verifier -function %test(i32) { +function %test_1(i32) { ebb0(v0: i32): - jump ebb1 ; error: terminator + return ; error: terminator return - ebb1: - jump ebb2 +} +function %test_2(i32) { + ebb0(v0: i32): + jump ebb2 ; error: a terminator instruction was encountered before the end of ebb0 brz v0, ebb3 ebb2: jump ebb3 @@ -13,7 +15,7 @@ function %test(i32) { return } -function %test(i32) { ; Ok +function %test_3(i32) { ; Ok ebb0(v0: i32): return } diff --git a/filetests/verifier/bitcast.clif b/filetests/verifier/bitcast.clif new file mode 100644 index 000000000..eb5303cfc --- /dev/null +++ b/filetests/verifier/bitcast.clif @@ -0,0 +1,23 @@ +test verifier + +; bitcast between two types of equal size is ok +function %valid_bitcast1(i32) -> f32 { ; Ok +ebb0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +; bitcast to a type larger than the operand is ok +function %valid_bitcast2(i32) -> i64 { ; Ok +ebb0(v0: i32): + v1 = bitcast.i64 v0 + return v1 +} + +; bitcast to a smaller type is not ok +function %bad_bitcast(i64) -> i32 { +ebb0(v0: i64): + v1 = bitcast.i32 v0 ; error: The bitcast argument v0 doesn't fit in a type of 32 bits + return v1 +} + diff --git a/filetests/verifier/scalar-to-vector.clif b/filetests/verifier/scalar-to-vector.clif new file mode 100644 index 000000000..927abdafc --- /dev/null +++ b/filetests/verifier/scalar-to-vector.clif @@ -0,0 +1,10 @@ +test verifier +set enable_simd=true +target x86_64 + +function %scalar_to_vector() { +ebb0: + v0 = iconst.i32 42 + v1 = scalar_to_vector.f32x4 v0 ; error: arg 0 (v0) has type i32, expected f32 + return +} diff --git a/filetests/verifier/simd-lane-index.clif b/filetests/verifier/simd-lane-index.clif new file mode 100644 index 000000000..064254c0e --- /dev/null +++ b/filetests/verifier/simd-lane-index.clif @@ -0,0 +1,41 @@ +test verifier +set enable_simd +target x86_64 + +function %insertlane_i32x4() { +ebb0: + v0 = vconst.i32x4 [0 0 0 0] + v1 = iconst.i32 42 + v2 = insertlane v0, 4, v1 ; error: The lane 4 does not index into the type i32x4 + return +} + +function %insertlane_b16x8() { +ebb0: + v0 = vconst.b16x8 [false false false false false false false false] + v1 = bconst.b16 true + v2 = insertlane v0, 8, v1 ; error: The lane 8 does not index into the type b16x8 + return +} + +function %insertlane_f64x2() { +ebb0: + v0 = vconst.f64x2 0x00 + v1 = f64const 0x0.1 + v2 = insertlane v0, 2, v1 ; error: The lane 2 does not index into the type f64x2 + return +} + +function %extractlane_i32x4() { +ebb0: + v0 =
vconst.i32x4 [0 0 0 0] + v1 = extractlane v0, 4 ; error: The lane 4 does not index into the type i32x4 + return +} + +function %extractlane_b8x16() { +ebb0: + v0 = vconst.b8x16 0x00 + v1 = extractlane v0, 16 ; error: The lane 16 does not index into the type b8x16 + return +} diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index fb46b9631..f164491ef 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -15,7 +15,7 @@ libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } cranelift-codegen = { path = "../cranelift-codegen" } cranelift-wasm = { path = "../cranelift-wasm" } cranelift-reader = { path = "../cranelift-reader" } -target-lexicon = "0.4.0" +target-lexicon = "0.8.1" # Prevent this from interfering with workspaces [workspace] diff --git a/fuzz/fuzz_reader_parse_test.rs b/fuzz/fuzz_reader_parse_test.rs index cccf13591..4f1789826 100644 --- a/fuzz/fuzz_reader_parse_test.rs +++ b/fuzz/fuzz_reader_parse_test.rs @@ -6,6 +6,7 @@ use std::str; fuzz_target!(|data: &[u8]| { if let Ok(s) = str::from_utf8(data) { - let _ = cranelift_reader::parse_test(s, None, None); + let options = cranelift_reader::ParseOptions::default(); + let _ = cranelift_reader::parse_test(s, options); } }); diff --git a/publish-all.sh b/publish-all.sh index ffe2fe421..e52228a9c 100755 --- a/publish-all.sh +++ b/publish-all.sh @@ -9,7 +9,7 @@ topdir=$(dirname "$0") cd "$topdir" # All the cranelift-* crates have the same version number -version="0.31.0" +version="0.43.1" # Update all of the Cargo.toml files. # @@ -47,5 +47,5 @@ do # Sleep for a few seconds to allow the server to update the index. # https://internals.rust-lang.org/t/changes-to-how-crates-io-handles-index-updates/9608 - echo sleep 3 + echo sleep 10 done diff --git a/rustc.md b/rustc.md index 90e9fb03c..154194081 100644 --- a/rustc.md +++ b/rustc.md @@ -22,7 +22,7 @@ There's plenty of work to do to achieve these goals, and if we achieve them, we'll have enabled a Rust compiler written entirely in Rust, and enabled faster Rust compile times for important use cases. -See [issues tagged "rustc"](https://github.com/CraneStation/cranelift/labels/FA-rustc) +See [issues tagged "rustc"](https://github.com/CraneStation/cranelift/labels/goal%3Arustc) for a list of some of the things that will be needed. With all that said, there is a potential goal beyond that, which is to diff --git a/src/bugpoint.rs b/src/bugpoint.rs new file mode 100644 index 000000000..90d475199 --- /dev/null +++ b/src/bugpoint.rs @@ -0,0 +1,857 @@ +//! CLI tool to reduce Cranelift IR files crashing during compilation. 
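+//! +//! The reduction works by repeatedly applying a fixed set of mutators +//! (remove an instruction, replace an instruction with `iconst` or `trap`, +//! remove an ebb, remove unused entities) and keeping a mutation only if the +//! compiler still crashes on the mutated function; passes repeat until no +//! mutator can shrink the function any further.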
+ +use crate::disasm::{PrintRelocs, PrintStackmaps, PrintTraps}; +use crate::utils::{parse_sets_and_triple, read_to_string}; +use cranelift_codegen::ir::{ + Ebb, FuncRef, Function, GlobalValueData, Inst, InstBuilder, InstructionData, StackSlots, + TrapCode, +}; +use cranelift_codegen::isa::TargetIsa; +use cranelift_codegen::Context; +use cranelift_entity::PrimaryMap; +use cranelift_reader::{parse_test, ParseOptions}; +use std::collections::HashMap; +use std::path::Path; + +use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; + +pub fn run( + filename: &str, + flag_set: &[String], + flag_isa: &str, + verbose: bool, +) -> Result<(), String> { + let parsed = parse_sets_and_triple(flag_set, flag_isa)?; + let fisa = parsed.as_fisa(); + + let path = Path::new(&filename).to_path_buf(); + + let buffer = read_to_string(&path).map_err(|e| format!("{}: {}", filename, e))?; + let test_file = + parse_test(&buffer, ParseOptions::default()).map_err(|e| format!("{}: {}", filename, e))?; + + // If we have an isa from the command-line, use that. Otherwise if the + // file contains a unique isa, use that. + let isa = if let Some(isa) = fisa.isa { + isa + } else if let Some(isa) = test_file.isa_spec.unique_isa() { + isa + } else { + return Err(String::from("compilation requires a target isa")); + }; + + std::env::set_var("RUST_BACKTRACE", "0"); // Disable backtraces to reduce verbosity + + for (func, _) in test_file.functions { + let (orig_ebb_count, orig_inst_count) = (ebb_count(&func), inst_count(&func)); + + match reduce(isa, func, verbose) { + Ok((func, crash_msg)) => { + println!("Crash message: {}", crash_msg); + + println!("\n{}", func); + + println!( + "{} ebbs {} insts -> {} ebbs {} insts", + orig_ebb_count, + orig_inst_count, + ebb_count(&func), + inst_count(&func) + ); + } + Err(err) => println!("Warning: {}", err), + } + } + + Ok(()) +} + +enum MutationKind { + /// The mutation reduced the number of instructions or ebbs. + Shrinked, + /// The mutation only changed an instruction. Performing another round of mutations may only + /// reduce the test case if another mutation shrank the test case. + Changed, +} + +trait Mutator { + fn name(&self) -> &'static str; + + fn mutation_count(&self, func: &Function) -> Option<usize>; + + fn mutate(&mut self, func: Function) -> Option<(Function, String, MutationKind)>; + + fn reduce( + &mut self, + ccc: &mut CrashCheckContext, + mut func: Function, + progress_bar_prefix: String, + verbose: bool, + should_keep_reducing: &mut bool, + ) -> Function { + let progress = ProgressBar::with_draw_target( + self.mutation_count(&func).unwrap_or(0) as u64, + ProgressDrawTarget::stdout(), + ); + progress.set_style( + ProgressStyle::default_bar().template("{bar:60} {prefix:40} {pos:>4}/{len:>4} {msg}"), + ); + + progress.set_prefix(&(progress_bar_prefix + &format!(" phase {}", self.name()))); + + for _ in 0..10000 { + progress.inc(1); + + let (mutated_func, msg, mutation_kind) = match self.mutate(func.clone()) { + Some(res) => res, + None => { + break; + } + }; + + progress.set_message(&msg); + + match ccc.check_for_crash(&mutated_func) { + CheckResult::Succeed => { + // Shrinking didn't hit the problem anymore, discard changes. + continue; + } + CheckResult::Crash(_) => { + // Panic remained while shrinking, make changes definitive.
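+ // (`Changed` mutations are committed as well, but only `MutationKind::Shrinked` + // sets `should_keep_reducing` below, so a pass that only changes + // instructions lets the outer loop in `reduce` stop early.)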
+ func = mutated_func; + match mutation_kind { + MutationKind::Shrinked => { + *should_keep_reducing = true; + if verbose { + progress.println(format!("{}: shrink", msg)); + } + } + MutationKind::Changed => { + if verbose { + progress.println(format!("{}: changed", msg)); + } + } + } + } + } + } + + progress.set_message("done"); + progress.finish(); + + func + } +} + +/// Try to remove instructions. +struct RemoveInst { + ebb: Ebb, + inst: Inst, +} + +impl RemoveInst { + fn new(func: &Function) -> Self { + let first_ebb = func.layout.entry_block().unwrap(); + let first_inst = func.layout.first_inst(first_ebb).unwrap(); + Self { + ebb: first_ebb, + inst: first_inst, + } + } +} + +impl Mutator for RemoveInst { + fn name(&self) -> &'static str { + "remove inst" + } + + fn mutation_count(&self, func: &Function) -> Option<usize> { + Some(inst_count(func)) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, MutationKind)> { + if let Some((prev_ebb, prev_inst)) = + next_inst_ret_prev(&func, &mut self.ebb, &mut self.inst) + { + func.layout.remove_inst(prev_inst); + if func.layout.ebb_insts(prev_ebb).next().is_none() { + // Make sure empty ebbs are removed, as `next_inst_ret_prev` depends on non-empty ebbs + func.layout.remove_ebb(prev_ebb); + Some(( + func, + format!("Remove inst {} and empty ebb {}", prev_inst, prev_ebb), + MutationKind::Shrinked, + )) + } else { + Some(( + func, + format!("Remove inst {}", prev_inst), + MutationKind::Shrinked, + )) + } + } else { + None + } + } +} + +/// Try to replace instructions with `iconst`. +struct ReplaceInstWithIconst { + ebb: Ebb, + inst: Inst, +} + +impl ReplaceInstWithIconst { + fn new(func: &Function) -> Self { + let first_ebb = func.layout.entry_block().unwrap(); + let first_inst = func.layout.first_inst(first_ebb).unwrap(); + Self { + ebb: first_ebb, + inst: first_inst, + } + } +} + +impl Mutator for ReplaceInstWithIconst { + fn name(&self) -> &'static str { + "replace inst with iconst" + } + + fn mutation_count(&self, func: &Function) -> Option<usize> { + Some(inst_count(func)) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, MutationKind)> { + if let Some((_prev_ebb, prev_inst)) = + next_inst_ret_prev(&func, &mut self.ebb, &mut self.inst) + { + let results = func.dfg.inst_results(prev_inst); + if results.len() == 1 { + let ty = func.dfg.value_type(results[0]); + func.dfg.replace(prev_inst).iconst(ty, 0); + Some(( + func, + format!("Replace inst {} with iconst.{}", prev_inst, ty), + MutationKind::Changed, + )) + } else { + Some((func, String::new(), MutationKind::Changed)) + } + } else { + None + } + } +} + +/// Try to replace instructions with `trap`.
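+/// Unlike `RemoveInst` this keeps the layout intact; the payoff is that the +/// trap cuts the data dependency on the replaced instruction's arguments, +/// which can let a later `RemoveInst` pass delete the code that computed them.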
+struct ReplaceInstWithTrap { + ebb: Ebb, + inst: Inst, +} + +impl ReplaceInstWithTrap { + fn new(func: &Function) -> Self { + let first_ebb = func.layout.entry_block().unwrap(); + let first_inst = func.layout.first_inst(first_ebb).unwrap(); + Self { + ebb: first_ebb, + inst: first_inst, + } + } +} + +impl Mutator for ReplaceInstWithTrap { + fn name(&self) -> &'static str { + "replace inst with trap" + } + + fn mutation_count(&self, func: &Function) -> Option<usize> { + Some(inst_count(func)) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, MutationKind)> { + if let Some((_prev_ebb, prev_inst)) = + next_inst_ret_prev(&func, &mut self.ebb, &mut self.inst) + { + func.dfg.replace(prev_inst).trap(TrapCode::User(0)); + Some(( + func, + format!("Replace inst {} with trap", prev_inst), + MutationKind::Changed, + )) + } else { + None + } + } +} + +/// Try to remove an ebb. +struct RemoveEbb { + ebb: Ebb, +} + +impl RemoveEbb { + fn new(func: &Function) -> Self { + Self { + ebb: func.layout.entry_block().unwrap(), + } + } +} + +impl Mutator for RemoveEbb { + fn name(&self) -> &'static str { + "remove ebb" + } + + fn mutation_count(&self, func: &Function) -> Option<usize> { + Some(ebb_count(func)) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, MutationKind)> { + if let Some(next_ebb) = func.layout.next_ebb(self.ebb) { + self.ebb = next_ebb; + while let Some(inst) = func.layout.last_inst(self.ebb) { + func.layout.remove_inst(inst); + } + func.layout.remove_ebb(self.ebb); + Some(( + func, + format!("Remove ebb {}", next_ebb), + MutationKind::Shrinked, + )) + } else { + None + } + } +} + +/// Try to remove unused entities. +struct RemoveUnusedEntities { + kind: u32, +} + +impl RemoveUnusedEntities { + fn new() -> Self { + Self { kind: 0 } + } +} + +impl Mutator for RemoveUnusedEntities { + fn name(&self) -> &'static str { + "remove unused entities" + } + + fn mutation_count(&self, _func: &Function) -> Option<usize> { + Some(4) + } + + fn mutate(&mut self, mut func: Function) -> Option<(Function, String, MutationKind)> { + let name = match self.kind { + 0 => { + let mut ext_func_usage_map = HashMap::new(); + for ebb in func.layout.ebbs() { + for inst in func.layout.ebb_insts(ebb) { + match func.dfg[inst] { + // Add new cases when there are new instruction formats taking a `FuncRef`. + InstructionData::Call { func_ref, .. } + | InstructionData::FuncAddr { func_ref, .. } => { + ext_func_usage_map + .entry(func_ref) + .or_insert_with(Vec::new) + .push(inst); + } + _ => {} + } + } + } + + let mut ext_funcs = PrimaryMap::new(); + + for (func_ref, ext_func_data) in func.dfg.ext_funcs.clone().into_iter() { + if let Some(func_ref_usage) = ext_func_usage_map.get(&func_ref) { + let new_func_ref = ext_funcs.push(ext_func_data.clone()); + for &inst in func_ref_usage { + match func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::Call { + ref mut func_ref, .. + } + | InstructionData::FuncAddr { + ref mut func_ref, .. + } => { + *func_ref = new_func_ref; + } + _ => unreachable!(), + } + } + } + } + + func.dfg.ext_funcs = ext_funcs; + + "Remove unused ext funcs" + } + 1 => { + #[derive(Copy, Clone)] + enum SigRefUser { + Instruction(Inst), + ExtFunc(FuncRef), + } + + let mut signatures_usage_map = HashMap::new(); + for ebb in func.layout.ebbs() { + for inst in func.layout.ebb_insts(ebb) { + match func.dfg[inst] { + // Add new cases when there are new instruction formats taking a `SigRef`. + InstructionData::CallIndirect { sig_ref, ..
} => { + signatures_usage_map + .entry(sig_ref) + .or_insert_with(Vec::new) + .push(SigRefUser::Instruction(inst)); + } + _ => {} + } + } + } + for (func_ref, ext_func_data) in func.dfg.ext_funcs.iter() { + signatures_usage_map + .entry(ext_func_data.signature) + .or_insert_with(Vec::new) + .push(SigRefUser::ExtFunc(func_ref)); + } + + let mut signatures = PrimaryMap::new(); + + for (sig_ref, sig_data) in func.dfg.signatures.clone().into_iter() { + if let Some(sig_ref_usage) = signatures_usage_map.get(&sig_ref) { + let new_sig_ref = signatures.push(sig_data.clone()); + for &sig_ref_user in sig_ref_usage { + match sig_ref_user { + SigRefUser::Instruction(inst) => match func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::CallIndirect { + ref mut sig_ref, .. + } => { + *sig_ref = new_sig_ref; + } + _ => unreachable!(), + }, + SigRefUser::ExtFunc(func_ref) => { + func.dfg.ext_funcs[func_ref].signature = new_sig_ref; + } + } + } + } + } + + func.dfg.signatures = signatures; + + "Remove unused signatures" + } + 2 => { + let mut stack_slot_usage_map = HashMap::new(); + for ebb in func.layout.ebbs() { + for inst in func.layout.ebb_insts(ebb) { + match func.dfg[inst] { + // Add new cases when there are new instruction formats taking a `StackSlot`. + InstructionData::StackLoad { stack_slot, .. } + | InstructionData::StackStore { stack_slot, .. } => { + stack_slot_usage_map + .entry(stack_slot) + .or_insert_with(Vec::new) + .push(inst); + } + + InstructionData::RegSpill { dst, .. } => { + stack_slot_usage_map + .entry(dst) + .or_insert_with(Vec::new) + .push(inst); + } + InstructionData::RegFill { src, .. } => { + stack_slot_usage_map + .entry(src) + .or_insert_with(Vec::new) + .push(inst); + } + _ => {} + } + } + } + + let mut stack_slots = StackSlots::new(); + + for (stack_slot, stack_slot_data) in func.stack_slots.clone().iter() { + if let Some(stack_slot_usage) = stack_slot_usage_map.get(&stack_slot) { + let new_stack_slot = stack_slots.push(stack_slot_data.clone()); + for &inst in stack_slot_usage { + match &mut func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::StackLoad { stack_slot, .. } + | InstructionData::StackStore { stack_slot, .. } => { + *stack_slot = new_stack_slot; + } + InstructionData::RegSpill { dst, .. } => { + *dst = new_stack_slot; + } + InstructionData::RegFill { src, .. } => { + *src = new_stack_slot; + } + _ => unreachable!(), + } + } + } + } + + func.stack_slots = stack_slots; + + "Remove unused stack slots" + } + 3 => { + let mut global_value_usage_map = HashMap::new(); + for ebb in func.layout.ebbs() { + for inst in func.layout.ebb_insts(ebb) { + match func.dfg[inst] { + // Add new cases when there are new instruction formats taking a `GlobalValue`. + InstructionData::UnaryGlobalValue { global_value, .. } => { + global_value_usage_map + .entry(global_value) + .or_insert_with(Vec::new) + .push(inst); + } + _ => {} + } + } + } + + for (_global_value, global_value_data) in func.global_values.iter() { + match *global_value_data { + GlobalValueData::VMContext | GlobalValueData::Symbol { .. } => {} + // These can create cyclic references, which cause complications. Just skip + // the global value removal for now. + // FIXME Handle them in a better way. + GlobalValueData::Load { base: _, .. } + | GlobalValueData::IAddImm { base: _, .. 
} => return None, + } + } + + let mut global_values = PrimaryMap::new(); + + for (global_value, global_value_data) in func.global_values.clone().into_iter() { + if let Some(global_value_usage) = global_value_usage_map.get(&global_value) { + let new_global_value = global_values.push(global_value_data.clone()); + for &inst in global_value_usage { + match &mut func.dfg[inst] { + // Keep in sync with the above match. + InstructionData::UnaryGlobalValue { global_value, .. } => { + *global_value = new_global_value; + } + _ => unreachable!(), + } + } + } + } + + func.global_values = global_values; + + "Remove unused global values" + } + _ => return None, + }; + self.kind += 1; + Some((func, name.to_owned(), MutationKind::Changed)) + } +} + +fn next_inst_ret_prev(func: &Function, ebb: &mut Ebb, inst: &mut Inst) -> Option<(Ebb, Inst)> { + let prev = (*ebb, *inst); + if let Some(next_inst) = func.layout.next_inst(*inst) { + *inst = next_inst; + return Some(prev); + } else if let Some(next_ebb) = func.layout.next_ebb(*ebb) { + *ebb = next_ebb; + *inst = func.layout.first_inst(*ebb).expect("no inst"); + return Some(prev); + } else { + return None; + } +} + +fn ebb_count(func: &Function) -> usize { + func.layout.ebbs().count() +} + +fn inst_count(func: &Function) -> usize { + func.layout + .ebbs() + .map(|ebb| func.layout.ebb_insts(ebb).count()) + .sum() +} + +fn resolve_aliases(func: &mut Function) { + for ebb in func.layout.ebbs() { + for inst in func.layout.ebb_insts(ebb) { + func.dfg.resolve_aliases_in_arguments(inst); + } + } +} + +fn reduce( + isa: &dyn TargetIsa, + mut func: Function, + verbose: bool, +) -> Result<(Function, String), String> { + let mut ccc = CrashCheckContext::new(isa); + + match ccc.check_for_crash(&func) { + CheckResult::Succeed => { + return Err(String::from( + "Given function compiled successfully or gave a verifier error.", + )); + } + CheckResult::Crash(_) => {} + } + + resolve_aliases(&mut func); + + for pass_idx in 0..100 { + let mut should_keep_reducing = false; + let mut phase = 0; + + loop { + let mut mutator = match phase { + 0 => Box::new(RemoveInst::new(&func)) as Box<dyn Mutator>, + 1 => Box::new(ReplaceInstWithIconst::new(&func)) as Box<dyn Mutator>, + 2 => Box::new(ReplaceInstWithTrap::new(&func)) as Box<dyn Mutator>, + 3 => Box::new(RemoveEbb::new(&func)) as Box<dyn Mutator>, + 4 => Box::new(RemoveUnusedEntities::new()) as Box<dyn Mutator>, + _ => break, + }; + + func = mutator.reduce( + &mut ccc, + func, + format!("pass {}", pass_idx), + verbose, + &mut should_keep_reducing, + ); + + phase += 1; + } + + if !should_keep_reducing { + // No new shrinking opportunities have been found this pass. This means none will ever + // be found. Skip the rest of the passes over the function. + break; + } + } + + let crash_msg = match ccc.check_for_crash(&func) { + CheckResult::Succeed => unreachable!("Used to crash, but doesn't anymore???"), + CheckResult::Crash(crash_msg) => crash_msg, + }; + + Ok((func, crash_msg)) +} + +struct CrashCheckContext<'a> { + /// Cached `Context`, to prevent repeated allocation. + context: Context, + + /// The target isa to compile for. + isa: &'a dyn TargetIsa, +} + +fn get_panic_string(panic: Box<dyn std::any::Any + Send>) -> String { + let panic = match panic.downcast::<&'static str>() { + Ok(panic_msg) => return panic_msg.to_string(), + Err(panic) => panic, + }; + match panic.downcast::<String>() { + Ok(panic_msg) => *panic_msg, + Err(_) => "Box<Any>".to_owned(), + } +} + +enum CheckResult { + /// The function compiled fine, or the verifier noticed an error. + Succeed, + + /// The compilation of the function panicked.
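+ /// The `String` is the panic message, normally recovered by + /// `get_panic_string`; `run` prints it as the crash message.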
+ Crash(String), +} + +impl<'a> CrashCheckContext<'a> { + fn new(isa: &'a dyn TargetIsa) -> Self { + CrashCheckContext { + context: Context::new(), + isa, + } + } + + #[cfg_attr(test, allow(unreachable_code))] + fn check_for_crash(&mut self, func: &Function) -> CheckResult { + self.context.clear(); + self.context.func = func.clone(); + + use std::io::Write; + std::io::stdout().flush().unwrap(); // Flush stdout to sync with panic messages on stderr + + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + cranelift_codegen::verifier::verify_function(&func, self.isa).err() + })) { + Ok(Some(_)) => return CheckResult::Succeed, + Ok(None) => {} + // The verifier panicked. Compiling the function will probably give the same panic. + // We treat it as succeeding to make it possible to reduce for the actual error. + // FIXME prevent verifier panic on removing ebb1 + Err(_) => return CheckResult::Succeed, + } + + #[cfg(test)] + { + // For testing purposes we emulate a panic caused by the existence of + // a `call` instruction. + let contains_call = func.layout.ebbs().any(|ebb| { + func.layout.ebb_insts(ebb).any(|inst| match func.dfg[inst] { + InstructionData::Call { .. } => true, + _ => false, + }) + }); + if contains_call { + return CheckResult::Crash("test crash".to_string()); + } else { + return CheckResult::Succeed; + } + } + + let old_panic_hook = std::panic::take_hook(); + std::panic::set_hook(Box::new(|_| {})); // silence panics + + let res = match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut relocs = PrintRelocs::new(false); + let mut traps = PrintTraps::new(false); + let mut stackmaps = PrintStackmaps::new(false); + let mut mem = vec![]; + + let _ = self.context.compile_and_emit( + self.isa, + &mut mem, + &mut relocs, + &mut traps, + &mut stackmaps, + ); + })) { + Ok(()) => CheckResult::Succeed, + Err(err) => CheckResult::Crash(get_panic_string(err)), + }; + + std::panic::set_hook(old_panic_hook); + + res + } +} + +#[cfg(test)] +mod tests { + use super::*; + use cranelift_reader::ParseOptions; + + #[test] + fn test_reduce() { + const TEST: &'static str = include_str!("./bugpoint_test.clif"); + + let test_file = parse_test(TEST, ParseOptions::default()).unwrap(); + + // Use the unique isa specified in the test file.
+ let isa = test_file.isa_spec.unique_isa().expect("Unknown isa"); + + for (func, _) in test_file.functions { + let (func, crash_msg) = reduce(isa, func, false).expect("Couldn't reduce test case"); + + assert_eq!(crash_msg, "test crash"); + + assert_eq!( + format!("{}", func), + "function u0:0(i64, i64, i64) system_v { + sig0 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + fn0 = u0:95 sig0 + +ebb0(v0: i64, v1: i64, v2: i64): + v113 -> v1 + v124 -> v1 + v136 -> v1 + v148 -> v1 + v160 -> v1 + v185 -> v1 + v222 -> v1 + v237 -> v1 + v241 -> v1 + v256 -> v1 + v262 -> v1 + v105 = iconst.i64 0 + trap user0 + +ebb99(v804: i64, v1035: i64, v1037: i64, v1039: i64, v1044: i64, v1052: i16, v1057: i64): + v817 -> v1035 + v830 -> v1037 + v844 -> v1039 + v857 -> v1039 + v939 -> v1039 + v1042 -> v1039 + v1050 -> v1039 + v908 -> v1044 + v917 -> v1044 + v921 -> v1044 + v1043 -> v1044 + v960 -> v1052 + v990 -> v1052 + v1051 -> v1052 + v1055 -> v1052 + v963 -> v1057 + v1056 -> v1057 + v1060 -> v1057 + trap user0 + +ebb101: + v829 = iconst.i64 0 + v935 -> v829 + v962 -> v829 + v992 -> v829 + v1036 -> v829 + v1049 -> v829 + trap user0 + +ebb102: + v842 = iconst.i64 0 + v976 -> v842 + v989 -> v842 + v1038 -> v842 + v1061 -> v842 + trap user0 + +ebb105: + v883 = iconst.i64 0 + v934 -> v883 + v961 -> v883 + v991 -> v883 + v1005 -> v883 + v1048 -> v883 + trap user0 + +ebb114: + v951 = iconst.i64 0 + v988 -> v951 + trap user0 + +ebb117: + v987 = iconst.i64 0 + call fn0(v0, v105, v1052, v883, v829, v987, v951, v842) + trap user0 +} +" + ); + } + } +} diff --git a/src/bugpoint_test.clif b/src/bugpoint_test.clif new file mode 100644 index 000000000..157411ed7 --- /dev/null +++ b/src/bugpoint_test.clif @@ -0,0 +1,1913 @@ +test compile +set is_pic +target x86_64-unknown-linux-gnu + +function u0:0(i64, i64, i64) system_v { + + + ss0 = explicit_slot 16 + ss1 = explicit_slot 1 ss2 = explicit_slot 16 ss3 = explicit_slot 1 ss4 = explicit_slot 16 ss5 = explicit_slot 8 ss6 = explicit_slot 16 ss7 = explicit_slot 16 ss8 = explicit_slot 16 ss9 = explicit_slot 16 ss10 = explicit_slot 16 ss11 = explicit_slot 16 ss12 = explicit_slot 16 ss13 = explicit_slot 16 ss14 = explicit_slot 16 ss15 = explicit_slot 16 ss16 = explicit_slot 16 ss17 = explicit_slot 16 ss18 = explicit_slot 24 ss19 = explicit_slot 4 ss20 = explicit_slot 4 ss21 = explicit_slot 4 ss22 = explicit_slot 4 ss23 = explicit_slot 16 ss24 = explicit_slot 16 ss25 = explicit_slot 16 ss26 = explicit_slot 16 ss27 = explicit_slot 48 ss28 = explicit_slot 16 ss29 = explicit_slot 16 ss30 = explicit_slot 32 ss31 = explicit_slot 16 ss32 = explicit_slot 8 ss33 = explicit_slot 8 ss34 = explicit_slot 16 ss35 = explicit_slot 16 ss36 = explicit_slot 16 ss37 = explicit_slot 48 ss38 = explicit_slot 16 ss39 = explicit_slot 16 ss40 = explicit_slot 32 ss41 = explicit_slot 16 ss42 = explicit_slot 8 ss43 = explicit_slot 8 ss44 = explicit_slot 16 ss45 = explicit_slot 16 ss46 = explicit_slot 16 ss47 = explicit_slot 16 ss48 = explicit_slot 16 ss49 = explicit_slot 16 ss50 = explicit_slot 16 ss51 = explicit_slot 8 ss52 = explicit_slot 4 ss53 = explicit_slot 4 ss54 = explicit_slot 16 ss55 = explicit_slot 16 ss56 = explicit_slot 16 ss57 = explicit_slot 2 ss58 = explicit_slot 4 ss59 = explicit_slot 2 ss60 = explicit_slot 16 ss61 = explicit_slot 16 ss62 = explicit_slot 16 ss63 = explicit_slot 16 ss64 = explicit_slot 16 ss65 = explicit_slot 16 ss66 = explicit_slot 16 ss67 = explicit_slot 16 ss68 = explicit_slot 8 ss69 = explicit_slot 16 ss70 = explicit_slot 16 ss71 = explicit_slot 48 ss72 = 
explicit_slot 16 ss73 = explicit_slot 16 ss74 = explicit_slot 32 ss75 = explicit_slot 16 ss76 = explicit_slot 8 ss77 = explicit_slot 8 ss78 = explicit_slot 16 ss79 = explicit_slot 16 ss80 = explicit_slot 16 ss81 = explicit_slot 48 ss82 = explicit_slot 16 ss83 = explicit_slot 16 ss84 = explicit_slot 32 ss85 = explicit_slot 16 ss86 = explicit_slot 8 ss87 = explicit_slot 8 ss88 = explicit_slot 16 ss89 = explicit_slot 16 ss90 = explicit_slot 4 ss91 = explicit_slot 16 ss92 = explicit_slot 16 ss93 = explicit_slot 16 ss94 = explicit_slot 16 ss95 = explicit_slot 16 ss96 = explicit_slot 16 ss97 = explicit_slot 2 ss98 = explicit_slot 16 ss99 = explicit_slot 16 ss100 = explicit_slot 16 ss101 = explicit_slot 16 ss102 = explicit_slot 16 ss103 = explicit_slot 16 ss104 = explicit_slot 8 ss105 = explicit_slot 16 ss106 = explicit_slot 16 ss107 = explicit_slot 4 ss108 = explicit_slot 16 + ss109 = explicit_slot 16 + ss110 = explicit_slot 16 + ss111 = explicit_slot 16 + ss112 = explicit_slot 4 + ss113 = explicit_slot 4 + ss114 = explicit_slot 4 + ss115 = explicit_slot 4 + ss116 = explicit_slot 16 + ss117 = explicit_slot 16 + ss118 = explicit_slot 16 + ss119 = explicit_slot 16 + ss120 = explicit_slot 16 + ss121 = explicit_slot 4 + ss122 = explicit_slot 4 + ss123 = explicit_slot 16 + ss124 = explicit_slot 16 + ss125 = explicit_slot 16 + ss126 = explicit_slot 2 + ss127 = explicit_slot 16 + ss128 = explicit_slot 16 + ss129 = explicit_slot 16 + ss130 = explicit_slot 16 + ss131 = explicit_slot 16 + ss132 = explicit_slot 4 + ss133 = explicit_slot 16 + ss134 = explicit_slot 16 + ss135 = explicit_slot 16 + ss136 = explicit_slot 16 + ss137 = explicit_slot 16 + ss138 = explicit_slot 16 + ss139 = explicit_slot 2 + ss140 = explicit_slot 16 + ss141 = explicit_slot 16 + ss142 = explicit_slot 16 + ss143 = explicit_slot 16 + ss144 = explicit_slot 4 + gv0 = symbol colocated u1:22 + gv1 = symbol colocated u1:23 + gv2 = symbol colocated u1:24 + gv3 = symbol colocated u1:23 + gv4 = symbol colocated u1:25 + gv5 = symbol colocated u1:23 + gv6 = symbol colocated u1:26 + gv7 = symbol colocated u1:23 + gv8 = symbol colocated u1:27 + gv9 = symbol colocated u1:23 + gv10 = symbol colocated u1:28 + gv11 = symbol colocated u1:23 + gv12 = symbol colocated u1:29 + gv13 = symbol colocated u1:30 + gv14 = symbol colocated u1:31 + gv15 = symbol colocated u1:23 + gv16 = symbol colocated u1:29 + gv17 = symbol colocated u1:32 + gv18 = symbol colocated u1:32 + gv19 = symbol colocated u1:32 + gv20 = symbol colocated u1:32 + gv21 = symbol colocated u1:32 + gv22 = symbol colocated u1:33 + gv23 = symbol colocated u1:34 + gv24 = symbol colocated u1:23 + gv25 = symbol colocated u1:35 + gv26 = symbol colocated u1:36 + gv27 = symbol colocated u1:23 + gv28 = symbol colocated u1:29 + gv29 = symbol colocated u1:32 + gv30 = symbol colocated u1:37 + gv31 = symbol colocated u1:38 + gv32 = symbol colocated u1:30 + gv33 = symbol colocated u1:32 + gv34 = symbol colocated u1:32 + gv35 = symbol colocated u1:29 + gv36 = symbol colocated u1:32 + gv37 = symbol colocated u1:30 + gv38 = symbol colocated u1:32 + gv39 = symbol colocated u1:39 + gv40 = symbol colocated u1:40 + gv41 = symbol colocated u1:41 + gv42 = symbol colocated u1:23 + gv43 = symbol colocated u1:29 + gv44 = symbol colocated u1:42 + gv45 = symbol colocated u1:29 + gv46 = symbol colocated u1:30 + gv47 = symbol colocated u1:29 + gv48 = symbol colocated u1:30 + gv49 = symbol colocated u1:32 + gv50 = symbol colocated u1:43 + gv51 = symbol colocated u1:44 + gv52 = symbol colocated u1:45 + gv53 = symbol 
colocated u1:23 + gv54 = symbol colocated u1:46 + gv55 = symbol colocated u1:47 + gv56 = symbol colocated u1:48 + gv57 = symbol colocated u1:23 + gv58 = symbol colocated u1:32 + gv59 = symbol colocated u1:39 + gv60 = symbol colocated u1:49 + gv61 = symbol colocated u1:49 + gv62 = symbol colocated u1:49 + gv63 = symbol colocated u1:38 + gv64 = symbol colocated u1:30 + gv65 = symbol colocated u1:32 + gv66 = symbol colocated u1:50 + gv67 = symbol colocated u1:23 + gv68 = symbol colocated u1:29 + gv69 = symbol colocated u1:51 + gv70 = symbol colocated u1:29 + gv71 = symbol colocated u1:30 + gv72 = symbol colocated u1:32 + gv73 = symbol colocated u1:49 + gv74 = symbol colocated u1:32 + sig0 = (i64) system_v + sig1 = (i64) system_v + sig2 = (i64) system_v + sig3 = (i64) system_v + sig4 = (i64) system_v + sig5 = (i64) system_v + sig6 = (i64, i64, i64) system_v + sig7 = (i64) -> i8 system_v + sig8 = (i64) system_v + sig9 = (i64) system_v + sig10 = (i64, i64, i64) system_v + sig11 = (i64) -> i8 system_v + sig12 = (i64) system_v + sig13 = (i64) system_v + sig14 = (i64) -> i64 system_v + sig15 = (i64) system_v + sig16 = (i64) system_v + sig17 = (i64) system_v + sig18 = (i64) system_v + sig19 = (i64) system_v + sig20 = (i64) system_v + sig21 = (i64) system_v + sig22 = (i64, i64) system_v + sig23 = (i64) system_v + sig24 = (i64, i64, i16) system_v + sig25 = (i64, i64, i16) system_v + sig26 = (i64) system_v + sig27 = (i64) system_v + sig28 = (i64) system_v + sig29 = (i64) system_v + sig30 = (i64, i16, i16) system_v + sig31 = (i64, i64, i64) system_v + sig32 = (i64, i64, i64) system_v + sig33 = (i64, i64, i64) system_v + sig34 = (i64, i64) -> i8 system_v + sig35 = (i64, i64, i64) system_v + sig36 = (i64, i64) -> i8 system_v + sig37 = (i64, i64, i64) system_v + sig38 = (i64, i64, i64) system_v + sig39 = (i64, i64) system_v + sig40 = (i64) system_v + sig41 = (i64, i64) -> i8 system_v + sig42 = (i64, i64, i64) system_v + sig43 = (i64, i64) -> i8 system_v + sig44 = (i64, i64, i64) system_v + sig45 = (i64, i64, i64) system_v + sig46 = (i64, i64) system_v + sig47 = (i64) system_v + sig48 = (i64) system_v + sig49 = (i64) system_v + sig50 = (i64) system_v + sig51 = (i64) system_v + sig52 = (i64) system_v + sig53 = (i64) system_v + sig54 = (i64, i32) system_v + sig55 = (i64) system_v + sig56 = (i64) system_v + sig57 = (i64) system_v + sig58 = (i64) system_v + sig59 = (i64) system_v + sig60 = (i64) system_v + sig61 = (i64) system_v + sig62 = (i64) system_v + sig63 = (i64) system_v + sig64 = (i64) system_v + sig65 = (i64) system_v + sig66 = (i64) system_v + sig67 = (i64) system_v + sig68 = (i64) system_v + sig69 = (i64) system_v + sig70 = (i64, i64, i64) system_v + sig71 = (i64) system_v + sig72 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + sig73 = (i64, i64) -> i8 system_v + sig74 = (i64, i64, i64) system_v + sig75 = (i64, i64) -> i8 system_v + sig76 = (i64, i64, i64) system_v + sig77 = (i64, i64, i64) system_v + sig78 = (i64, i64) system_v + sig79 = (i64) system_v + sig80 = (i64, i64) -> i8 system_v + sig81 = (i64, i64, i64) system_v + sig82 = (i64, i64) -> i8 system_v + sig83 = (i64, i64, i64) system_v + sig84 = (i64, i64, i64) system_v + sig85 = (i64, i64) system_v + sig86 = (i64) system_v + sig87 = (i64) system_v + sig88 = (i64) system_v + sig89 = (i64) system_v + sig90 = (i64) system_v + sig91 = (i64) system_v + sig92 = (i64) system_v + sig93 = (i64) system_v + sig94 = (i64) system_v + sig95 = (i64) system_v + sig96 = (i64) system_v + sig97 = (i64) system_v + sig98 = (i64) system_v + sig99 = (i64) 
system_v + sig100 = (i64) system_v + sig101 = (i64, i64, i64) system_v + sig102 = (i64) system_v + sig103 = (i64) system_v + sig104 = (i64, i64, i16, i64, i64, i64, i64, i64) system_v + sig105 = (i64) system_v + fn0 = u0:83 sig0 + fn1 = u0:13 sig1 + fn2 = u0:83 sig2 + fn3 = u0:13 sig3 + fn4 = u0:83 sig4 + fn5 = u0:13 sig5 + fn6 = u0:84 sig6 + fn7 = u0:85 sig7 + fn8 = u0:83 sig8 + fn9 = u0:13 sig9 + fn10 = u0:86 sig10 fn11 = u0:85 sig11 fn12 = u0:83 sig12 fn13 = u0:13 sig13 + fn14 = u0:16 sig14 fn15 = u0:83 sig15 fn16 = u0:13 sig16 + fn17 = u0:13 sig17 + fn18 = u0:13 sig18 + fn19 = u0:83 sig19 fn20 = u0:13 sig20 + fn21 = u0:13 sig21 + fn22 = u0:87 sig22 fn23 = u0:13 sig23 + fn24 = u0:88 sig24 fn25 = u0:88 sig25 fn26 = u0:13 sig26 + fn27 = u0:13 sig27 + fn28 = u0:13 sig28 + fn29 = u0:13 sig29 + fn30 = u0:89 sig30 fn31 = u0:90 sig31 fn32 = u0:90 sig32 fn33 = u0:90 sig33 fn34 = u0:91 sig34 fn35 = u0:92 sig35 fn36 = u0:91 sig36 fn37 = u0:92 sig37 fn38 = u0:11 sig38 fn39 = u0:12 sig39 fn40 = u0:13 sig40 + fn41 = u0:91 sig41 fn42 = u0:92 sig42 fn43 = u0:91 sig43 fn44 = u0:92 sig44 fn45 = u0:11 sig45 fn46 = u0:12 sig46 fn47 = u0:13 sig47 + fn48 = u0:13 sig48 + fn49 = u0:13 sig49 + fn50 = u0:13 sig50 + fn51 = u0:13 sig51 + fn52 = u0:13 sig52 + fn53 = u0:13 sig53 + fn54 = u0:93 sig54 fn55 = u0:13 sig55 + fn56 = u0:13 sig56 + fn57 = u0:13 sig57 + fn58 = u0:13 sig58 + fn59 = u0:13 sig59 + fn60 = u0:13 sig60 + fn61 = u0:13 sig61 + fn62 = u0:83 sig62 fn63 = u0:13 sig63 + fn64 = u0:13 sig64 + fn65 = u0:13 sig65 + fn66 = u0:13 sig66 + fn67 = u0:13 sig67 + fn68 = u0:13 sig68 + fn69 = u0:13 sig69 + fn70 = u0:94 sig70 fn71 = u0:13 sig71 + fn72 = u0:95 sig72 fn73 = u0:96 sig73 fn74 = u0:97 sig74 fn75 = u0:96 sig75 fn76 = u0:97 sig76 fn77 = u0:11 sig77 fn78 = u0:12 sig78 fn79 = u0:13 sig79 + fn80 = u0:91 sig80 fn81 = u0:92 sig81 fn82 = u0:91 sig82 fn83 = u0:92 sig83 fn84 = u0:11 sig84 fn85 = u0:12 sig85 fn86 = u0:13 sig86 + fn87 = u0:13 sig87 + fn88 = u0:13 sig88 + fn89 = u0:13 sig89 + fn90 = u0:13 sig90 + fn91 = u0:13 sig91 + fn92 = u0:13 sig92 + fn93 = u0:13 sig93 + fn94 = u0:13 sig94 + fn95 = u0:83 sig95 fn96 = u0:13 sig96 + fn97 = u0:13 sig97 + fn98 = u0:13 sig98 + fn99 = u0:13 sig99 + fn100 = u0:13 sig100 + fn101 = u0:94 sig101 + fn102 = u0:13 sig102 + fn103 = u0:13 sig103 + fn104 = u0:95 sig104 + +ebb0(v0: i64, v1: i64, v2: i64): + v113 -> v1 + v124 -> v1 + v136 -> v1 + v148 -> v1 + v160 -> v1 + v185 -> v1 + v222 -> v1 + v237 -> v1 + v241 -> v1 + v256 -> v1 + v262 -> v1 + v3 = stack_addr.i64 ss0 + v4 = load.i64 aligned v2 + store aligned v4, v3 + v5 = load.i64 aligned v2+8 + store aligned v5, v3+8 + v6 = stack_addr.i64 ss1 + v7 = stack_addr.i64 ss2 + v8 = stack_addr.i64 ss3 + v9 = stack_addr.i64 ss4 + v10 = stack_addr.i64 ss5 + v11 = stack_addr.i64 ss6 + v12 = stack_addr.i64 ss7 + v13 = stack_addr.i64 ss8 + v14 = stack_addr.i64 ss9 + v15 = stack_addr.i64 ss10 + v16 = stack_addr.i64 ss11 + v17 = stack_addr.i64 ss12 + v18 = stack_addr.i64 ss13 + v19 = stack_addr.i64 ss14 + v20 = stack_addr.i64 ss15 + v21 = stack_addr.i64 ss16 + v22 = stack_addr.i64 ss17 + v23 = stack_addr.i64 ss18 + v24 = stack_addr.i64 ss19 + v25 = stack_addr.i64 ss20 + v26 = stack_addr.i64 ss21 + v27 = stack_addr.i64 ss22 + v28 = stack_addr.i64 ss23 + v29 = stack_addr.i64 ss24 + v30 = stack_addr.i64 ss25 + v31 = stack_addr.i64 ss26 + v32 = stack_addr.i64 ss27 + v33 = stack_addr.i64 ss28 + v34 = stack_addr.i64 ss29 + v35 = stack_addr.i64 ss30 + v36 = stack_addr.i64 ss31 + v37 = stack_addr.i64 ss32 + v38 = stack_addr.i64 ss33 + v39 = 
stack_addr.i64 ss34 + v40 = stack_addr.i64 ss35 + v41 = stack_addr.i64 ss36 + v42 = stack_addr.i64 ss37 + v43 = stack_addr.i64 ss38 + v44 = stack_addr.i64 ss39 + v45 = stack_addr.i64 ss40 + v46 = stack_addr.i64 ss41 + v47 = stack_addr.i64 ss42 + v48 = stack_addr.i64 ss43 + v49 = stack_addr.i64 ss44 + v50 = stack_addr.i64 ss45 + v51 = stack_addr.i64 ss46 + v52 = stack_addr.i64 ss47 + v53 = stack_addr.i64 ss48 + v54 = stack_addr.i64 ss49 + v55 = stack_addr.i64 ss50 + v56 = stack_addr.i64 ss51 + v57 = stack_addr.i64 ss52 + v58 = stack_addr.i64 ss53 + v59 = stack_addr.i64 ss54 + v60 = stack_addr.i64 ss55 + v61 = stack_addr.i64 ss56 + v62 = stack_addr.i64 ss57 + v63 = stack_addr.i64 ss58 + v64 = stack_addr.i64 ss59 + v65 = stack_addr.i64 ss60 + v66 = stack_addr.i64 ss61 + v67 = stack_addr.i64 ss62 + v68 = stack_addr.i64 ss63 + v69 = stack_addr.i64 ss64 + v70 = stack_addr.i64 ss65 + v71 = stack_addr.i64 ss66 + v72 = stack_addr.i64 ss67 + v73 = stack_addr.i64 ss68 + v74 = stack_addr.i64 ss69 + v75 = stack_addr.i64 ss70 + v76 = stack_addr.i64 ss71 + v77 = stack_addr.i64 ss72 + v78 = stack_addr.i64 ss73 + v79 = stack_addr.i64 ss74 + v80 = stack_addr.i64 ss75 + v81 = stack_addr.i64 ss76 + v82 = stack_addr.i64 ss77 + v83 = stack_addr.i64 ss78 + v84 = stack_addr.i64 ss79 + v85 = stack_addr.i64 ss80 + v86 = stack_addr.i64 ss81 + v87 = stack_addr.i64 ss82 + v88 = stack_addr.i64 ss83 + v89 = stack_addr.i64 ss84 + v90 = stack_addr.i64 ss85 + v91 = stack_addr.i64 ss86 + v92 = stack_addr.i64 ss87 + v93 = stack_addr.i64 ss88 + v94 = stack_addr.i64 ss89 + v95 = stack_addr.i64 ss90 + v96 = stack_addr.i64 ss91 + v97 = stack_addr.i64 ss92 + v98 = stack_addr.i64 ss93 + v99 = stack_addr.i64 ss94 + v100 = stack_addr.i64 ss95 + v101 = stack_addr.i64 ss96 + v102 = stack_addr.i64 ss97 + v103 = stack_addr.i64 ss98 + v104 = stack_addr.i64 ss99 + v105 = stack_addr.i64 ss100 + v106 = stack_addr.i64 ss101 + v107 = stack_addr.i64 ss102 + v108 = stack_addr.i64 ss103 + v109 = stack_addr.i64 ss104 + v110 = stack_addr.i64 ss105 + v111 = stack_addr.i64 ss106 + v112 = stack_addr.i64 ss107 + jump ebb1 + +ebb1: + v114 = load.i64 v113 + v115 = iconst.i64 0 + v116 = icmp ugt v114, v115 + v117 = bint.i8 v116 + v118 = uextend.i32 v117 + v119 = icmp_imm eq v118, 0 + v120 = bint.i8 v119 + v121 = uextend.i32 v120 + brz v121, ebb3 + jump ebb2 + +ebb2: + v122 = global_value.i64 gv0 + v123 = global_value.i64 gv1 + trap user65535 + +ebb3: + v125 = iadd_imm.i64 v124, 8 + v126 = load.i64 v125 + v127 = iconst.i64 0 + v128 = icmp ugt v126, v127 + v129 = bint.i8 v128 + v130 = uextend.i32 v129 + v131 = icmp_imm eq v130, 0 + v132 = bint.i8 v131 + v133 = uextend.i32 v132 + brz v133, ebb5 + jump ebb4 + +ebb4: + v134 = global_value.i64 gv2 + v135 = global_value.i64 gv3 + trap user65535 + +ebb5: + v137 = iadd_imm.i64 v136, 16 + v138 = load.i64 v137+42 + v139 = iconst.i64 0 + v140 = icmp ugt v138, v139 + v141 = bint.i8 v140 + v142 = uextend.i32 v141 + v143 = icmp_imm eq v142, 0 + v144 = bint.i8 v143 + v145 = uextend.i32 v144 + brz v145, ebb7 + jump ebb6 + +ebb6: + v146 = global_value.i64 gv4 + v147 = global_value.i64 gv5 + trap user65535 + +ebb7: + v149 = load.i64 v148 + v150 = iadd_imm.i64 v148, 16 + v151 = load.i64 v150 + call fn6(v7, v149, v151) + jump ebb8 + +ebb8: + v152 = call fn7(v7) + jump ebb9 + +ebb9: + v153 = load.i8 v6 + v154 = uextend.i32 v153 + v155 = icmp_imm eq v154, 0 + v156 = bint.i8 v155 + v157 = uextend.i32 v156 + brz v157, ebb11 + jump ebb10 + +ebb10: + v158 = global_value.i64 gv6 + v159 = global_value.i64 gv7 + trap user65535 + 
+ebb11: + v161 = load.i64 v160 + v162 = iadd_imm.i64 v160, 8 + v163 = load.i64 v162 + call fn10(v9, v161, v163) + jump ebb12 + +ebb12: + v164 = call fn11(v9) + jump ebb13 + +ebb13: + v165 = load.i8 v8 + v166 = uextend.i32 v165 + v167 = icmp_imm eq v166, 0 + v168 = bint.i8 v167 + v169 = uextend.i32 v168 + brz v169, ebb15 + jump ebb14 + +ebb14: + v170 = global_value.i64 gv8 + v171 = global_value.i64 gv9 + trap user65535 + +ebb15: + v172 = load.i64 aligned v3 + v173 = load.i64 aligned v3+8 + v174 = call fn14(v11) + jump ebb16 + +ebb16: + v175 = iconst.i64 17 + v176 = load.i64 v10 + v177 = icmp uge v176, v175 + v178 = bint.i8 v177 + v179 = uextend.i32 v178 + v180 = icmp_imm eq v179, 0 + v181 = bint.i8 v180 + v182 = uextend.i32 v181 + brz v182, ebb18 + jump ebb17 + +ebb17: + v183 = global_value.i64 gv10 + v184 = global_value.i64 gv11 + trap user65535 + +ebb18: + v186 = load.i64 v185 + v187 = iadd_imm.i64 v185, 16 + v188 = load.i64 v187 + v189 = iadd v186, v188 + v190 = iconst.i8 0 + v191 = stack_addr.i64 ss108 + v192 = stack_addr.i64 ss108 + v193 = load.i64 aligned v192 + v194 = load.i64 aligned v192+8 + v195 = iadd_imm.i64 v12, 8 + v196 = load.i8 v195 + v197 = uextend.i32 v196 + brz v197, ebb19 + jump ebb164 + +ebb164: + v198 = global_value.i64 gv12 + trap user0 + +ebb19: + v199 = load.i64 v12 + v213 -> v199 + v200 = iconst.i64 1 + v201 = iconst.i32 61 + v202 = ishl v200, v201 + v203 = iconst.i8 0 + v204 = stack_addr.i64 ss109 + v205 = stack_addr.i64 ss109 + v206 = load.i64 aligned v205 + v207 = load.i64 aligned v205+8 + v208 = iadd_imm.i64 v13, 8 + v209 = load.i8 v208 + v210 = uextend.i32 v209 + brz v210, ebb20 + jump ebb163 + +ebb163: + v211 = global_value.i64 gv13 + trap user0 + +ebb20: + v212 = load.i64 v13 + v214 = icmp.i64 ult v213, v212 + v215 = bint.i8 v214 + v216 = uextend.i32 v215 + v217 = icmp_imm eq v216, 0 + v218 = bint.i8 v217 + v219 = uextend.i32 v218 + brz v219, ebb22 + jump ebb21 + +ebb21: + v220 = global_value.i64 gv14 + v221 = global_value.i64 gv15 + trap user65535 + +ebb22: + v223 = load.i64 v222 + v224 = iadd_imm.i64 v222, 16 + v225 = load.i64 v224 + v226 = iadd v223, v225 + v227 = iconst.i8 0 + v228 = stack_addr.i64 ss110 + v229 = stack_addr.i64 ss110 + v230 = load.i64 aligned v229 + v231 = load.i64 aligned v229+8 + v232 = iadd_imm.i64 v16, 8 + v233 = load.i8 v232 + v234 = uextend.i32 v233 + brz v234, ebb23 + jump ebb162 + +ebb162: + v235 = global_value.i64 gv16 + trap user0 + +ebb23: + v236 = load.i64 v16 + v238 = iadd_imm.i64 v237, 24 + v239 = load.i16 v238 + v240 = iadd_imm.i64 v15, 8 + call fn22(v14, v15) + jump ebb24 + +ebb24: + v242 = load.i64 v241 + v243 = iadd_imm.i64 v241, 8 + v244 = load.i64 v243 + v245 = isub v242, v244 + v246 = iconst.i8 0 + v247 = stack_addr.i64 ss111 + v248 = stack_addr.i64 ss111 + v249 = load.i64 aligned v248 + v250 = load.i64 aligned v248+8 + v251 = iadd_imm.i64 v19, 8 + v252 = load.i8 v251 + v253 = uextend.i32 v252 + brz v253, ebb25 + jump ebb161 + +ebb161: + v254 = global_value.i64 gv17 + trap user0 + +ebb25: + v255 = load.i64 v19 + v257 = iadd_imm.i64 v256, 24 + v258 = load.i16 v257 + v259 = iadd_imm.i64 v18, 8 + v260 = iadd_imm.i64 v14, 8 + v261 = load.i16 v260 + call fn24(v17, v18, v261) + jump ebb26 + +ebb26: + v263 = load.i64 v262 + v264 = iadd_imm.i64 v262, 24 + v265 = load.i16 v264 + v266 = iadd_imm.i64 v21, 8 + v267 = iadd_imm.i64 v14, 8 + v268 = load.i16 v267 + call fn25(v20, v21, v268) + jump ebb27 + +ebb27: + v269 = iadd_imm.i64 v14, 8 + v270 = load.i16 v269 + v271 = iconst.i16 -60 + v272 = isub v271, v270 + v273 = iconst.i8 0 
+ v274 = stack_addr.i64 ss112 + v275 = stack_addr.i64 ss112 + v276 = load.i32 aligned v275 + v277 = iadd_imm.i64 v24, 2 + v278 = load.i8 v277 + v279 = uextend.i32 v278 + brz v279, ebb28 + jump ebb160 + +ebb160: + v280 = global_value.i64 gv18 + trap user0 + +ebb28: + v281 = load.i16 v24 + v282 = iconst.i16 64 + v283 = isub v281, v282 + v284 = iconst.i8 0 + v285 = stack_addr.i64 ss113 + v286 = stack_addr.i64 ss113 + v287 = load.i32 aligned v286 + v288 = iadd_imm.i64 v25, 2 + v289 = load.i8 v288 + v290 = uextend.i32 v289 + brz v290, ebb29 + jump ebb159 + +ebb159: + v291 = global_value.i64 gv19 + trap user0 + +ebb29: + v292 = load.i16 v25 + v317 -> v292 + v293 = iadd_imm.i64 v14, 8 + v294 = load.i16 v293 + v295 = iconst.i16 -32 + v296 = isub v295, v294 + v297 = iconst.i8 0 + v298 = stack_addr.i64 ss114 + v299 = stack_addr.i64 ss114 + v300 = load.i32 aligned v299 + v301 = iadd_imm.i64 v26, 2 + v302 = load.i8 v301 + v303 = uextend.i32 v302 + brz v303, ebb30 + jump ebb158 + +ebb158: + v304 = global_value.i64 gv20 + trap user0 + +ebb30: + v305 = load.i16 v26 + v306 = iconst.i16 64 + v307 = isub v305, v306 + v308 = iconst.i8 0 + v309 = stack_addr.i64 ss115 + v310 = stack_addr.i64 ss115 + v311 = load.i32 aligned v310 + v312 = iadd_imm.i64 v27, 2 + v313 = load.i8 v312 + v314 = uextend.i32 v313 + brz v314, ebb31 + jump ebb157 + +ebb157: + v315 = global_value.i64 gv21 + trap user0 + +ebb31: + v316 = load.i16 v27 + call fn30(v23, v317, v316) + jump ebb32 + +ebb32: + v318 = load.i16 v23 + v1007 -> v318 + v319 = iadd_imm.i64 v23, 8 + v320 = load.i64 aligned v319 + v321 = load.i64 aligned v319+8 + call fn31(v28, v14, v22) + jump ebb33 + +ebb33: + call fn32(v29, v17, v22) + jump ebb34 + +ebb34: + call fn33(v30, v20, v22) + jump ebb35 + +ebb35: + v322 = iconst.i8 1 + v323 = uextend.i32 v322 + brz v323, ebb42 + jump ebb36 + +ebb36: + v324 = iadd_imm.i64 v28, 8 + v325 = iadd_imm.i64 v29, 8 + v326 = iadd_imm.i64 v31, 8 + v327 = load.i64 v31 + v340 -> v327 + v328 = iadd_imm.i64 v31, 8 + v329 = load.i64 v328 + v341 -> v329 + v330 = load.i16 v327 + v331 = load.i16 v329 + v332 = icmp eq v330, v331 + v333 = bint.i8 v332 + v334 = uextend.i32 v333 + v335 = icmp_imm eq v334, 0 + v336 = bint.i8 v335 + v337 = uextend.i32 v336 + brz v337, ebb38 + jump ebb37 + +ebb37: + v338 = global_value.i64 gv22 + v339 = iconst.i64 3 + v342 = iadd_imm.i64 v36, 8 + v343 = load.i64 v36 + v344 = iadd_imm.i64 v36, 8 + v345 = load.i64 v344 + v347 -> v345 + v346 = func_addr.i64 fn34 + call fn35(v39, v343, v346) + jump ebb39 + +ebb38: + jump ebb42 + +ebb39: + v348 = func_addr.i64 fn36 + call fn37(v40, v347, v348) + jump ebb40 + +ebb40: + v349 = iconst.i64 0 + v350 = imul_imm v349, 16 + v351 = iadd.i64 v35, v350 + v352 = load.i64 aligned v39 + v353 = load.i64 aligned v39+8 + v354 = iconst.i64 1 + v355 = imul_imm v354, 16 + v356 = iadd.i64 v35, v355 + v357 = load.i64 aligned v40 + v358 = load.i64 aligned v40+8 + v359 = iconst.i64 2 + call fn38(v32, v33, v34) + jump ebb41 + +ebb41: + v360 = global_value.i64 gv23 + call fn39(v32, v360) + v361 = global_value.i64 gv24 + trap user65535 + +ebb42: + v362 = iconst.i8 1 + v363 = uextend.i32 v362 + brz v363, ebb49(v1007) + jump ebb43 + +ebb43: + v364 = iadd_imm.i64 v28, 8 + v365 = iadd_imm.i64 v30, 8 + v366 = iadd_imm.i64 v41, 8 + v367 = load.i64 v41 + v380 -> v367 + v368 = iadd_imm.i64 v41, 8 + v369 = load.i64 v368 + v381 -> v369 + v370 = load.i16 v367 + v371 = load.i16 v369 + v372 = icmp eq v370, v371 + v373 = bint.i8 v372 + v374 = uextend.i32 v373 + v375 = icmp_imm eq v374, 0 + v376 = bint.i8 v375 + 
v377 = uextend.i32 v376 + brz v377, ebb45 + jump ebb44 + +ebb44: + v378 = global_value.i64 gv25 + v379 = iconst.i64 3 + v382 = iadd_imm.i64 v46, 8 + v383 = load.i64 v46 + v384 = iadd_imm.i64 v46, 8 + v385 = load.i64 v384 + v387 -> v385 + v386 = func_addr.i64 fn41 + call fn42(v49, v383, v386) + jump ebb46 + +ebb45: + jump ebb49(v1007) + +ebb46: + v388 = func_addr.i64 fn43 + call fn44(v50, v387, v388) + jump ebb47 + +ebb47: + v389 = iconst.i64 0 + v390 = imul_imm v389, 16 + v391 = iadd.i64 v45, v390 + v392 = load.i64 aligned v49 + v393 = load.i64 aligned v49+8 + v394 = iconst.i64 1 + v395 = imul_imm v394, 16 + v396 = iadd.i64 v45, v395 + v397 = load.i64 aligned v50 + v398 = load.i64 aligned v50+8 + v399 = iconst.i64 2 + call fn45(v42, v43, v44) + jump ebb48 + +ebb48: + v400 = global_value.i64 gv26 + call fn46(v42, v400) + v401 = global_value.i64 gv27 + trap user65535 + +ebb49(v1006: i16): + v486 -> v1006 + v402 = load.i64 v28 + v403 = iconst.i64 1 + v404 = iadd v402, v403 + v405 = iconst.i8 0 + v406 = stack_addr.i64 ss116 + v407 = stack_addr.i64 ss116 + v408 = load.i64 aligned v407 + v409 = load.i64 aligned v407+8 + v410 = iadd_imm.i64 v51, 8 + v411 = load.i8 v410 + v412 = uextend.i32 v411 + brz v412, ebb50 + jump ebb156 + +ebb156: + v413 = global_value.i64 gv28 + trap user0 + +ebb50: + v414 = load.i64 v51 + v439 -> v414 + v452 -> v414 + v478 -> v414 + v508 -> v414 + v415 = load.i64 v29 + v416 = iconst.i64 1 + v417 = isub v415, v416 + v418 = iconst.i8 0 + v419 = stack_addr.i64 ss117 + v420 = stack_addr.i64 ss117 + v421 = load.i64 aligned v420 + v422 = load.i64 aligned v420+8 + v423 = iadd_imm.i64 v52, 8 + v424 = load.i8 v423 + v425 = uextend.i32 v424 + brz v425, ebb51 + jump ebb155 + +ebb155: + v426 = global_value.i64 gv29 + trap user0 + +ebb51: + v427 = load.i64 v52 + v509 -> v427 + v428 = iadd_imm.i64 v28, 8 + v429 = load.i16 v428 + v435 -> v429 + v430 = iconst.i16 0xffff_ffff_ffff_8000 + v431 = icmp eq v429, v430 + v432 = bint.i8 v431 + v433 = uextend.i32 v432 + brz v433, ebb52 + jump ebb154 + +ebb154: + v434 = global_value.i64 gv30 + trap user0 + +ebb52: + v436 = iconst.i16 0 + v437 = isub v436, v435 + v438 = sextend.i64 v437 + v453 -> v438 + v521 -> v438 + v440 = ushr.i64 v439, v438 + v441 = iconst.i8 0 + v442 = stack_addr.i64 ss118 + v443 = stack_addr.i64 ss118 + v444 = load.i64 aligned v443 + v445 = load.i64 aligned v443+8 + v446 = iadd_imm.i64 v53, 8 + v447 = load.i8 v446 + v448 = uextend.i32 v447 + brz v448, ebb53 + jump ebb153 + +ebb153: + v449 = global_value.i64 gv31 + trap user0 + +ebb53: + v450 = load.i64 v53 + v451 = ireduce.i32 v450 + v480 -> v451 + v551 -> v451 + v454 = iconst.i64 1 + v455 = ishl v454, v453 + v456 = iconst.i8 0 + v457 = stack_addr.i64 ss119 + v458 = stack_addr.i64 ss119 + v459 = load.i64 aligned v458 + v460 = load.i64 aligned v458+8 + v461 = iadd_imm.i64 v54, 8 + v462 = load.i8 v461 + v463 = uextend.i32 v462 + brz v463, ebb54 + jump ebb152 + +ebb152: + v464 = global_value.i64 gv32 + trap user0 + +ebb54: + v465 = load.i64 v54 + v466 = iconst.i64 1 + v467 = isub v465, v466 + v468 = iconst.i8 0 + v469 = stack_addr.i64 ss120 + v470 = stack_addr.i64 ss120 + v471 = load.i64 aligned v470 + v472 = load.i64 aligned v470+8 + v473 = iadd_imm.i64 v55, 8 + v474 = load.i8 v473 + v475 = uextend.i32 v474 + brz v475, ebb55 + jump ebb151 + +ebb151: + v476 = global_value.i64 gv33 + trap user0 + +ebb55: + v477 = load.i64 v55 + v479 = band.i64 v478, v477 + call fn54(v56, v480) + jump ebb56 + +ebb56: + v481 = load.i8 v56 + v548 -> v481 + v482 = iadd_imm.i64 v56, 4 + v483 = 
load.i32 v482 + v550 -> v483 + v484 = iconst.i64 0 + v485 = uextend.i16 v481 + v487 = isub v485, v486 + v488 = iconst.i8 0 + v489 = stack_addr.i64 ss121 + v490 = stack_addr.i64 ss121 + v491 = load.i32 aligned v490 + v492 = iadd_imm.i64 v57, 2 + v493 = load.i8 v492 + v494 = uextend.i32 v493 + brz v494, ebb57 + jump ebb150 + +ebb150: + v495 = global_value.i64 gv34 + trap user0 + +ebb57: + v496 = load.i16 v57 + v497 = iconst.i16 1 + v498 = iadd v496, v497 + v499 = iconst.i8 0 + v500 = stack_addr.i64 ss122 + v501 = stack_addr.i64 ss122 + v502 = load.i32 aligned v501 + v503 = iadd_imm.i64 v58, 2 + v504 = load.i8 v503 + v505 = uextend.i32 v504 + brz v505, ebb58 + jump ebb149 + +ebb149: + v506 = global_value.i64 gv35 + trap user0 + +ebb58: + v507 = load.i16 v58 + v510 = isub.i64 v508, v509 + v511 = iconst.i8 0 + v512 = stack_addr.i64 ss123 + v513 = stack_addr.i64 ss123 + v514 = load.i64 aligned v513 + v515 = load.i64 aligned v513+8 + v516 = iadd_imm.i64 v59, 8 + v517 = load.i8 v516 + v518 = uextend.i32 v517 + brz v518, ebb59 + jump ebb148 + +ebb148: + v519 = global_value.i64 gv36 + trap user0 + +ebb59: + v520 = load.i64 v59 + v546 -> v520 + v522 = iconst.i64 1 + v523 = ishl v522, v521 + v524 = iconst.i8 0 + v525 = stack_addr.i64 ss124 + v526 = stack_addr.i64 ss124 + v527 = load.i64 aligned v526 + v528 = load.i64 aligned v526+8 + v529 = iadd_imm.i64 v60, 8 + v530 = load.i8 v529 + v531 = uextend.i32 v530 + brz v531, ebb60 + jump ebb147 + +ebb147: + v532 = global_value.i64 gv37 + trap user0 + +ebb60: + v533 = load.i64 v60 + v534 = iconst.i64 1 + v535 = isub v533, v534 + v536 = iconst.i8 0 + v537 = stack_addr.i64 ss125 + v538 = stack_addr.i64 ss125 + v539 = load.i64 aligned v538 + v540 = load.i64 aligned v538+8 + v541 = iadd_imm.i64 v61, 8 + v542 = load.i8 v541 + v543 = uextend.i32 v542 + brz v543, ebb61 + jump ebb146 + +ebb146: + v544 = global_value.i64 gv38 + trap user0 + +ebb61: + v545 = load.i64 v61 + v547 = band.i64 v546, v545 + v549 = uextend.i16 v548 + jump ebb62(v551, v484, v521, v479, v520, v507, v508, v548, v547) + +ebb62(v552: i32, v1009: i64, v1013: i64, v1016: i64, v1019: i64, v1022: i16, v1025: i64, v1028: i8, v1033: i64): + v559 -> v552 + v562 -> v552 + v569 -> v552 + v596 -> v1009 + v605 -> v1009 + v609 -> v1009 + v1008 -> v1009 + v624 -> v1013 + v654 -> v1013 + v1012 -> v1013 + v1014 -> v1013 + v1041 -> v1013 + v636 -> v1016 + v1015 -> v1016 + v1017 -> v1016 + v1030 -> v1016 + v648 -> v1019 + v676 -> v1019 + v693 -> v1019 + v1018 -> v1019 + v1020 -> v1019 + v674 -> v1022 + v691 -> v1022 + v1021 -> v1022 + v1023 -> v1022 + v1054 -> v1022 + v677 -> v1025 + v1024 -> v1025 + v1026 -> v1025 + v1059 -> v1025 + v696 -> v1028 + v1027 -> v1028 + v1029 -> v1028 + v1031 -> v1033 + v1032 -> v1033 + v1034 -> v1033 + v553 = load.i32 v63 + v560 -> v553 + v554 = iconst.i32 0 + v555 = icmp eq v553, v554 + v556 = bint.i8 v555 + v557 = uextend.i32 v556 + brz v557, ebb63 + jump ebb145 + +ebb145: + v558 = global_value.i64 gv39 + trap user0 + +ebb63: + v561 = udiv.i32 v559, v560 + v574 -> v561 + v563 = load.i32 v63 + v570 -> v563 + v564 = iconst.i32 0 + v565 = icmp eq v563, v564 + v566 = bint.i8 v565 + v567 = uextend.i32 v566 + brz v567, ebb64 + jump ebb144 + +ebb144: + v568 = global_value.i64 gv40 + trap user0 + +ebb64: + v571 = urem.i32 v569, v570 + v622 -> v571 + v803 -> v571 + v1011 -> v571 + v572 = iconst.i8 1 + v573 = uextend.i32 v572 + brz v573, ebb68(v561) + jump ebb65 + +ebb65: + v575 = iconst.i32 10 + v576 = icmp.i32 ult v574, v575 + v577 = bint.i8 v576 + v578 = uextend.i32 v577 + v579 = 
icmp_imm eq v578, 0 + v580 = bint.i8 v579 + v581 = uextend.i32 v580 + brz v581, ebb67 + jump ebb66 + +ebb66: + v582 = global_value.i64 gv41 + v583 = global_value.i64 gv42 + trap user65535 + +ebb67: + jump ebb68(v574) + +ebb68(v584: i32): + v585 = ireduce.i8 v584 + v586 = iconst.i8 48 + v587 = iadd v586, v585 + v588 = iconst.i8 0 + v589 = stack_addr.i64 ss126 + v590 = stack_addr.i64 ss126 + v591 = load.i16 aligned v590 + v592 = iadd_imm.i64 v64, 1 + v593 = load.i8 v592 + v594 = uextend.i32 v593 + brz v594, ebb69 + jump ebb143 + +ebb143: + v595 = global_value.i64 gv43 + trap user0 + +ebb69: + v597 = load.i64 v3 + v598 = load.i64 v3+8 + v599 = icmp.i64 ult v596, v598 + v600 = bint.i8 v599 + v601 = uextend.i32 v600 + brnz v601, ebb70 + jump ebb142 + +ebb142: + v602 = global_value.i64 gv44 + trap user0 + +ebb70: + v603 = load.i64 v3 + v604 = load.i64 v3+8 + v606 = imul_imm.i64 v605, 1 + v607 = iadd v603, v606 + v608 = load.i8 aligned v64 + v610 = iconst.i64 1 + v611 = iadd.i64 v609, v610 + v612 = iconst.i8 0 + v613 = stack_addr.i64 ss127 + v614 = stack_addr.i64 ss127 + v615 = load.i64 aligned v614 + v616 = load.i64 aligned v614+8 + v617 = iadd_imm.i64 v65, 8 + v618 = load.i8 v617 + v619 = uextend.i32 v618 + brz v619, ebb71 + jump ebb141 + +ebb141: + v620 = global_value.i64 gv45 + trap user0 + +ebb71: + v621 = load.i64 v65 + v668 -> v621 + v695 -> v621 + v1010 -> v621 + v1046 -> v621 + v623 = uextend.i64 v622 + v625 = ishl v623, v624 + v626 = iconst.i8 0 + v627 = stack_addr.i64 ss128 + v628 = stack_addr.i64 ss128 + v629 = load.i64 aligned v628 + v630 = load.i64 aligned v628+8 + v631 = iadd_imm.i64 v66, 8 + v632 = load.i8 v631 + v633 = uextend.i32 v632 + brz v633, ebb72 + jump ebb140 + +ebb140: + v634 = global_value.i64 gv46 + trap user0 + +ebb72: + v635 = load.i64 v66 + v637 = iadd v635, v636 + v638 = iconst.i8 0 + v639 = stack_addr.i64 ss129 + v640 = stack_addr.i64 ss129 + v641 = load.i64 aligned v640 + v642 = load.i64 aligned v640+8 + v643 = iadd_imm.i64 v67, 8 + v644 = load.i8 v643 + v645 = uextend.i32 v644 + brz v645, ebb73 + jump ebb139 + +ebb139: + v646 = global_value.i64 gv47 + trap user0 + +ebb73: + v647 = load.i64 v67 + v675 -> v647 + v692 -> v647 + v649 = icmp ult v647, v648 + v650 = bint.i8 v649 + v651 = uextend.i32 v650 + brz v651, ebb80 + jump ebb74 + +ebb74: + v652 = load.i32 v63 + v653 = uextend.i64 v652 + v655 = ishl v653, v654 + v656 = iconst.i8 0 + v657 = stack_addr.i64 ss130 + v658 = stack_addr.i64 ss130 + v659 = load.i64 aligned v658 + v660 = load.i64 aligned v658+8 + v661 = iadd_imm.i64 v68, 8 + v662 = load.i8 v661 + v663 = uextend.i32 v662 + brz v663, ebb75 + jump ebb138 + +ebb138: + v664 = global_value.i64 gv48 + trap user0 + +ebb75: + v665 = load.i64 v68 + v690 -> v665 + v666 = load.i64 aligned v3 + v667 = load.i64 aligned v3+8 + v669 = load.i64 v73 + call fn70(v71, v72, v669) + jump ebb76 + +ebb76: + v670 = load.i64 aligned v71 + v671 = load.i64 aligned v71+8 + v672 = load.i64 aligned v70 + v673 = load.i64 aligned v70+8 + v678 = load.i64 v30 + v679 = isub.i64 v677, v678 + v680 = iconst.i8 0 + v681 = stack_addr.i64 ss131 + v682 = stack_addr.i64 ss131 + v683 = load.i64 aligned v682 + v684 = load.i64 aligned v682+8 + v685 = iadd_imm.i64 v74, 8 + v686 = load.i8 v685 + v687 = uextend.i32 v686 + brz v687, ebb77 + jump ebb137 + +ebb137: + v688 = global_value.i64 gv49 + trap user0 + +ebb77: + v689 = load.i64 v74 + v694 = iconst.i64 1 + call fn72(v0, v69, v691, v692, v693, v689, v690, v694) + jump ebb78 + +ebb78: + jump ebb79 + +ebb79: + return + +ebb80: + v697 = uextend.i64 v696 
+ v698 = icmp.i64 ugt v695, v697 + v699 = bint.i8 v698 + v700 = uextend.i32 v699 + brz v700, ebb96 + jump ebb81 + +ebb81: + v701 = iconst.i8 1 + v702 = uextend.i32 v701 + brz v702, ebb88 + jump ebb82 + +ebb82: + v703 = global_value.i64 gv50 + v704 = iadd_imm.i64 v75, 8 + v705 = load.i64 v75 + v718 -> v705 + v706 = iadd_imm.i64 v75, 8 + v707 = load.i64 v706 + v719 -> v707 + v708 = load.i32 v705 + v709 = load.i32 v707 + v710 = icmp eq v708, v709 + v711 = bint.i8 v710 + v712 = uextend.i32 v711 + v713 = icmp_imm eq v712, 0 + v714 = bint.i8 v713 + v715 = uextend.i32 v714 + brz v715, ebb84 + jump ebb83 + +ebb83: + v716 = global_value.i64 gv51 + v717 = iconst.i64 3 + v720 = iadd_imm.i64 v80, 8 + v721 = load.i64 v80 + v722 = iadd_imm.i64 v80, 8 + v723 = load.i64 v722 + v725 -> v723 + v724 = func_addr.i64 fn73 + call fn74(v83, v721, v724) + jump ebb85 + +ebb84: + jump ebb88 + +ebb85: + v726 = func_addr.i64 fn75 + call fn76(v84, v725, v726) + jump ebb86 + +ebb86: + v727 = iconst.i64 0 + v728 = imul_imm v727, 16 + v729 = iadd.i64 v79, v728 + v730 = load.i64 aligned v83 + v731 = load.i64 aligned v83+8 + v732 = iconst.i64 1 + v733 = imul_imm v732, 16 + v734 = iadd.i64 v79, v733 + v735 = load.i64 aligned v84 + v736 = load.i64 aligned v84+8 + v737 = iconst.i64 2 + call fn77(v76, v77, v78) + jump ebb87 + +ebb87: + v738 = global_value.i64 gv52 + call fn78(v76, v738) + v739 = global_value.i64 gv53 + trap user65535 + +ebb88: + v740 = iconst.i8 1 + v741 = uextend.i32 v740 + brz v741, ebb95(v1030, v1031, v1041, v1046, v1054, v1059) + jump ebb89 + +ebb89: + v742 = global_value.i64 gv54 + v743 = iadd_imm.i64 v85, 8 + v744 = load.i64 v85 + v757 -> v744 + v745 = iadd_imm.i64 v85, 8 + v746 = load.i64 v745 + v758 -> v746 + v747 = load.i16 v744 + v748 = load.i16 v746 + v749 = icmp eq v747, v748 + v750 = bint.i8 v749 + v751 = uextend.i32 v750 + v752 = icmp_imm eq v751, 0 + v753 = bint.i8 v752 + v754 = uextend.i32 v753 + brz v754, ebb91 + jump ebb90 + +ebb90: + v755 = global_value.i64 gv55 + v756 = iconst.i64 3 + v759 = iadd_imm.i64 v90, 8 + v760 = load.i64 v90 + v761 = iadd_imm.i64 v90, 8 + v762 = load.i64 v761 + v764 -> v762 + v763 = func_addr.i64 fn80 + call fn81(v93, v760, v763) + jump ebb92 + +ebb91: + jump ebb95(v1030, v1031, v1041, v1046, v1054, v1059) + +ebb92: + v765 = func_addr.i64 fn82 + call fn83(v94, v764, v765) + jump ebb93 + +ebb93: + v766 = iconst.i64 0 + v767 = imul_imm v766, 16 + v768 = iadd.i64 v89, v767 + v769 = load.i64 aligned v93 + v770 = load.i64 aligned v93+8 + v771 = iconst.i64 1 + v772 = imul_imm v771, 16 + v773 = iadd.i64 v89, v772 + v774 = load.i64 aligned v94 + v775 = load.i64 aligned v94+8 + v776 = iconst.i64 2 + call fn84(v86, v87, v88) + jump ebb94 + +ebb94: + v777 = global_value.i64 gv56 + call fn85(v86, v777) + v778 = global_value.i64 gv57 + trap user65535 + +ebb95(v779: i64, v780: i64, v1040: i64, v1045: i64, v1053: i16, v1058: i64): + v781 = iconst.i64 1 + jump ebb99(v779, v780, v781, v1040, v1045, v1053, v1058) + +ebb96: + v782 = iconst.i16 1 + v783 = load.i16 v62 + v784 = isub v783, v782 + v785 = iconst.i8 0 + v786 = stack_addr.i64 ss132 + v787 = stack_addr.i64 ss132 + v788 = load.i32 aligned v787 + v789 = iadd_imm.i64 v95, 2 + v790 = load.i8 v789 + v791 = uextend.i32 v790 + brz v791, ebb97 + jump ebb136 + +ebb136: + v792 = global_value.i64 gv58 + trap user0 + +ebb97: + v793 = load.i16 aligned v95 + v794 = iconst.i32 10 + v795 = iconst.i32 0 + v796 = icmp eq v794, v795 + v797 = bint.i8 v796 + v798 = uextend.i32 v797 + brz v798, ebb98 + jump ebb135 + +ebb135: + v799 = 
global_value.i64 gv59 + trap user0 + +ebb98: + v800 = iconst.i32 10 + v801 = load.i32 v63 + v802 = udiv v801, v800 + jump ebb62(v803, v1010, v1014, v1017, v1020, v1023, v1026, v1029, v1034) + +ebb99(v804: i64, v1035: i64, v1037: i64, v1039: i64, v1044: i64, v1052: i16, v1057: i64): + v817 -> v1035 + v830 -> v1037 + v844 -> v1039 + v857 -> v1039 + v939 -> v1039 + v1042 -> v1039 + v1050 -> v1039 + v908 -> v1044 + v917 -> v1044 + v921 -> v1044 + v1043 -> v1044 + v960 -> v1052 + v990 -> v1052 + v1051 -> v1052 + v1055 -> v1052 + v963 -> v1057 + v1056 -> v1057 + v1060 -> v1057 + v805 = iconst.i64 10 + v806 = imul v804, v805 + v807 = iconst.i8 0 + v808 = stack_addr.i64 ss133 + v809 = stack_addr.i64 ss133 + v810 = load.i64 aligned v809 + v811 = load.i64 aligned v809+8 + v812 = iadd_imm.i64 v96, 8 + v813 = load.i8 v812 + v814 = uextend.i32 v813 + brz v814, ebb100 + jump ebb134 + +ebb134: + v815 = global_value.i64 gv60 + trap user0 + +ebb100: + v816 = load.i64 v96 + v843 -> v816 + v856 -> v816 + v882 -> v816 + v818 = iconst.i64 10 + v819 = imul.i64 v817, v818 + v820 = iconst.i8 0 + v821 = stack_addr.i64 ss134 + v822 = stack_addr.i64 ss134 + v823 = load.i64 aligned v822 + v824 = load.i64 aligned v822+8 + v825 = iadd_imm.i64 v97, 8 + v826 = load.i8 v825 + v827 = uextend.i32 v826 + brz v827, ebb101 + jump ebb133 + +ebb133: + v828 = global_value.i64 gv61 + trap user0 + +ebb101: + v829 = load.i64 v97 + v935 -> v829 + v962 -> v829 + v992 -> v829 + v1036 -> v829 + v1049 -> v829 + v831 = iconst.i64 10 + v832 = imul.i64 v830, v831 + v833 = iconst.i8 0 + v834 = stack_addr.i64 ss135 + v835 = stack_addr.i64 ss135 + v836 = load.i64 aligned v835 + v837 = load.i64 aligned v835+8 + v838 = iadd_imm.i64 v98, 8 + v839 = load.i8 v838 + v840 = uextend.i32 v839 + brz v840, ebb102 + jump ebb132 + +ebb132: + v841 = global_value.i64 gv62 + trap user0 + +ebb102: + v842 = load.i64 v98 + v976 -> v842 + v989 -> v842 + v1038 -> v842 + v1061 -> v842 + v845 = ushr.i64 v843, v844 + v846 = iconst.i8 0 + v847 = stack_addr.i64 ss136 + v848 = stack_addr.i64 ss136 + v849 = load.i64 aligned v848 + v850 = load.i64 aligned v848+8 + v851 = iadd_imm.i64 v99, 8 + v852 = load.i8 v851 + v853 = uextend.i32 v852 + brz v853, ebb103 + jump ebb131 + +ebb131: + v854 = global_value.i64 gv63 + trap user0 + +ebb103: + v855 = load.i64 v99 + v886 -> v855 + v858 = iconst.i64 1 + v859 = ishl v858, v857 + v860 = iconst.i8 0 + v861 = stack_addr.i64 ss137 + v862 = stack_addr.i64 ss137 + v863 = load.i64 aligned v862 + v864 = load.i64 aligned v862+8 + v865 = iadd_imm.i64 v100, 8 + v866 = load.i8 v865 + v867 = uextend.i32 v866 + brz v867, ebb104 + jump ebb130 + +ebb130: + v868 = global_value.i64 gv64 + trap user0 + +ebb104: + v869 = load.i64 v100 + v870 = iconst.i64 1 + v871 = isub v869, v870 + v872 = iconst.i8 0 + v873 = stack_addr.i64 ss138 + v874 = stack_addr.i64 ss138 + v875 = load.i64 aligned v874 + v876 = load.i64 aligned v874+8 + v877 = iadd_imm.i64 v101, 8 + v878 = load.i8 v877 + v879 = uextend.i32 v878 + brz v879, ebb105 + jump ebb129 + +ebb129: + v880 = global_value.i64 gv65 + trap user0 + +ebb105: + v881 = load.i64 v101 + v883 = band.i64 v882, v881 + v934 -> v883 + v961 -> v883 + v991 -> v883 + v1005 -> v883 + v1048 -> v883 + v884 = iconst.i8 1 + v885 = uextend.i32 v884 + brz v885, ebb109(v855) + jump ebb106 + +ebb106: + v887 = iconst.i64 10 + v888 = icmp.i64 ult v886, v887 + v889 = bint.i8 v888 + v890 = uextend.i32 v889 + v891 = icmp_imm eq v890, 0 + v892 = bint.i8 v891 + v893 = uextend.i32 v892 + brz v893, ebb108 + jump ebb107 + +ebb107: + v894 = 
global_value.i64 gv66 + v895 = global_value.i64 gv67 + trap user65535 + +ebb108: + jump ebb109(v886) + +ebb109(v896: i64): + v897 = ireduce.i8 v896 + v898 = iconst.i8 48 + v899 = iadd v898, v897 + v900 = iconst.i8 0 + v901 = stack_addr.i64 ss139 + v902 = stack_addr.i64 ss139 + v903 = load.i16 aligned v902 + v904 = iadd_imm.i64 v102, 1 + v905 = load.i8 v904 + v906 = uextend.i32 v905 + brz v906, ebb110 + jump ebb128 + +ebb128: + v907 = global_value.i64 gv68 + trap user0 + +ebb110: + v909 = load.i64 v3 + v910 = load.i64 v3+8 + v911 = icmp.i64 ult v908, v910 + v912 = bint.i8 v911 + v913 = uextend.i32 v912 + brnz v913, ebb111 + jump ebb127 + +ebb127: + v914 = global_value.i64 gv69 + trap user0 + +ebb111: + v915 = load.i64 v3 + v916 = load.i64 v3+8 + v918 = imul_imm.i64 v917, 1 + v919 = iadd v915, v918 + v920 = load.i8 aligned v102 + v922 = iconst.i64 1 + v923 = iadd.i64 v921, v922 + v924 = iconst.i8 0 + v925 = stack_addr.i64 ss140 + v926 = stack_addr.i64 ss140 + v927 = load.i64 aligned v926 + v928 = load.i64 aligned v926+8 + v929 = iadd_imm.i64 v103, 8 + v930 = load.i8 v929 + v931 = uextend.i32 v930 + brz v931, ebb112 + jump ebb126 + +ebb126: + v932 = global_value.i64 gv70 + trap user0 + +ebb112: + v933 = load.i64 v103 + v954 -> v933 + v1047 -> v933 + v936 = icmp.i64 ult v934, v935 + v937 = bint.i8 v936 + v938 = uextend.i32 v937 + brz v938, ebb119 + jump ebb113 + +ebb113: + v940 = iconst.i64 1 + v941 = ishl v940, v939 + v942 = iconst.i8 0 + v943 = stack_addr.i64 ss141 + v944 = stack_addr.i64 ss141 + v945 = load.i64 aligned v944 + v946 = load.i64 aligned v944+8 + v947 = iadd_imm.i64 v104, 8 + v948 = load.i8 v947 + v949 = uextend.i32 v948 + brz v949, ebb114 + jump ebb125 + +ebb125: + v950 = global_value.i64 gv71 + trap user0 + +ebb114: + v951 = load.i64 v104 + v988 -> v951 + v952 = load.i64 aligned v3 + v953 = load.i64 aligned v3+8 + v955 = load.i64 v109 + call fn101(v107, v108, v955) + jump ebb115 + +ebb115: + v956 = load.i64 aligned v107 + v957 = load.i64 aligned v107+8 + v958 = load.i64 aligned v106 + v959 = load.i64 aligned v106+8 + v964 = load.i64 v30 + v965 = isub.i64 v963, v964 + v966 = iconst.i8 0 + v967 = stack_addr.i64 ss142 + v968 = stack_addr.i64 ss142 + v969 = load.i64 aligned v968 + v970 = load.i64 aligned v968+8 + v971 = iadd_imm.i64 v110, 8 + v972 = load.i8 v971 + v973 = uextend.i32 v972 + brz v973, ebb116 + jump ebb123 + +ebb123: + v974 = global_value.i64 gv72 + trap user0 + +ebb116: + v975 = load.i64 v110 + v977 = imul v975, v976 + v978 = iconst.i8 0 + v979 = stack_addr.i64 ss143 + v980 = stack_addr.i64 ss143 + v981 = load.i64 aligned v980 + v982 = load.i64 aligned v980+8 + v983 = iadd_imm.i64 v111, 8 + v984 = load.i8 v983 + v985 = uextend.i32 v984 + brz v985, ebb117 + jump ebb122 + +ebb122: + v986 = global_value.i64 gv73 + trap user0 + +ebb117: + v987 = load.i64 v111 + call fn104(v0, v105, v990, v991, v992, v987, v988, v989) + jump ebb118 + +ebb118: + jump ebb79 + +ebb119: + v993 = iconst.i16 1 + v994 = load.i16 v62 + v995 = isub v994, v993 + v996 = iconst.i8 0 + v997 = stack_addr.i64 ss144 + v998 = stack_addr.i64 ss144 + v999 = load.i32 aligned v998 + v1000 = iadd_imm.i64 v112, 2 + v1001 = load.i8 v1000 + v1002 = uextend.i32 v1001 + brz v1002, ebb120 + jump ebb121 + +ebb121: + v1003 = global_value.i64 gv74 + trap user0 + +ebb120: + v1004 = load.i16 aligned v112 + jump ebb99(v1005, v1036, v1038, v1042, v1047, v1055, v1060) +}
diff --git a/src/clif-util.rs b/src/clif-util.rs
index a2b248b0d..19402d4bf 100755
--- a/src/clif-util.rs
+++ b/src/clif-util.rs
@@ -13,14 +13,6 @@
     )
 )]
-use cfg_if::cfg_if;
-
-cfg_if! {
-    if #[cfg(feature = "wasm")] {
-        mod wasm;
-    }
-}
-
 use clap::{App, Arg, SubCommand};
 use cranelift_codegen::dbg::LOG_FILENAME_PREFIX;
 use cranelift_codegen::VERSION;
@@ -28,12 +20,17 @@
 use std::io::{self, Write};
 use std::option::Option;
 use std::process;
+mod bugpoint;
 mod cat;
 mod compile;
 mod disasm;
 mod print_cfg;
+mod run;
 mod utils;
+
+#[cfg(feature = "wasm")]
+mod wasm;
+
 /// A command either succeeds or fails with an error message.
 pub type CommandResult = Result<(), String>;
@@ -107,7 +104,13 @@ fn add_print_flag<'a>() -> clap::Arg<'a, 'a> {
 fn add_debug_flag<'a>() -> clap::Arg<'a, 'a> {
     Arg::with_name("debug")
         .short("d")
-        .help("enable debug output on stderr/stdout")
+        .help("Enable debug output on stderr/stdout")
+}
+
+fn add_enable_simd_flag<'a>() -> clap::Arg<'a, 'a> {
+    Arg::with_name("enable-simd")
+        .long("enable-simd")
+        .help("Enable WASM's SIMD operations")
 }
 
 /// Returns a vector of clap value options and changes these options into a vector of strings
@@ -140,6 +143,7 @@ fn add_wasm_or_compile<'a>(cmd: &str) -> clap::App<'a, 'a> {
         .arg(add_target_flag())
         .arg(add_input_file_arg())
         .arg(add_debug_flag())
+        .arg(add_enable_simd_flag())
 }
 
 fn handle_debug_flag(debug: bool) {
@@ -161,6 +165,13 @@ fn main() {
                 .arg(add_input_file_arg())
                 .arg(add_debug_flag()),
         )
+        .subcommand(
+            SubCommand::with_name("run")
+                .about("Execute CLIF code and verify with test expressions")
+                .arg(add_verbose_flag())
+                .arg(add_input_file_arg())
+                .arg(add_debug_flag()),
+        )
         .subcommand(
             SubCommand::with_name("cat")
                 .about("Outputs .clif file")
@@ -199,6 +210,14 @@ fn main() {
                 .arg(add_pass_arg())
                 .arg(add_debug_flag())
                 .arg(add_time_flag()),
+        )
+        .subcommand(
+            SubCommand::with_name("bugpoint")
+                .about("Reduce size of clif file causing panic during compilation.")
+                .arg(add_single_input_file_arg())
+                .arg(add_set_flag())
+                .arg(add_target_flag())
+                .arg(add_verbose_flag()),
         );
 
     let res_util = match app_cmds.get_matches().subcommand() {
@@ -215,6 +234,14 @@ fn main() {
             )
             .map(|_time| ())
         }
+        ("run", Some(rest_cmd)) => {
+            handle_debug_flag(rest_cmd.is_present("debug"));
+            run::run(
+                get_vec(rest_cmd.values_of("file")),
+                rest_cmd.is_present("verbose"),
+            )
+            .map(|_time| ())
+        }
         ("pass", Some(rest_cmd)) => {
             handle_debug_flag(rest_cmd.is_present("debug"));
@@ -276,6 +303,7 @@ fn main() {
                 rest_cmd.is_present("print-size"),
                 rest_cmd.is_present("time-passes"),
                 rest_cmd.is_present("value-ranges"),
+                rest_cmd.is_present("enable-simd"),
             )
         };
 
@@ -284,6 +312,19 @@ fn main() {
             result
         }
+        ("bugpoint", Some(rest_cmd)) => {
+            let mut target_val: &str = "";
+            if let Some(clap_target) = rest_cmd.value_of("target") {
+                target_val = clap_target;
+            }
+
+            bugpoint::run(
+                rest_cmd.value_of("single-file").unwrap(),
+                &get_vec(rest_cmd.values_of("set")),
+                target_val,
+                rest_cmd.is_present("verbose"),
+            )
+        }
         _ => Err("Invalid subcommand.".to_owned()),
     };
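[Editor's note: the clif-util.rs changes above follow one pattern throughout: declare each flag or subcommand with a small helper, then read it back by name in the subcommand match. A minimal sketch of that pattern, assuming the same clap 2.x API the file already imports; the `greet` subcommand and its `--loud` flag are invented for illustration:

    use clap::{App, Arg, SubCommand};

    fn main() {
        // Same shape as clif-util's main(): build the app, attach a
        // subcommand with its flags, then dispatch on the subcommand name.
        let matches = App::new("example")
            .subcommand(
                SubCommand::with_name("greet")
                    .about("Print a greeting")
                    .arg(Arg::with_name("loud").long("loud").help("Shout the greeting")),
            )
            .get_matches();

        match matches.subcommand() {
            ("greet", Some(sub)) => {
                if sub.is_present("loud") {
                    println!("HELLO!");
                } else {
                    println!("hello");
                }
            }
            _ => eprintln!("Invalid subcommand."),
        }
    }

This is why each new feature in this patch (`run`, `bugpoint`, `--enable-simd`) needs only a helper function plus one new match arm.]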
diff --git a/src/compile.rs b/src/compile.rs
index 4c663323b..7d888f311 100644
--- a/src/compile.rs
+++ b/src/compile.rs
@@ -1,12 +1,12 @@
 //! CLI tool to read Cranelift IR files and compile them into native code.
 
-use crate::disasm::{print_all, PrintRelocs, PrintTraps};
+use crate::disasm::{print_all, PrintRelocs, PrintStackmaps, PrintTraps};
 use crate::utils::{parse_sets_and_triple, read_to_string};
 use cranelift_codegen::print_errors::pretty_error;
 use cranelift_codegen::settings::FlagsOrIsa;
 use cranelift_codegen::timing;
 use cranelift_codegen::Context;
-use cranelift_reader::parse_test;
+use cranelift_reader::{parse_test, ParseOptions};
 use std::path::Path;
 use std::path::PathBuf;
@@ -44,7 +44,8 @@ fn handle_module(
     fisa: FlagsOrIsa,
 ) -> Result<(), String> {
     let buffer = read_to_string(&path).map_err(|e| format!("{}: {}", name, e))?;
-    let test_file = parse_test(&buffer, None, None).map_err(|e| format!("{}: {}", name, e))?;
+    let test_file =
+        parse_test(&buffer, ParseOptions::default()).map_err(|e| format!("{}: {}", name, e))?;
 
     // If we have an isa from the command-line, use that. Otherwise if the
     // file contains a unique isa, use that.
@@ -62,11 +63,12 @@ fn handle_module(
     let mut relocs = PrintRelocs::new(flag_print);
     let mut traps = PrintTraps::new(flag_print);
+    let mut stackmaps = PrintStackmaps::new(flag_print);
     let mut mem = vec![];
 
     // Compile and encode the result to machine code.
     let code_info = context
-        .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps)
+        .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps)
         .map_err(|err| pretty_error(&context.func, Some(isa), err))?;
 
     if flag_print {
@@ -81,6 +83,7 @@ fn handle_module(
             code_info.jumptables_size + code_info.rodata_size,
             &relocs,
             &traps,
+            &stackmaps,
         )?;
     }
 }
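[Editor's note: compile.rs now passes a third sink to compile_and_emit so stackmap records are collected alongside relocations and traps. The sink pattern itself is small; a dependency-free sketch, where the trait below is a simplified stand-in for binemit::StackmapSink rather than the real signature:

    use std::fmt::Write;

    // Stand-in for the sink trait: the emitter calls back once per record.
    trait StackmapSink {
        fn add_stackmap(&mut self, offset: u32);
    }

    // Mirrors PrintStackmaps below: buffer text rather than printing
    // immediately, so the caller decides later whether to show it.
    struct PrintStackmaps {
        flag_print: bool,
        text: String,
    }

    impl StackmapSink for PrintStackmaps {
        fn add_stackmap(&mut self, offset: u32) {
            if self.flag_print {
                writeln!(&mut self.text, "add_stackmap at {}", offset).unwrap();
            }
        }
    }

    fn main() {
        let mut sink = PrintStackmaps { flag_print: true, text: String::new() };
        // A real emitter would call this at each recorded safepoint.
        sink.add_stackmap(16);
        sink.add_stackmap(48);
        print!("{}", sink.text);
    }

The real trait is defined against Cranelift's binemit types, as the disasm.rs diff below shows.]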
diff --git a/src/disasm.rs b/src/disasm.rs
index d7ba0dca6..9cd6851de 100644
--- a/src/disasm.rs
+++ b/src/disasm.rs
@@ -56,6 +56,22 @@ impl binemit::RelocSink for PrintRelocs {
             write!(&mut self.text, "reloc_jt: {} {} at {}\n", r, jt, where_).unwrap();
         }
     }
+
+    fn reloc_constant(
+        &mut self,
+        code_offset: binemit::CodeOffset,
+        reloc: binemit::Reloc,
+        constant: ir::ConstantOffset,
+    ) {
+        if self.flag_print {
+            write!(
+                &mut self.text,
+                "reloc_constant: {} {} at {}\n",
+                reloc, constant, code_offset
+            )
+            .unwrap();
+        }
+    }
 }
 
 pub struct PrintTraps {
@@ -80,6 +96,28 @@ impl binemit::TrapSink for PrintTraps {
     }
 }
 
+pub struct PrintStackmaps {
+    pub flag_print: bool,
+    pub text: String,
+}
+
+impl PrintStackmaps {
+    pub fn new(flag_print: bool) -> PrintStackmaps {
+        Self {
+            flag_print,
+            text: String::new(),
+        }
+    }
+}
+
+impl binemit::StackmapSink for PrintStackmaps {
+    fn add_stackmap(&mut self, offset: binemit::CodeOffset, _: binemit::Stackmap) {
+        if self.flag_print {
+            write!(&mut self.text, "add_stackmap at {}\n", offset).unwrap();
+        }
+    }
+}
+
 cfg_if! {
     if #[cfg(feature = "disas")] {
         use capstone::prelude::*;
@@ -98,16 +136,20 @@
                 .x86()
                 .mode(arch::x86::ArchMode::Mode64)
                 .build(),
-            Architecture::Arm
-            | Architecture::Armv4t
-            | Architecture::Armv5te
-            | Architecture::Armv7
-            | Architecture::Armv7s => Capstone::new().arm().mode(arch::arm::ArchMode::Arm).build(),
-            Architecture::Thumbv6m | Architecture::Thumbv7em | Architecture::Thumbv7m => Capstone::new(
-            ).arm()
-                .mode(arch::arm::ArchMode::Thumb)
-                .build(),
-            Architecture::Aarch64 => Capstone::new()
+            Architecture::Arm(arm) => {
+                if arm.is_thumb() {
+                    Capstone::new()
+                        .arm()
+                        .mode(arch::arm::ArchMode::Thumb)
+                        .build()
+                } else {
+                    Capstone::new()
+                        .arm()
+                        .mode(arch::arm::ArchMode::Arm)
+                        .build()
+                }
+            }
+            Architecture::Aarch64 {..} => Capstone::new()
                 .arm64()
                 .mode(arch::arm64::ArchMode::Arm)
                 .build(),
@@ -118,7 +160,7 @@ cfg_if! {
         }
 
        pub fn print_disassembly(isa: &dyn TargetIsa, mem: &[u8]) -> Result<(), String> {
-           let mut cs = get_disassembler(isa)?;
+           let cs = get_disassembler(isa)?;
 
            println!("\nDisassembly of {} bytes:", mem.len());
            let insns = cs.disasm_all(&mem, 0x0).unwrap();
@@ -170,11 +212,12 @@ pub fn print_all(
     rodata_size: u32,
     relocs: &PrintRelocs,
     traps: &PrintTraps,
+    stackmaps: &PrintStackmaps,
 ) -> Result<(), String> {
     print_bytes(&mem);
     print_disassembly(isa, &mem[0..code_size as usize])?;
     print_readonly_data(&mem[code_size as usize..(code_size + rodata_size) as usize]);
-    println!("\n{}\n{}", &relocs.text, &traps.text);
+    println!("\n{}\n{}\n{}", &relocs.text, &traps.text, &stackmaps.text);
 
     Ok(())
 }
diff --git a/src/run.rs b/src/run.rs
new file mode 100644
index 000000000..a3cd94b25
--- /dev/null
+++ b/src/run.rs
@@ -0,0 +1,122 @@
+//! CLI tool to compile Cranelift IR files to native code in memory and execute them.
+
+use crate::utils::read_to_string;
+use cranelift_codegen::isa::{CallConv, TargetIsa};
+use cranelift_filetests::FunctionRunner;
+use cranelift_native::builder as host_isa_builder;
+use cranelift_reader::{parse_test, Details, IsaSpec, ParseOptions};
+use std::path::PathBuf;
+use target_lexicon::Triple;
+use walkdir::WalkDir;
+
+pub fn run(files: Vec<PathBuf>, flag_print: bool) -> Result<(), String> {
+    let mut total = 0;
+    let mut errors = 0;
+    for file in iterate_files(files) {
+        total += 1;
+        match run_single_file(&file) {
+            Ok(_) => {
+                if flag_print {
+                    println!("{}", file.to_string_lossy());
+                }
+            }
+            Err(e) => {
+                if flag_print {
+                    println!("{}: {}", file.to_string_lossy(), e);
+                }
+                errors += 1;
+            }
+        }
+    }
+
+    if flag_print {
+        match total {
+            0 => println!("0 files"),
+            1 => println!("1 file"),
+            n => println!("{} files", n),
+        }
+    }
+
+    match errors {
+        0 => Ok(()),
+        1 => Err(String::from("1 failure")),
+        n => Err(format!("{} failures", n)),
+    }
+}
+
+/// Iterate over all of the files passed as arguments, recursively iterating through directories
+fn iterate_files(files: Vec<PathBuf>) -> impl Iterator<Item = PathBuf> {
+    files
+        .into_iter()
+        .flat_map(WalkDir::new)
+        .filter(|f| match f {
+            Ok(d) => {
+                // filter out hidden files (starting with .)
+                !d.file_name().to_str().map_or(false, |s| s.starts_with("."))
+                    // filter out directories
+                    && !d.file_type().is_dir()
+            }
+            Err(e) => {
+                println!("Unable to read file: {}", e);
+                false
+            }
+        })
+        .map(|f| {
+            f.expect("This should not happen: we have already filtered out the errors")
+                .into_path()
+        })
+}
+
+/// Run all functions in a file that are succeeded by "run:" comments
+fn run_single_file(path: &PathBuf) -> Result<(), String> {
+    let file_contents = read_to_string(&path).map_err(|e| e.to_string())?;
+    run_file_contents(file_contents)
+}
+
+/// Main body of `run_single_file` separated for testing
+fn run_file_contents(file_contents: String) -> Result<(), String> {
+    let options = ParseOptions {
+        default_calling_convention: CallConv::triple_default(&Triple::host()), // use the host's default calling convention
+        ..ParseOptions::default()
+    };
+    let test_file = parse_test(&file_contents, options).map_err(|e| e.to_string())?;
+    for (func, Details { comments, .. }) in test_file.functions {
+        if comments.iter().any(|c| c.text.contains("run")) {
+            let isa = create_target_isa(&test_file.isa_spec)?;
+            FunctionRunner::new(func, isa).run()?
+        }
+    }
+    Ok(())
+}
+
+/// Build an ISA based on the current machine running this code (the host)
+fn create_target_isa(isa_spec: &IsaSpec) -> Result<Box<dyn TargetIsa>, String> {
+    if let IsaSpec::None(flags) = isa_spec {
+        // build an ISA for the current machine
+        let builder = host_isa_builder()?;
+        Ok(builder.finish(flags.clone()))
+    } else {
+        Err(String::from("A target ISA was specified in the file but should not have been--only the host ISA can be used for running CLIF files"))?
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn nop() {
+        let code = String::from(
+            "
+            function %test() -> b8 {
+            ebb0:
+                nop
+                v1 = bconst.b8 true
+                return v1
+            }
+            ; run
+            ",
+        );
+        run_file_contents(code).unwrap()
+    }
+}
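[Editor's note: run.rs accepts files and directories interchangeably because iterate_files expands every argument through walkdir, keeping only non-hidden plain files. A trimmed, standalone version of that filter, assuming a walkdir dependency in Cargo.toml; the "filetests" path is illustrative:

    use std::path::PathBuf;
    use walkdir::WalkDir;

    // Recursively expand one root into plain file paths, skipping hidden
    // entries and directories, like the filter in src/run.rs.
    fn clif_files(root: &str) -> Vec<PathBuf> {
        WalkDir::new(root)
            .into_iter()
            .filter_map(|entry| entry.ok()) // drop unreadable entries
            .filter(|d| {
                !d.file_type().is_dir()
                    && !d.file_name().to_str().map_or(false, |s| s.starts_with('.'))
            })
            .map(|d| d.into_path())
            .collect()
    }

    fn main() {
        for file in clif_files("filetests") {
            println!("{}", file.display());
        }
    }

One difference worth noting: the version in src/run.rs reports unreadable entries before skipping them rather than silently dropping them.]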
diff --git a/src/wasm.rs b/src/wasm.rs
index dadb13389..cc5523882 100644
--- a/src/wasm.rs
+++ b/src/wasm.rs
@@ -7,19 +7,19 @@
     allow(clippy::too_many_arguments, clippy::cyclomatic_complexity)
 )]
 
-use crate::disasm::{print_all, PrintRelocs, PrintTraps};
+use crate::disasm::{print_all, PrintRelocs, PrintStackmaps, PrintTraps};
 use crate::utils::{parse_sets_and_triple, read_to_end};
+use cranelift_codegen::ir::DisplayFunctionAnnotations;
 use cranelift_codegen::print_errors::{pretty_error, pretty_verifier_error};
 use cranelift_codegen::settings::FlagsOrIsa;
 use cranelift_codegen::timing;
 use cranelift_codegen::Context;
-use cranelift_codegen::ir::DisplayFunctionAnnotations;
 use cranelift_entity::EntityRef;
 use cranelift_wasm::{translate_module, DummyEnvironment, FuncIndex, ReturnMode};
 use std::path::Path;
 use std::path::PathBuf;
 use term;
-use wabt::wat2wasm;
+use wabt::{wat2wasm_with_features, Features};
 
 macro_rules! vprintln {
     ($x: expr, $($tts:tt)*) => {
@@ -49,6 +49,7 @@ pub fn run(
     flag_print_size: bool,
     flag_report_times: bool,
     flag_calc_value_ranges: bool,
+    flag_enable_simd: bool,
 ) -> Result<(), String> {
     let parsed = parse_sets_and_triple(flag_set, flag_triple)?;
 
@@ -64,6 +65,7 @@ pub fn run(
             flag_print_disasm,
             flag_report_times,
             flag_calc_value_ranges,
+            flag_enable_simd,
             &path.to_path_buf(),
             &name,
             parsed.as_fisa(),
@@ -81,6 +83,7 @@ fn handle_module(
     flag_print_disasm: bool,
     flag_report_times: bool,
     flag_calc_value_ranges: bool,
+    flag_enable_simd: bool,
     path: &PathBuf,
     name: &str,
     fisa: FlagsOrIsa,
@@ -97,7 +100,11 @@ fn handle_module(
     let mut module_binary = read_to_end(path.clone()).map_err(|err| err.to_string())?;
 
     if !module_binary.starts_with(&[b'\0', b'a', b's', b'm']) {
-        module_binary = match wat2wasm(&module_binary) {
+        let mut features = Features::new();
+        if flag_enable_simd {
+            features.enable_simd();
+        }
+        module_binary = match wat2wasm_with_features(&module_binary, features) {
             Ok(data) => data,
             Err(e) => return Err(e.to_string()),
         };
@@ -113,7 +120,8 @@ fn handle_module(
     };
 
     let debug_info = flag_calc_value_ranges;
-    let mut dummy_environ = DummyEnvironment::new(isa.frontend_config(), ReturnMode::NormalReturns, debug_info);
+    let mut dummy_environ =
+        DummyEnvironment::new(isa.frontend_config(), ReturnMode::NormalReturns, debug_info);
 
     translate_module(&module_binary, &mut dummy_environ).map_err(|e| e.to_string())?;
 
     let _ = terminal.fg(term::color::GREEN);
@@ -171,13 +179,14 @@ fn handle_module(
     let mut mem = vec![];
     let mut relocs = PrintRelocs::new(flag_print);
     let mut traps = PrintTraps::new(flag_print);
+    let mut stackmaps = PrintStackmaps::new(flag_print);
 
     if flag_check_translation {
         if let Err(errors) = context.verify(fisa) {
             return Err(pretty_verifier_error(&context.func, fisa.isa, None, errors));
         }
     } else {
         let code_info = context
-            .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps)
+            .compile_and_emit(isa, &mut mem, &mut relocs, &mut traps, &mut stackmaps)
             .map_err(|err| pretty_error(&context.func, fisa.isa, err))?;
 
         if flag_print_size {
@@ -194,7 +203,10 @@
         }
 
         if flag_print_disasm {
-            saved_sizes = Some((code_info.code_size, code_info.jumptables_size + code_info.rodata_size));
+            saved_sizes = Some((
+                code_info.code_size,
+                code_info.jumptables_size + code_info.rodata_size,
+            ));
         }
     }
 
@@ -211,19 +223,34 @@
             println!("; Exported as \"{}\"", export_name);
         }
         let value_ranges = if flag_calc_value_ranges {
-            Some(context.build_value_labels_ranges(isa).expect("value location ranges"))
+            Some(
+                context
+                    .build_value_labels_ranges(isa)
+                    .expect("value location ranges"),
+            )
         } else {
             None
         };
-        println!("{}", context.func.display_with(DisplayFunctionAnnotations {
-            isa: fisa.isa,
-            value_ranges: value_ranges.as_ref(),
-        }));
+        println!(
+            "{}",
+            context.func.display_with(DisplayFunctionAnnotations {
+                isa: fisa.isa,
+                value_ranges: value_ranges.as_ref(),
+            })
+        );
         vprintln!(flag_verbose, "");
     }
 
     if let Some((code_size, rodata_size)) = saved_sizes {
-        print_all(isa, &mem, code_size, rodata_size, &relocs, &traps)?;
+        print_all(
+            isa,
+            &mem,
+            code_size,
+            rodata_size,
+            &relocs,
+            &traps,
+            &stackmaps,
+        )?;
     }
 
     context.clear();
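[Editor's note: the wasm.rs change gates the text-to-binary conversion on the \0asm magic and threads the new SIMD flag into wabt. Isolated, the decision looks like this, as a sketch assuming the same wabt crate API imported above:

    use wabt::{wat2wasm_with_features, Features};

    // Convert textual wasm to binary unless the input already starts with
    // the binary magic, enabling SIMD parsing on request, which is what
    // --enable-simd does in handle_module above.
    fn to_wasm_binary(input: Vec<u8>, enable_simd: bool) -> Result<Vec<u8>, String> {
        if input.starts_with(b"\0asm") {
            return Ok(input);
        }
        let mut features = Features::new();
        if enable_simd {
            features.enable_simd();
        }
        wat2wasm_with_features(&input, features).map_err(|e| e.to_string())
    }
]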
+ banner "Rust documentation link test" + ensure_installed cargo-deadlinks + find ./target/doc -maxdepth 1 -type d -name "cranelift*" | xargs -I{} cargo deadlinks --dir {} +else + cargo doc + echo "nightly toolchain not found, some documentation links will not work" +fi # Ensure fuzzer works by running it with a single input # Note LSAN is disabled due to https://github.com/google/sanitizers/issues/764 banner "cargo fuzz check" -if rustup toolchain list | grep -q nightly; then - if cargo install --list | grep -q cargo-fuzz; then - echo "cargo-fuzz found" - else - echo "installing cargo-fuzz" - cargo +nightly install cargo-fuzz - fi +if ensure_installed cargo-fuzz nightly; then fuzz_module="ffaefab69523eb11935a9b420d58826c8ea65c4c" ASAN_OPTIONS=detect_leaks=0 \ cargo +nightly fuzz run fuzz_translate_module \ diff --git a/wasmtests/simd.wat b/wasmtests/simd.wat new file mode 100644 index 000000000..99b7d5c10 --- /dev/null +++ b/wasmtests/simd.wat @@ -0,0 +1,23 @@ +(module + (func $test_splat (result i32) + i32.const 42 + i32x4.splat + i32x4.extract_lane 0 + ) + + (func $test_insert_lane (result i32) + v128.const i64x2 0 0 + i32.const 99 + i32x4.replace_lane 1 + i32x4.extract_lane 1 + ) + + (func $test_const (result i32) + v128.const i32x4 1 2 3 4 + i32x4.extract_lane 3 + ) + + (export "test_splat" (func $test_splat)) + (export "test_insert_lane" (func $test_insert_lane)) + (export "test_const" (func $test_const)) +)