diff --git a/.github/workflows/arrow-flight-tests.yml b/.github/workflows/arrow-flight-tests.yml
index 3aab27ff09c58..c8dec648623d5 100644
--- a/.github/workflows/arrow-flight-tests.yml
+++ b/.github/workflows/arrow-flight-tests.yml
@@ -1,9 +1,6 @@
name: arrow flight tests
-on:
- pull_request:
- paths-ignore:
- - presto-docs/**
+on: pull_request
permissions:
contents: read
@@ -17,12 +14,30 @@ env:
RETRY: .github/bin/retry
jobs:
+ changes:
+ runs-on: ubuntu-latest
+ # Required permissions
+ permissions:
+ pull-requests: read
+ # Set job outputs to values from filter step
+ outputs:
+ codechange: ${{ steps.filter.outputs.codechange }}
+ steps:
+ # For pull requests it's not necessary to checkout the code
+ - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
+ id: filter
+ with:
+ filters: |
+ codechange:
+ - '!presto-docs/**'
+
arrowflight-java-tests:
runs-on: ubuntu-latest
+ needs: changes
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
modules:
- :presto-base-arrow-flight # Only run tests for the `presto-base-arrow-flight` module
@@ -34,33 +49,52 @@ jobs:
steps:
# Checkout the code only if there are changes in the relevant files
- uses: actions/checkout@v4
+ if: needs.changes.outputs.codechange == 'true'
with:
show-progress: false
persist-credentials: false
# Set up Java and dependencies for the build environment
- uses: actions/setup-java@v4
+ if: needs.changes.outputs.codechange == 'true'
with:
distribution: temurin
java-version: ${{ matrix.java }}
cache: maven
+
+ # Cleanup before build
+ - name: Clean up before build
+ if: needs.changes.outputs.codechange == 'true'
+ run: |
+ sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
+ df -h
+ docker system prune -af || true
+
- name: Download nodejs to maven cache
+ if: needs.changes.outputs.codechange == 'true'
run: .github/bin/download_nodejs
# Install dependencies for the target module
- name: Maven Install
+ if: needs.changes.outputs.codechange == 'true'
run: |
export MAVEN_OPTS="${MAVEN_INSTALL_OPTS}"
./mvnw install ${MAVEN_FAST_INSTALL} -e -am -pl ${{ matrix.modules }}
# Run Maven tests for the target module, excluding native tests
- name: Maven Tests
+ if: needs.changes.outputs.codechange == 'true'
run: ./mvnw test ${MAVEN_TEST} -pl ${{ matrix.modules }} -Dtest="*,!TestArrowFlightNativeQueries*"
prestocpp-linux-build-for-test:
runs-on: ubuntu-22.04
+ needs: changes
container:
- image: prestodb/presto-native-dependency:0.292-20250204112033-cf8ba84
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
+ volumes:
+ - /usr:/host_usr
+ - /opt:/host_opt
concurrency:
group: ${{ github.workflow }}-prestocpp-linux-build-for-test-${{ github.event.pull_request.number }}
cancel-in-progress: true
@@ -71,21 +105,42 @@ jobs:
permissions:
actions: write
steps:
+ # We cannot use the github action to free disk space from the runner
+ # because we are in the container and not on the runner anymore.
+ - name: Free Disk Space
+ run: |
+ # Re-used from free-disk-space github action.
+ getAvailableSpace() { echo $(df -a $1 | awk 'NR > 1 {avail+=$4} END {print avail}'); }
+ # Show before
+ echo "Original available disk space: " $(getAvailableSpace)
+ # Remove DotNet.
+ rm -rf /host_usr/share/dotnet || true
+ # Remove android
+ rm -rf /host_usr/local/lib/android || true
+ # Remove CodeQL
+ rm -rf /host_opt/hostedtoolcache/CodeQL || true
+ # Show after
+ echo "New available disk space: " $(getAvailableSpace)
+
- uses: actions/checkout@v4
+ if: needs.changes.outputs.codechange == 'true'
with:
persist-credentials: false
- name: Fix git permissions
+ if: needs.changes.outputs.codechange == 'true'
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Update velox
+ if: needs.changes.outputs.codechange == 'true'
run: |
cd presto-native-execution
make velox-submodule
- name: Install Arrow Flight
+ if: needs.changes.outputs.codechange == 'true'
run: |
mkdir -p ${DEPENDENCY_DIR}/adapter-deps/download
mkdir -p ${INSTALL_PREFIX}/adapter-deps/install
@@ -95,19 +150,23 @@ jobs:
PROMPT_ALWAYS_RESPOND=n ./scripts/setup-adapters.sh arrow_flight
- name: Install Github CLI for using apache/infrastructure-actions/stash
+ if: needs.changes.outputs.codechange == 'true'
run: |
curl -L https://github.com/cli/cli/releases/download/v2.63.2/gh_2.63.2_linux_amd64.rpm > gh_2.63.2_linux_amd64.rpm
rpm -iv gh_2.63.2_linux_amd64.rpm
- uses: apache/infrastructure-actions/stash/restore@4ab8682fbd4623d2b4fc1c98db38aba5091924c3
+ if: needs.changes.outputs.codechange == 'true'
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-prestocpp-linux-build-for-test
- name: Zero ccache statistics
+ if: needs.changes.outputs.codechange == 'true'
run: ccache -sz
- name: Build engine
+ if: needs.changes.outputs.codechange == 'true'
run: |
source /opt/rh/gcc-toolset-12/enable
cd presto-native-execution
@@ -122,39 +181,47 @@ jobs:
-DCMAKE_PREFIX_PATH=/usr/local \
-DThrift_ROOT=/usr/local \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
- -DMAX_LINK_JOBS=4
+ -DMAX_LINK_JOBS=3
ninja -C _build/release -j 4
- name: Ccache after
+ if: needs.changes.outputs.codechange == 'true'
run: ccache -s
- uses: apache/infrastructure-actions/stash/save@4ab8682fbd4623d2b4fc1c98db38aba5091924c3
+ if: needs.changes.outputs.codechange == 'true'
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-prestocpp-linux-build-for-test
- name: Run Unit Tests for the Arrow Flight connector only
+ if: needs.changes.outputs.codechange == 'true'
run: |
cd presto-native-execution/_build/release
ctest -j 4 -VV --output-on-failure --tests-regex ^presto_flight.*
- name: Upload artifacts
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/upload-artifact@v4
with:
name: arrow-flight-presto-native-build
path: presto-native-execution/_build/release/presto_cpp/main/presto_server
- name: Upload Arrow Flight install artifacts
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/upload-artifact@v4
with:
name: arrow-flight-install
path: ${{ env.INSTALL_PREFIX }}/lib64/libarrow_flight*
arrowflight-native-e2e-tests:
- needs: prestocpp-linux-build-for-test
+ needs: [changes, prestocpp-linux-build-for-test]
runs-on: ubuntu-22.04
container:
- image: prestodb/presto-native-dependency:0.292-20250204112033-cf8ba84
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
+ volumes:
+ - /usr:/host_usr
+ - /opt:/host_opt
env:
INSTALL_PREFIX: "${{ github.workspace }}/adapter-deps/install"
strategy:
@@ -169,22 +236,43 @@ jobs:
cancel-in-progress: true
steps:
+ # We cannot use the github action to free disk space from the runner
+ # because we are in the container and not on the runner anymore.
+ - name: Free Disk Space
+ run: |
+ # Re-used from free-disk-space github action.
+ getAvailableSpace() { echo $(df -a $1 | awk 'NR > 1 {avail+=$4} END {print avail}'); }
+ # Show before
+ echo "Original available disk space: " $(getAvailableSpace)
+ # Remove DotNet.
+ rm -rf /host_usr/share/dotnet || true
+ # Remove android
+ rm -rf /host_usr/local/lib/android || true
+ # Remove CodeQL
+ rm -rf /host_opt/hostedtoolcache/CodeQL || true
+ # Show after
+ echo "New available disk space: " $(getAvailableSpace)
+
- uses: actions/checkout@v4
+ if: needs.changes.outputs.codechange == 'true'
with:
persist-credentials: false
- name: Fix git permissions
+ if: needs.changes.outputs.codechange == 'true'
# Usually actions/checkout does this but as we run in a container
# it doesn't work
run: git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Download artifacts
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/download-artifact@v4
with:
name: arrow-flight-presto-native-build
path: presto-native-execution/_build/release/presto_cpp/main
- name: Download Arrow Flight install artifacts
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/download-artifact@v4
with:
name: arrow-flight-install
@@ -192,21 +280,25 @@ jobs:
# Permissions are lost when uploading. Details here: https://github.com/actions/upload-artifact/issues/38
- name: Restore execute permissions and library path
+ if: needs.changes.outputs.codechange == 'true'
run: |
chmod +x ${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server
# Ensure transitive dependency libboost-iostreams is found.
ldconfig /usr/local/lib
- name: Install OpenJDK8
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
+ if: needs.changes.outputs.codechange == 'true'
run: .github/bin/download_nodejs
- name: Maven install
+ if: needs.changes.outputs.codechange == 'true'
env:
# Use different Maven options to install.
MAVEN_OPTS: -Xmx2G -XX:+ExitOnOutOfMemoryError
@@ -215,6 +307,7 @@ jobs:
./mvnw install ${MAVEN_FAST_INSTALL} -am -pl ${{ matrix.modules }}
- name: Run arrowflight native e2e tests
+ if: needs.changes.outputs.codechange == 'true'
run: |
export PRESTO_SERVER_PATH="${GITHUB_WORKSPACE}/presto-native-execution/_build/release/presto_cpp/main/presto_server"
mvn test \
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 74e148fe4105e..ec9c9d7cb33c9 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -40,7 +40,7 @@ jobs:
- uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Maven Install
run: |
diff --git a/.github/workflows/hive-tests.yml b/.github/workflows/hive-tests.yml
index 05eb630e5a3fe..08228bfd827ee 100644
--- a/.github/workflows/hive-tests.yml
+++ b/.github/workflows/hive-tests.yml
@@ -35,7 +35,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
needs: changes
timeout-minutes: 60
@@ -92,7 +92,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
needs: changes
timeout-minutes: 20
diff --git a/.github/workflows/jdbc-connector-tests.yml b/.github/workflows/jdbc-connector-tests.yml
index 342fb01ef1c25..8818cf3d76b11 100644
--- a/.github/workflows/jdbc-connector-tests.yml
+++ b/.github/workflows/jdbc-connector-tests.yml
@@ -35,7 +35,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
needs: changes
timeout-minutes: 60
diff --git a/.github/workflows/kudu.yml b/.github/workflows/kudu.yml
index 831ce311087cd..6d711f94b28ff 100644
--- a/.github/workflows/kudu.yml
+++ b/.github/workflows/kudu.yml
@@ -33,7 +33,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
needs: changes
timeout-minutes: 60
diff --git a/.github/workflows/maven-checks.yml b/.github/workflows/maven-checks.yml
index 79782713bb0ed..9a4f7b5693fce 100644
--- a/.github/workflows/maven-checks.yml
+++ b/.github/workflows/maven-checks.yml
@@ -16,7 +16,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
timeout-minutes: 45
concurrency:
diff --git a/.github/workflows/owasp-dependency-check.yml b/.github/workflows/owasp-dependency-check.yml
index 426187e40b678..04b87e6c57779 100644
--- a/.github/workflows/owasp-dependency-check.yml
+++ b/.github/workflows/owasp-dependency-check.yml
@@ -12,23 +12,44 @@ on:
type: string
jobs:
+ changes:
+ runs-on: ubuntu-latest
+ # Required permissions
+ permissions:
+ pull-requests: read
+ # Set job outputs to values from filter step
+ outputs:
+ codechange: ${{ steps.filter.outputs.codechange }}
+ steps:
+ # For pull requests it's not necessary to checkout the code
+ - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
+ id: filter
+ with:
+ filters: |
+ codechange:
+ - '!presto-docs/**'
+ - 'presto-docs/pom.xml'
+
dependency-check:
+ needs: changes
runs-on: ubuntu-latest
concurrency:
group: ${{ github.workflow }}-owasp-dependency-check-${{ github.event.pull_request.number }}
cancel-in-progress: true
env:
- CVSS_THRESHOLD: ${{ github.event.inputs.cvss-threshold || '7.0' }}
+ CVSS_THRESHOLD: ${{ github.event.inputs.cvss-threshold || '0.1' }}
OWASP_VERSION: 12.1.3
steps:
# Checkout PR branch first to get access to the composite action
- name: Checkout PR branch
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/checkout@v4
with:
persist-credentials: false
ref: ${{ github.event.pull_request.head.sha }}
- name: Find merge base
+ if: needs.changes.outputs.codechange == 'true'
id: merge-base
env:
GH_TOKEN: ${{ github.token }}
@@ -42,6 +63,7 @@ jobs:
echo "Using merge base: $merge_base"
- name: Checkout base branch
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/checkout@v4
with:
persist-credentials: false
@@ -49,6 +71,7 @@ jobs:
path: base
- name: Set up Java
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/setup-java@v4
with:
distribution: temurin
@@ -56,10 +79,12 @@ jobs:
cache: maven
- name: Get date for cache key
+ if: needs.changes.outputs.codechange == 'true'
id: get-date
run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
- name: Restore OWASP database cache
+ if: needs.changes.outputs.codechange == 'true'
uses: actions/cache/restore@v4
id: cache-owasp-restore
with:
@@ -70,6 +95,7 @@ jobs:
owasp-cache-${{ runner.os }}-
- name: Run OWASP check on base branch
+ if: needs.changes.outputs.codechange == 'true'
uses: ./.github/actions/maven-owasp-scan
with:
working-directory: base
@@ -77,13 +103,14 @@ jobs:
data-directory: /tmp/.owasp/dependency-check-data
- name: Save OWASP cache after base scan
- if: steps.cache-owasp-restore.outputs.cache-hit != 'true'
+ if: needs.changes.outputs.codechange == 'true' && steps.cache-owasp-restore.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: /tmp/.owasp/dependency-check-data
key: owasp-cache-${{ runner.os }}-v${{ env.OWASP_VERSION }}-${{ steps.get-date.outputs.date }}-partial
- name: Run OWASP check on PR branch
+ if: needs.changes.outputs.codechange == 'true'
uses: ./.github/actions/maven-owasp-scan
with:
working-directory: .
@@ -91,6 +118,7 @@ jobs:
data-directory: /tmp/.owasp/dependency-check-data
- name: Compare and fail on new CVEs above threshold
+ if: needs.changes.outputs.codechange == 'true'
run: |
# Extract CVEs above threshold from both branches (CVSS >= $CVSS_THRESHOLD)
threshold=$CVSS_THRESHOLD
@@ -154,14 +182,14 @@ jobs:
fi
- name: Save OWASP database cache
- if: always()
+ if: needs.changes.outputs.codechange == 'true' && always()
uses: actions/cache/save@v4
with:
path: /tmp/.owasp/dependency-check-data
key: owasp-cache-${{ runner.os }}-v${{ env.OWASP_VERSION }}-${{ steps.get-date.outputs.date }}
- name: Upload reports
- if: always()
+ if: needs.changes.outputs.codechange == 'true' && always()
uses: actions/upload-artifact@v4
with:
name: owasp-reports
diff --git a/.github/workflows/presto-release-publish.yml b/.github/workflows/presto-release-publish.yml
index 36720303fa17b..3c9e1058fb51c 100644
--- a/.github/workflows/presto-release-publish.yml
+++ b/.github/workflows/presto-release-publish.yml
@@ -432,7 +432,7 @@ jobs:
working-directory: presto-native-execution
run: |
df -h
- docker compose build --build-arg EXTRA_CMAKE_FLAGS="
+ docker compose build --build-arg EXTRA_CMAKE_FLAGS=" \
-DPRESTO_ENABLE_PARQUET=ON \
-DPRESTO_ENABLE_REMOTE_FUNCTIONS=ON \
-DPRESTO_ENABLE_JWT=ON \
diff --git a/.github/workflows/prestocpp-linux-adapters-build.yml b/.github/workflows/prestocpp-linux-adapters-build.yml
index 362b3fcbbacee..d45bd35a27f6d 100644
--- a/.github/workflows/prestocpp-linux-adapters-build.yml
+++ b/.github/workflows/prestocpp-linux-adapters-build.yml
@@ -11,7 +11,7 @@ jobs:
prestocpp-linux-adapters-build:
runs-on: ubuntu-22.04
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
concurrency:
group: ${{ github.workflow }}-prestocpp-linux-adapters-build-${{ github.event.pull_request.number }}
cancel-in-progress: true
diff --git a/.github/workflows/prestocpp-linux-build-and-unit-test.yml b/.github/workflows/prestocpp-linux-build-and-unit-test.yml
index e77a4bb152fd6..566146da290fd 100644
--- a/.github/workflows/prestocpp-linux-build-and-unit-test.yml
+++ b/.github/workflows/prestocpp-linux-build-and-unit-test.yml
@@ -30,7 +30,7 @@ jobs:
runs-on: ubuntu-22.04
needs: changes
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
concurrency:
group: ${{ github.workflow }}-prestocpp-linux-build-test-${{ github.event.pull_request.number }}
cancel-in-progress: true
@@ -135,7 +135,7 @@ jobs:
needs: [changes, prestocpp-linux-build-for-test]
runs-on: ubuntu-22.04
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -200,7 +200,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
if: |
@@ -260,7 +260,7 @@ jobs:
storage-format: [PARQUET, DWRF]
enable-sidecar: [true, false]
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -332,7 +332,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
if: |
@@ -388,7 +388,7 @@ jobs:
group: ${{ github.workflow }}-prestocpp-linux-presto-on-spark-e2e-tests-${{ matrix.storage-format }}-${{ matrix.enable-sidecar }}-${{ github.event.pull_request.number }}
cancel-in-progress: true
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -453,7 +453,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
if: |
@@ -501,7 +501,7 @@ jobs:
needs: [changes, prestocpp-linux-build-for-test]
runs-on: ubuntu-22.04
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -568,7 +568,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
if: |
@@ -613,7 +613,7 @@ jobs:
needs: [changes, prestocpp-linux-build-for-test]
runs-on: ubuntu-22.04
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602271419-160459b8
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -680,7 +680,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: temurin
- java-version: 17.0.15
+ java-version: '17'
cache: maven
- name: Download nodejs to maven cache
if: |
diff --git a/.github/workflows/prestocpp-linux-build.yml b/.github/workflows/prestocpp-linux-build.yml
index 872b43adf90e0..f87b63fe760f3 100644
--- a/.github/workflows/prestocpp-linux-build.yml
+++ b/.github/workflows/prestocpp-linux-build.yml
@@ -20,13 +20,13 @@ jobs:
codechange:
- '!presto-docs/**'
- prestocpp-linux-build-engine:
+ prestocpp-linux-build-gpu-engine:
runs-on: ubuntu-22.04
permissions:
contents: read
needs: changes
container:
- image: prestodb/presto-native-dependency:0.297-202512180933-75d7d4ea
+ image: prestodb/presto-native-dependency:0.297-202602190453-8d6d9543
volumes:
- /usr:/host_usr
- /opt:/host_opt
@@ -35,16 +35,20 @@ jobs:
cancel-in-progress: true
env:
CCACHE_DIR: "${{ github.workspace }}/ccache"
- CC: /usr/bin/clang-15
- CXX: /usr/bin/clang++-15
+ cudf_SOURCE: BUNDLED
+ CUDA_COMPILER: /usr/local/cuda-${CUDA_VERSION}/bin/nvcc
+ # Set compiler to GCC 14
+ CUDA_FLAGS: -ccbin /opt/rh/gcc-toolset-14/root/usr/bin
BUILD_SCRIPT: |
+ unset CC && unset CXX
+ source /opt/rh/gcc-toolset-14/enable
cd presto-native-execution
cmake \
- -B _build/debug \
+ -B _build/release \
-GNinja \
-DTREAT_WARNINGS_AS_ERRORS=1 \
-DENABLE_ALL_WARNINGS=1 \
- -DCMAKE_BUILD_TYPE=Debug \
+ -DCMAKE_BUILD_TYPE=Release \
-DPRESTO_ENABLE_S3=ON \
-DPRESTO_ENABLE_GCS=ON \
-DPRESTO_ENABLE_ABFS=OFF \
@@ -54,11 +58,13 @@ jobs:
-DPRESTO_STATS_REPORTER_TYPE=PROMETHEUS \
-DPRESTO_MEMORY_CHECKER_TYPE=LINUX_MEMORY_CHECKER \
-DPRESTO_ENABLE_TESTING=OFF \
+ -DPRESTO_ENABLE_CUDF=ON \
+ -DCMAKE_CUDA_ARCHITECTURES=75 \
-DCMAKE_PREFIX_PATH=/usr/local \
-DThrift_ROOT=/usr/local \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DMAX_LINK_JOBS=4
- ninja -C _build/debug -j 4
+ ninja -C _build/release -j 4
steps:
# We cannot use the github action to free disk space from the runner
diff --git a/.github/workflows/prestocpp-macos-build.yml b/.github/workflows/prestocpp-macos-build.yml
index 7c2b6421a43ba..32bf241d58670 100644
--- a/.github/workflows/prestocpp-macos-build.yml
+++ b/.github/workflows/prestocpp-macos-build.yml
@@ -76,6 +76,10 @@ jobs:
install_velox_deps_from_brew
install_double_conversion
+ # Install glog/gflags because they are not installed from homebrew.
+ install_gflags
+ install_glog
+
# Velox deps needed by proxygen, a presto dependency.
install_boost
install_fmt
diff --git a/.github/workflows/product-tests-basic-environment.yml b/.github/workflows/product-tests-basic-environment.yml
index faa60abf11d3a..7a2dfd1a6cb17 100644
--- a/.github/workflows/product-tests-basic-environment.yml
+++ b/.github/workflows/product-tests-basic-environment.yml
@@ -31,7 +31,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
diff --git a/.github/workflows/product-tests-specific-environment.yml b/.github/workflows/product-tests-specific-environment.yml
index fea69491625d8..cbc0dde9e0e4c 100644
--- a/.github/workflows/product-tests-specific-environment.yml
+++ b/.github/workflows/product-tests-specific-environment.yml
@@ -31,7 +31,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
@@ -98,7 +98,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
diff --git a/.github/workflows/singlestore-tests.yml b/.github/workflows/singlestore-tests.yml
index f18d1a6e554b3..6e1f0b275c1ff 100644
--- a/.github/workflows/singlestore-tests.yml
+++ b/.github/workflows/singlestore-tests.yml
@@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
diff --git a/.github/workflows/spark-integration.yml b/.github/workflows/spark-integration.yml
index 6dc61358e9c2e..36f43b97d5198 100644
--- a/.github/workflows/spark-integration.yml
+++ b/.github/workflows/spark-integration.yml
@@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
diff --git a/.github/workflows/test-other-modules.yml b/.github/workflows/test-other-modules.yml
index ed2bf2e77727c..38a34323ddc01 100644
--- a/.github/workflows/test-other-modules.yml
+++ b/.github/workflows/test-other-modules.yml
@@ -32,7 +32,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
runs-on: ubuntu-latest
permissions:
contents: read
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 028c9ba9378ff..7419e3e2083de 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -37,7 +37,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java: [17.0.15]
+ java: ['17']
modules:
- :presto-tests -P presto-tests-execution-memory
- :presto-tests -P presto-tests-general
diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties
index b96797e3c6d15..b5217474cf3a6 100644
--- a/.mvn/wrapper/maven-wrapper.properties
+++ b/.mvn/wrapper/maven-wrapper.properties
@@ -1,2 +1,2 @@
-distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.8.8/apache-maven-3.8.8-bin.zip
+distributionUrl=https://repo1.maven.org/maven2/org/apache/maven/apache-maven/3.9.12/apache-maven-3.9.12-bin.zip
wrapperUrl=https://repo1.maven.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar
diff --git a/README.md b/README.md
index 4b3b1fcdedb39..a664980d3687b 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ See the [Presto documentation](https://prestodb.io/docs/current/) for general do
## Mission and Architecture
-See [PrestoDB: Mission and Architecture](ARCHITECTURE.md).
+See [PrestoDB: Mission and Architecture](ARCHITECTURE.md).
## Requirements
@@ -67,7 +67,7 @@ To modify the loaded plugins in IntelliJ, modify the `config.properties` located
### Additional configuration for Java 17
-When running with Java 17, additional `--add-opens` flags are required to allow reflective access used by certain catalogs based on which catalogs are configured.
+When running with Java 17, additional `--add-opens` flags are required to allow reflective access used by certain catalogs based on which catalogs are configured.
For the default set of catalogs loaded when starting the Presto server in IntelliJ without changes, add the following flags to the **VM Options**:
--add-opens=java.base/java.io=ALL-UNNAMED
@@ -157,6 +157,10 @@ resources will be hot-reloaded and changes are reflected on browser refresh.
Check out [building instructions](https://github.com/prestodb/presto/tree/master/presto-native-execution#build-from-source) to get started.
+## Using development containers
+
+The PrestoDB project provides support for development containers in its own repository.
+Please visit the [presto-dev README](https://github.com/prestodb/presto-dev/blob/main/README.md) for details.
diff --git a/pom.xml b/pom.xml
index 880d2928cc193..534f3ef5e91e7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -44,7 +44,7 @@
3.3.94.13.2
- 0.225
+ 0.227${dep.airlift.version}0.380.6
@@ -55,23 +55,24 @@
${dep.airlift.version}
- 2.13.1
+ 2.14.01.557.59.12.03.8.0
- 1.13.1
+ 1.16.0
+ 1.10.09.7.11.9.173132.0.163.9.1
- 1.3.0
- 30.0.1
+ 1.4.0
+ 35.0.12.3.1
- 4.0.5
+ 4.0.60.14.0
- 1.20.5
+ 2.0.33.4.12.9.03.1.3
@@ -79,17 +80,17 @@
32.1.0-jre2.15.43.0.0
- 1.11.4
+ 1.12.11.27.1
- 4.29.0
+ 4.30.212.0.29
- 4.1.130.Final
- 1.2.8
+ 4.2.10.Final
+ 1.3.32.52.12.13.18.06.0.0
- 17.0.0
+ 18.3.03.5.42.0.2-63.4.1-1
@@ -110,11 +111,12 @@
-missing1.17.22.32.9
- 1.19.0
+ 1.20.08.5.22.2.05.0.1
+ 1.58.0true
@@ -160,6 +162,7 @@
presto-bytecodepresto-clientpresto-parser
+ presto-internal-communicationpresto-main-basepresto-main-testspresto-main
@@ -224,17 +227,20 @@
presto-native-testspresto-routerpresto-open-telemetry
+ presto-openlineage-event-listenerredis-hbo-providerpresto-singlestorepresto-hanapresto-openapipresto-native-sidecar-plugin
+ presto-common-arrowpresto-base-arrow-flightpresto-function-serverpresto-router-example-plugin-schedulerpresto-plan-checker-router-pluginpresto-sql-helpers/presto-sql-invoked-functions-pluginpresto-sql-helpers/presto-native-sql-invoked-functions-plugin
+ presto-lance
@@ -597,6 +603,12 @@
${project.version}
+
+ com.facebook.presto
+ presto-internal-communication
+ ${project.version}
+
+
io.grpcgrpc-context
@@ -1010,6 +1022,12 @@
provided
+
+ com.facebook.presto
+ presto-common-arrow
+ ${project.version}
+
+
com.facebook.prestopresto-base-arrow-flight
@@ -1089,7 +1107,7 @@
com.facebook.presto.hivehive-apache
- 3.0.0-10
+ 3.0.0-12
@@ -1192,6 +1210,12 @@
${project.version}
+
+ com.facebook.presto
+ presto-openlineage-event-listener
+ ${project.version}
+
+
com.facebook.prestopresto-native-sidecar-plugin
@@ -1207,7 +1231,7 @@
io.airliftaircompressor
- 0.27
+ 2.0.3
@@ -1536,7 +1560,7 @@
org.postgresqlpostgresql
- 42.6.1
+ 42.7.9
@@ -1640,6 +1664,12 @@
${dep.reactor-netty.version}
+
+ io.projectreactor
+ reactor-core
+ 3.8.3
+
+
org.apache.thriftlibthrift
@@ -2062,6 +2092,13 @@
+
+
+ at.yawk.lz4
+ lz4-java
+ 1.10.2
+
+
org.apache.httpcomponentshttpclient
@@ -2313,7 +2350,7 @@
org.xerial.snappysnappy-java
- 1.1.10.7
+ 1.1.10.8
@@ -2582,19 +2619,19 @@
io.opentelemetryopentelemetry-api
- 1.19.0
+ ${dep.io.opentelemetry.version}io.opentelemetryopentelemetry-context
- 1.19.0
+ ${dep.io.opentelemetry.version}io.opentelemetryopentelemetry-exporter-otlp
- 1.19.0
+ ${dep.io.opentelemetry.version}com.squareup.okhttp3
@@ -2606,31 +2643,31 @@
io.opentelemetryopentelemetry-extension-trace-propagators
- 1.19.0
+ ${dep.io.opentelemetry.version}io.opentelemetryopentelemetry-sdk
- 1.19.0
+ ${dep.io.opentelemetry.version}io.opentelemetryopentelemetry-sdk-common
- 1.19.0
+ ${dep.io.opentelemetry.version}io.opentelemetryopentelemetry-sdk-trace
- 1.19.0
+ ${dep.io.opentelemetry.version}
- io.opentelemetry
+ io.opentelemetry.semconvopentelemetry-semconv
- 1.19.0-alpha
+ 1.37.0
diff --git a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/Analysis.java b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/Analysis.java
index d5329e375d58f..c3c1f43f8e142 100644
--- a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/Analysis.java
+++ b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/Analysis.java
@@ -28,6 +28,7 @@
import com.facebook.presto.spi.analyzer.AccessControlReferences;
import com.facebook.presto.spi.analyzer.AccessControlRole;
import com.facebook.presto.spi.analyzer.UpdateInfo;
+import com.facebook.presto.spi.analyzer.ViewDefinitionReferences;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.facebook.presto.spi.eventlistener.OutputColumnMetadata;
import com.facebook.presto.spi.function.FunctionHandle;
@@ -241,11 +242,14 @@ public class Analysis
// Row id field used for MERGE INTO command.
private final Map, FieldReference> rowIdField = new LinkedHashMap<>();
- public Analysis(@Nullable Statement root, Map, Expression> parameters, boolean isDescribe)
+ private final ViewDefinitionReferences viewDefinitionReferences;
+
+ public Analysis(@Nullable Statement root, Map, Expression> parameters, boolean isDescribe, ViewDefinitionReferences viewDefinitionReferences)
{
this.root = root;
this.parameters = ImmutableMap.copyOf(requireNonNull(parameters, "parameterMap is null"));
this.isDescribe = isDescribe;
+ this.viewDefinitionReferences = requireNonNull(viewDefinitionReferences, "viewDefinitionReferences is null");
}
public Statement getStatement()
@@ -957,9 +961,9 @@ public AccessControlReferences getAccessControlReferences()
return accessControlReferences;
}
- public void addQueryAccessControlInfo(AccessControlInfo accessControlInfo)
+ public ViewDefinitionReferences getViewDefinitionReferences()
{
- accessControlReferences.setQueryAccessControlInfo(accessControlInfo);
+ return viewDefinitionReferences;
}
public void addAccessControlCheckForTable(AccessControlRole accessControlRole, AccessControlInfoForTable accessControlInfoForTable)
diff --git a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/BuiltInQueryAnalysis.java b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/BuiltInQueryAnalysis.java
index 69b55766ca693..3a70ce4e66b0b 100644
--- a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/BuiltInQueryAnalysis.java
+++ b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/BuiltInQueryAnalysis.java
@@ -18,6 +18,7 @@
import com.facebook.presto.spi.analyzer.AccessControlReferences;
import com.facebook.presto.spi.analyzer.QueryAnalysis;
import com.facebook.presto.spi.analyzer.UpdateInfo;
+import com.facebook.presto.spi.analyzer.ViewDefinitionReferences;
import com.facebook.presto.spi.function.FunctionKind;
import com.facebook.presto.sql.tree.Explain;
import com.google.common.collect.ImmutableSet;
@@ -65,6 +66,12 @@ public AccessControlReferences getAccessControlReferences()
return analysis.getAccessControlReferences();
}
+ @Override
+ public ViewDefinitionReferences getViewDefinitionReferences()
+ {
+ return analysis.getViewDefinitionReferences();
+ }
+
@Override
public boolean isExplainAnalyzeQuery()
{
diff --git a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/utils/StatementUtils.java b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/utils/StatementUtils.java
index e777e219eae6e..1be20437e8a1c 100644
--- a/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/utils/StatementUtils.java
+++ b/presto-analyzer/src/main/java/com/facebook/presto/sql/analyzer/utils/StatementUtils.java
@@ -21,12 +21,14 @@
import com.facebook.presto.sql.tree.Analyze;
import com.facebook.presto.sql.tree.Call;
import com.facebook.presto.sql.tree.Commit;
+import com.facebook.presto.sql.tree.CreateBranch;
import com.facebook.presto.sql.tree.CreateFunction;
import com.facebook.presto.sql.tree.CreateMaterializedView;
import com.facebook.presto.sql.tree.CreateRole;
import com.facebook.presto.sql.tree.CreateSchema;
import com.facebook.presto.sql.tree.CreateTable;
import com.facebook.presto.sql.tree.CreateTableAsSelect;
+import com.facebook.presto.sql.tree.CreateTag;
import com.facebook.presto.sql.tree.CreateType;
import com.facebook.presto.sql.tree.CreateView;
import com.facebook.presto.sql.tree.Deallocate;
@@ -131,6 +133,8 @@ private StatementUtils() {}
builder.put(CreateType.class, QueryType.DATA_DEFINITION);
builder.put(AddColumn.class, QueryType.DATA_DEFINITION);
builder.put(CreateTable.class, QueryType.DATA_DEFINITION);
+ builder.put(CreateBranch.class, QueryType.DATA_DEFINITION);
+ builder.put(CreateTag.class, QueryType.DATA_DEFINITION);
builder.put(RenameTable.class, QueryType.DATA_DEFINITION);
builder.put(RenameColumn.class, QueryType.DATA_DEFINITION);
builder.put(DropColumn.class, QueryType.DATA_DEFINITION);
diff --git a/presto-base-arrow-flight/pom.xml b/presto-base-arrow-flight/pom.xml
index 004f690c12ffb..2bec314c35953 100644
--- a/presto-base-arrow-flight/pom.xml
+++ b/presto-base-arrow-flight/pom.xml
@@ -29,6 +29,11 @@
+
+ com.facebook.presto
+ presto-common-arrow
+
+
org.apache.arrowarrow-memory-core
@@ -250,6 +255,7 @@
com.fasterxml.jackson.core:jackson-databindcom.facebook.airlift:log-managerjavax.inject:javax.inject
+ io.airlift:slice
diff --git a/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java b/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java
index f42941a96387c..b72e2100339c1 100644
--- a/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java
+++ b/presto-base-arrow-flight/src/test/java/com/facebook/plugin/arrow/testingConnector/TestingArrowBlockBuilder.java
@@ -34,7 +34,7 @@ public TestingArrowBlockBuilder(TypeManager typeManager)
}
@Override
- protected Type getPrestoTypeFromArrowField(Field field)
+ public Type getPrestoTypeFromArrowField(Field field)
{
String columnLength = field.getMetadata().get("columnLength");
int length = columnLength != null ? Integer.parseInt(columnLength) : 0;
diff --git a/presto-built-in-worker-function-tools/src/main/java/com/facebook/presto/builtin/tools/WorkerFunctionUtil.java b/presto-built-in-worker-function-tools/src/main/java/com/facebook/presto/builtin/tools/WorkerFunctionUtil.java
index d880429f7489f..3ecd353d21327 100644
--- a/presto-built-in-worker-function-tools/src/main/java/com/facebook/presto/builtin/tools/WorkerFunctionUtil.java
+++ b/presto-built-in-worker-function-tools/src/main/java/com/facebook/presto/builtin/tools/WorkerFunctionUtil.java
@@ -17,6 +17,7 @@
import com.facebook.presto.common.CatalogSchemaName;
import com.facebook.presto.common.QualifiedObjectName;
import com.facebook.presto.common.type.NamedTypeSignature;
+import com.facebook.presto.common.type.RowFieldName;
import com.facebook.presto.common.type.StandardTypes;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.common.type.TypeSignatureParameter;
@@ -154,10 +155,12 @@ private static List getTypeSignatureParameters(
parameterTypeSignature.getStandardTypeSignature(),
parameterTypeSignature.getParameters()));
if (isNamedTypeSignature) {
+ // Preserve the original field name if present, otherwise use Optional.empty()
+ Optional fieldName = parameter.getNamedTypeSignature().getFieldName();
newParameterTypeList.add(
TypeSignatureParameter.of(
new NamedTypeSignature(
- Optional.empty(),
+ fieldName,
newTypeSignature)));
}
else {
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraClientModule.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraClientModule.java
index c6a3242f0b7ff..b851cdc81146a 100644
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraClientModule.java
+++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraClientModule.java
@@ -57,7 +57,6 @@ public void configure(Binder binder)
{
binder.bind(CassandraConnectorId.class).toInstance(new CassandraConnectorId(connectorId));
binder.bind(CassandraConnector.class).in(Scopes.SINGLETON);
- binder.bind(CassandraMetadata.class).in(Scopes.SINGLETON);
binder.bind(CassandraSplitManager.class).in(Scopes.SINGLETON);
binder.bind(CassandraTokenSplitManager.class).in(Scopes.SINGLETON);
binder.bind(CassandraRecordSetProvider.class).in(Scopes.SINGLETON);
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnector.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnector.java
index 016f2c8022465..9562a9afb6971 100644
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnector.java
+++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnector.java
@@ -14,8 +14,10 @@
package com.facebook.presto.cassandra;
import com.facebook.airlift.bootstrap.LifeCycleManager;
+import com.facebook.airlift.json.JsonCodec;
import com.facebook.airlift.log.Logger;
import com.facebook.presto.spi.connector.Connector;
+import com.facebook.presto.spi.connector.ConnectorCommitHandle;
import com.facebook.presto.spi.connector.ConnectorMetadata;
import com.facebook.presto.spi.connector.ConnectorPageSinkProvider;
import com.facebook.presto.spi.connector.ConnectorRecordSetProvider;
@@ -26,9 +28,13 @@
import jakarta.inject.Inject;
import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import static com.facebook.presto.spi.connector.EmptyConnectorCommitHandle.INSTANCE;
import static com.facebook.presto.spi.transaction.IsolationLevel.READ_UNCOMMITTED;
import static com.facebook.presto.spi.transaction.IsolationLevel.checkConnectorSupports;
+import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;
public class CassandraConnector
@@ -36,35 +42,66 @@ public class CassandraConnector
{
private static final Logger log = Logger.get(CassandraConnector.class);
+ private final CassandraConnectorId connectorId;
private final LifeCycleManager lifeCycleManager;
- private final CassandraMetadata metadata;
+ private final CassandraPartitionManager partitionManager;
+ private final CassandraClientConfig config;
+ private final CassandraSession cassandraSession;
private final CassandraSplitManager splitManager;
private final ConnectorRecordSetProvider recordSetProvider;
private final ConnectorPageSinkProvider pageSinkProvider;
private final List> sessionProperties;
+ private final JsonCodec> extraColumnMetadataCodec;
+ private final ConcurrentMap transactions = new ConcurrentHashMap<>();
@Inject
public CassandraConnector(
+ CassandraConnectorId connectorId,
LifeCycleManager lifeCycleManager,
- CassandraMetadata metadata,
CassandraSplitManager splitManager,
CassandraRecordSetProvider recordSetProvider,
CassandraPageSinkProvider pageSinkProvider,
- CassandraSessionProperties sessionProperties)
+ CassandraSessionProperties sessionProperties,
+ CassandraSession cassandraSession,
+ CassandraPartitionManager partitionManager,
+ JsonCodec> extraColumnMetadataCodec,
+ CassandraClientConfig config)
{
+ this.connectorId = requireNonNull(connectorId, "connectorId is null");
this.lifeCycleManager = requireNonNull(lifeCycleManager, "lifeCycleManager is null");
- this.metadata = requireNonNull(metadata, "metadata is null");
this.splitManager = requireNonNull(splitManager, "splitManager is null");
this.recordSetProvider = requireNonNull(recordSetProvider, "recordSetProvider is null");
this.pageSinkProvider = requireNonNull(pageSinkProvider, "pageSinkProvider is null");
this.sessionProperties = requireNonNull(sessionProperties.getSessionProperties(), "sessionProperties is null");
+ this.partitionManager = requireNonNull(partitionManager, "partitionManager is null");
+ this.cassandraSession = requireNonNull(cassandraSession, "cassandraSession is null");
+ this.config = requireNonNull(config, "config is null");
+ this.extraColumnMetadataCodec = requireNonNull(extraColumnMetadataCodec, "extraColumnMetadataCodec is null");
}
@Override
public ConnectorTransactionHandle beginTransaction(IsolationLevel isolationLevel, boolean readOnly)
{
checkConnectorSupports(READ_UNCOMMITTED, isolationLevel);
- return CassandraTransactionHandle.INSTANCE;
+ CassandraTransactionHandle transaction = new CassandraTransactionHandle();
+ transactions.put(transaction,
+ new CassandraMetadata(connectorId, cassandraSession, partitionManager, extraColumnMetadataCodec, config));
+ return transaction;
+ }
+
+ @Override
+ public ConnectorCommitHandle commit(ConnectorTransactionHandle transaction)
+ {
+ checkArgument(transactions.remove(transaction) != null, "no such transaction: %s", transaction);
+ return INSTANCE;
+ }
+
+ @Override
+ public void rollback(ConnectorTransactionHandle transaction)
+ {
+ CassandraMetadata metadata = transactions.remove(transaction);
+ checkArgument(metadata != null, "no such transaction: %s", transaction);
+ metadata.rollback();
}
@Override
@@ -74,8 +111,10 @@ public boolean isSingleStatementWritesOnly()
}
@Override
- public ConnectorMetadata getMetadata(ConnectorTransactionHandle transactionHandle)
+ public ConnectorMetadata getMetadata(ConnectorTransactionHandle transaction)
{
+ CassandraMetadata metadata = transactions.get(transaction);
+ checkArgument(metadata != null, "no such transaction: %s", transaction);
return metadata;
}
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnectorFactory.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnectorFactory.java
index 3a2b2cda4a574..e63707de290ee 100644
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnectorFactory.java
+++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraConnectorFactory.java
@@ -15,6 +15,7 @@
import com.facebook.airlift.bootstrap.Bootstrap;
import com.facebook.airlift.json.JsonModule;
+import com.facebook.presto.common.util.RebindSafeMBeanServer;
import com.facebook.presto.spi.ConnectorHandleResolver;
import com.facebook.presto.spi.connector.Connector;
import com.facebook.presto.spi.connector.ConnectorContext;
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java
index da86b2293fdcc..b4ea65a7ecb48 100644
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java
+++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraMetadata.java
@@ -41,13 +41,13 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slice;
-import jakarta.inject.Inject;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import static com.facebook.presto.cassandra.CassandraType.toCassandraType;
@@ -57,6 +57,7 @@
import static com.facebook.presto.spi.StandardErrorCode.PERMISSION_DENIED;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Locale.ROOT;
import static java.util.Objects.requireNonNull;
@@ -72,8 +73,8 @@ public class CassandraMetadata
private boolean caseSensitiveNameMatchingEnabled;
private final JsonCodec> extraColumnMetadataCodec;
+ private final AtomicReference rollbackAction = new AtomicReference<>();
- @Inject
public CassandraMetadata(
CassandraConnectorId connectorId,
CassandraSession cassandraSession,
@@ -319,6 +320,9 @@ private CassandraOutputTableHandle createTable(ConnectorSession session, Connect
// We need to create the Cassandra table before commit because the record needs to be written to the table.
cassandraSession.execute(queryBuilder.toString());
+
+ // set a rollback to delete the created table in case of an abort / failure.
+ setRollback(schemaName, tableName);
return new CassandraOutputTableHandle(
connectorId,
schemaName,
@@ -330,6 +334,7 @@ private CassandraOutputTableHandle createTable(ConnectorSession session, Connect
@Override
public Optional finishCreateTable(ConnectorSession session, ConnectorOutputTableHandle tableHandle, Collection fragments, Collection computedStatistics)
{
+ clearRollback();
return Optional.empty();
}
@@ -365,4 +370,30 @@ public String normalizeIdentifier(ConnectorSession session, String identifier)
{
return caseSensitiveNameMatchingEnabled ? identifier : identifier.toLowerCase(ROOT);
}
+
+ public void rollback()
+ {
+ Runnable action = rollbackAction.getAndSet(null);
+ if (action == null) {
+ return; // nothing to roll back
+ }
+
+ if (!allowDropTable) {
+ throw new PrestoException(
+ PERMISSION_DENIED,
+ "Table creation was aborted and requires rollback, but cleanup failed because DROP TABLE is disabled in this Cassandra catalog.");
+ }
+
+ action.run();
+ }
+
+ private void setRollback(String schemaName, String tableName)
+ {
+ checkState(rollbackAction.compareAndSet(null, () -> cassandraSession.execute(String.format("DROP TABLE \"%s\".\"%s\"", schemaName, tableName))), "rollback action is already set");
+ }
+
+ private void clearRollback()
+ {
+ rollbackAction.set(null);
+ }
}
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraTransactionHandle.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraTransactionHandle.java
index 7a2eb23d4f162..4128e287135ef 100644
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraTransactionHandle.java
+++ b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/CassandraTransactionHandle.java
@@ -14,9 +14,61 @@
package com.facebook.presto.cassandra;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
-public enum CassandraTransactionHandle
+import java.util.Objects;
+import java.util.UUID;
+
+import static com.google.common.base.MoreObjects.toStringHelper;
+import static java.util.Objects.requireNonNull;
+
+public class CassandraTransactionHandle
implements ConnectorTransactionHandle
{
- INSTANCE
+ private final UUID uuid;
+
+ public CassandraTransactionHandle()
+ {
+ this(UUID.randomUUID());
+ }
+
+ @JsonCreator
+ public CassandraTransactionHandle(@JsonProperty("uuid") UUID uuid)
+ {
+ this.uuid = requireNonNull(uuid, "uuid is null");
+ }
+
+ @JsonProperty
+ public UUID getUuid()
+ {
+ return uuid;
+ }
+
+ @Override
+ public boolean equals(Object obj)
+ {
+ if (this == obj) {
+ return true;
+ }
+ if ((obj == null) || (getClass() != obj.getClass())) {
+ return false;
+ }
+ CassandraTransactionHandle other = (CassandraTransactionHandle) obj;
+ return Objects.equals(uuid, other.uuid);
+ }
+
+ @Override
+ public int hashCode()
+ {
+ return Objects.hash(uuid);
+ }
+
+ @Override
+ public String toString()
+ {
+ return toStringHelper(this)
+ .add("uuid", uuid)
+ .toString();
+ }
}
diff --git a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/RebindSafeMBeanServer.java b/presto-cassandra/src/main/java/com/facebook/presto/cassandra/RebindSafeMBeanServer.java
deleted file mode 100644
index 9525145e4e510..0000000000000
--- a/presto-cassandra/src/main/java/com/facebook/presto/cassandra/RebindSafeMBeanServer.java
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package com.facebook.presto.cassandra;
-
-import com.facebook.airlift.log.Logger;
-import com.google.errorprone.annotations.ThreadSafe;
-
-import javax.management.Attribute;
-import javax.management.AttributeList;
-import javax.management.AttributeNotFoundException;
-import javax.management.InstanceAlreadyExistsException;
-import javax.management.InstanceNotFoundException;
-import javax.management.IntrospectionException;
-import javax.management.InvalidAttributeValueException;
-import javax.management.ListenerNotFoundException;
-import javax.management.MBeanException;
-import javax.management.MBeanInfo;
-import javax.management.MBeanRegistrationException;
-import javax.management.MBeanServer;
-import javax.management.NotCompliantMBeanException;
-import javax.management.NotificationFilter;
-import javax.management.NotificationListener;
-import javax.management.ObjectInstance;
-import javax.management.ObjectName;
-import javax.management.OperationsException;
-import javax.management.QueryExp;
-import javax.management.ReflectionException;
-import javax.management.loading.ClassLoaderRepository;
-
-import java.io.ObjectInputStream;
-import java.util.Set;
-
-/**
- * MBeanServer wrapper that a ignores calls to registerMBean when there is already
- * a MBean registered with the specified object name.
- */
-@ThreadSafe
-public class RebindSafeMBeanServer
- implements MBeanServer
-{
- private static final Logger log = Logger.get(RebindSafeMBeanServer.class);
-
- private final MBeanServer mbeanServer;
-
- public RebindSafeMBeanServer(MBeanServer mbeanServer)
- {
- this.mbeanServer = mbeanServer;
- }
-
- /**
- * Delegates to the wrapped mbean server, but if a mbean is already registered
- * with the specified name, the existing instance is returned.
- */
- @Override
- public ObjectInstance registerMBean(Object object, ObjectName name)
- throws MBeanRegistrationException, NotCompliantMBeanException
- {
- while (true) {
- try {
- // try to register the mbean
- return mbeanServer.registerMBean(object, name);
- }
- catch (InstanceAlreadyExistsException ignored) {
- }
-
- try {
- // a mbean is already installed, try to return the already registered instance
- ObjectInstance objectInstance = mbeanServer.getObjectInstance(name);
- log.debug("%s already bound to %s", name, objectInstance);
- return objectInstance;
- }
- catch (InstanceNotFoundException ignored) {
- // the mbean was removed before we could get the reference
- // start the whole process over again
- }
- }
- }
-
- @Override
- public void unregisterMBean(ObjectName name)
- throws InstanceNotFoundException, MBeanRegistrationException
- {
- mbeanServer.unregisterMBean(name);
- }
-
- @Override
- public ObjectInstance getObjectInstance(ObjectName name)
- throws InstanceNotFoundException
- {
- return mbeanServer.getObjectInstance(name);
- }
-
- @Override
- public Set queryMBeans(ObjectName name, QueryExp query)
- {
- return mbeanServer.queryMBeans(name, query);
- }
-
- @Override
- public Set queryNames(ObjectName name, QueryExp query)
- {
- return mbeanServer.queryNames(name, query);
- }
-
- @Override
- public boolean isRegistered(ObjectName name)
- {
- return mbeanServer.isRegistered(name);
- }
-
- @Override
- public Integer getMBeanCount()
- {
- return mbeanServer.getMBeanCount();
- }
-
- @Override
- public Object getAttribute(ObjectName name, String attribute)
- throws MBeanException, AttributeNotFoundException, InstanceNotFoundException, ReflectionException
- {
- return mbeanServer.getAttribute(name, attribute);
- }
-
- @Override
- public AttributeList getAttributes(ObjectName name, String[] attributes)
- throws InstanceNotFoundException, ReflectionException
- {
- return mbeanServer.getAttributes(name, attributes);
- }
-
- @Override
- public void setAttribute(ObjectName name, Attribute attribute)
- throws InstanceNotFoundException, AttributeNotFoundException, InvalidAttributeValueException, MBeanException, ReflectionException
- {
- mbeanServer.setAttribute(name, attribute);
- }
-
- @Override
- public AttributeList setAttributes(ObjectName name, AttributeList attributes)
- throws InstanceNotFoundException, ReflectionException
- {
- return mbeanServer.setAttributes(name, attributes);
- }
-
- @Override
- public Object invoke(ObjectName name, String operationName, Object[] params, String[] signature)
- throws InstanceNotFoundException, MBeanException, ReflectionException
- {
- return mbeanServer.invoke(name, operationName, params, signature);
- }
-
- @Override
- public String getDefaultDomain()
- {
- return mbeanServer.getDefaultDomain();
- }
-
- @Override
- public String[] getDomains()
- {
- return mbeanServer.getDomains();
- }
-
- @Override
- public void addNotificationListener(ObjectName name, NotificationListener listener, NotificationFilter filter, Object context)
- throws InstanceNotFoundException
- {
- mbeanServer.addNotificationListener(name, listener, filter, context);
- }
-
- @Override
- public void addNotificationListener(ObjectName name, ObjectName listener, NotificationFilter filter, Object context)
- throws InstanceNotFoundException
- {
- mbeanServer.addNotificationListener(name, listener, filter, context);
- }
-
- @Override
- public void removeNotificationListener(ObjectName name, ObjectName listener)
- throws InstanceNotFoundException, ListenerNotFoundException
- {
- mbeanServer.removeNotificationListener(name, listener);
- }
-
- @Override
- public void removeNotificationListener(ObjectName name, ObjectName listener, NotificationFilter filter, Object context)
- throws InstanceNotFoundException, ListenerNotFoundException
- {
- mbeanServer.removeNotificationListener(name, listener, filter, context);
- }
-
- @Override
- public void removeNotificationListener(ObjectName name, NotificationListener listener)
- throws InstanceNotFoundException, ListenerNotFoundException
- {
- mbeanServer.removeNotificationListener(name, listener);
- }
-
- @Override
- public void removeNotificationListener(ObjectName name, NotificationListener listener, NotificationFilter filter, Object context)
- throws InstanceNotFoundException, ListenerNotFoundException
- {
- mbeanServer.removeNotificationListener(name, listener, filter, context);
- }
-
- @Override
- public MBeanInfo getMBeanInfo(ObjectName name)
- throws InstanceNotFoundException, IntrospectionException, ReflectionException
- {
- return mbeanServer.getMBeanInfo(name);
- }
-
- @Override
- public boolean isInstanceOf(ObjectName name, String className)
- throws InstanceNotFoundException
- {
- return mbeanServer.isInstanceOf(name, className);
- }
-
- @Override
- public Object instantiate(String className)
- throws ReflectionException, MBeanException
- {
- return mbeanServer.instantiate(className);
- }
-
- @Override
- public Object instantiate(String className, ObjectName loaderName)
- throws ReflectionException, MBeanException, InstanceNotFoundException
- {
- return mbeanServer.instantiate(className, loaderName);
- }
-
- @Override
- public Object instantiate(String className, Object[] params, String[] signature)
- throws ReflectionException, MBeanException
- {
- return mbeanServer.instantiate(className, params, signature);
- }
-
- @Override
- public Object instantiate(String className, ObjectName loaderName, Object[] params, String[] signature)
- throws ReflectionException, MBeanException, InstanceNotFoundException
- {
- return mbeanServer.instantiate(className, loaderName, params, signature);
- }
-
- @Override
- @Deprecated
- @SuppressWarnings("deprecation")
- public ObjectInputStream deserialize(ObjectName name, byte[] data)
- throws OperationsException
- {
- return mbeanServer.deserialize(name, data);
- }
-
- @Override
- @Deprecated
- @SuppressWarnings("deprecation")
- public ObjectInputStream deserialize(String className, byte[] data)
- throws OperationsException, ReflectionException
- {
- return mbeanServer.deserialize(className, data);
- }
-
- @Override
- @Deprecated
- @SuppressWarnings("deprecation")
- public ObjectInputStream deserialize(String className, ObjectName loaderName, byte[] data)
- throws OperationsException, ReflectionException
- {
- return mbeanServer.deserialize(className, loaderName, data);
- }
-
- @Override
- public ClassLoader getClassLoaderFor(ObjectName mbeanName)
- throws InstanceNotFoundException
- {
- return mbeanServer.getClassLoaderFor(mbeanName);
- }
-
- @Override
- public ClassLoader getClassLoader(ObjectName loaderName)
- throws InstanceNotFoundException
- {
- return mbeanServer.getClassLoader(loaderName);
- }
-
- @Override
- public ClassLoaderRepository getClassLoaderRepository()
- {
- return mbeanServer.getClassLoaderRepository();
- }
-
- @Override
- public ObjectInstance createMBean(String className, ObjectName name)
- throws ReflectionException, InstanceAlreadyExistsException, MBeanException, NotCompliantMBeanException
- {
- return mbeanServer.createMBean(className, name);
- }
-
- @Override
- public ObjectInstance createMBean(String className, ObjectName name, ObjectName loaderName)
- throws ReflectionException, InstanceAlreadyExistsException, MBeanException, NotCompliantMBeanException, InstanceNotFoundException
- {
- return mbeanServer.createMBean(className, name, loaderName);
- }
-
- @Override
- public ObjectInstance createMBean(String className, ObjectName name, Object[] params, String[] signature)
- throws ReflectionException, InstanceAlreadyExistsException, MBeanException, NotCompliantMBeanException
- {
- return mbeanServer.createMBean(className, name, params, signature);
- }
-
- @Override
- public ObjectInstance createMBean(String className, ObjectName name, ObjectName loaderName, Object[] params, String[] signature)
- throws ReflectionException, InstanceAlreadyExistsException, MBeanException, NotCompliantMBeanException, InstanceNotFoundException
- {
- return mbeanServer.createMBean(className, name, loaderName, params, signature);
- }
-}
diff --git a/presto-cassandra/src/test/java/com/facebook/presto/cassandra/TestCassandraConnector.java b/presto-cassandra/src/test/java/com/facebook/presto/cassandra/TestCassandraConnector.java
index 49a8a2bdc247d..a3535c0e46b4f 100644
--- a/presto-cassandra/src/test/java/com/facebook/presto/cassandra/TestCassandraConnector.java
+++ b/presto-cassandra/src/test/java/com/facebook/presto/cassandra/TestCassandraConnector.java
@@ -17,6 +17,7 @@
import com.facebook.presto.common.type.Type;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ColumnMetadata;
+import com.facebook.presto.spi.ConnectorOutputTableHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplit;
import com.facebook.presto.spi.ConnectorSplitSource;
@@ -66,10 +67,12 @@
import static com.facebook.presto.common.type.Varchars.isVarcharType;
import static com.facebook.presto.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING;
import static com.facebook.presto.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED;
+import static com.facebook.presto.spi.transaction.IsolationLevel.READ_UNCOMMITTED;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Locale.ENGLISH;
import static java.util.Locale.ROOT;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
@@ -98,10 +101,11 @@ public class TestCassandraConnector
protected SchemaTableName table;
protected SchemaTableName tableUnpartitioned;
protected SchemaTableName invalidTable;
+ protected SchemaTableName rollbackTable;
private CassandraServer server;
- private ConnectorMetadata metadata;
private ConnectorSplitManager splitManager;
private ConnectorRecordSetProvider recordSetProvider;
+ private Connector connector;
@BeforeClass
public void setup()
@@ -115,14 +119,12 @@ public void setup()
String connectorId = "cassandra-test";
CassandraConnectorFactory connectorFactory = new CassandraConnectorFactory(connectorId);
- Connector connector = connectorFactory.create(connectorId, ImmutableMap.of(
- "cassandra.contact-points", server.getHost(),
- "cassandra.native-protocol-port", Integer.toString(server.getPort())),
+ connector = connectorFactory.create(connectorId, ImmutableMap.of(
+ "cassandra.contact-points", server.getHost(),
+ "cassandra.native-protocol-port", Integer.toString(server.getPort()),
+ "cassandra.allow-drop-table", "true"),
new TestingConnectorContext());
- metadata = connector.getMetadata(CassandraTransactionHandle.INSTANCE);
- assertInstanceOf(metadata, CassandraMetadata.class);
-
splitManager = connector.getSplitManager();
assertInstanceOf(splitManager, CassandraSplitManager.class);
@@ -133,6 +135,7 @@ public void setup()
table = new SchemaTableName(database, TABLE_ALL_TYPES.toLowerCase(ROOT));
tableUnpartitioned = new SchemaTableName(database, "presto_test_unpartitioned");
invalidTable = new SchemaTableName(database, "totally_invalid_table_name");
+ rollbackTable = new SchemaTableName(database, "rollback_table");
}
@Test
@@ -149,6 +152,8 @@ public void tearDown()
@Test
public void testGetDatabaseNames()
{
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
List databases = metadata.listSchemaNames(SESSION);
assertTrue(databases.contains(database.toLowerCase(ROOT)));
}
@@ -156,6 +161,8 @@ public void testGetDatabaseNames()
@Test
public void testGetTableNames()
{
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
List tables = metadata.listTables(SESSION, database);
assertTrue(tables.contains(table));
}
@@ -164,12 +171,16 @@ public void testGetTableNames()
@Test(enabled = false, expectedExceptions = SchemaNotFoundException.class)
public void testGetTableNamesException()
{
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
metadata.listTables(SESSION, INVALID_DATABASE);
}
@Test
public void testListUnknownSchema()
{
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
assertNull(metadata.getTableHandle(SESSION, new SchemaTableName("totally_invalid_database_name", "dual")));
assertEquals(metadata.listTables(SESSION, "totally_invalid_database_name"), ImmutableList.of());
assertEquals(metadata.listTableColumns(SESSION, new SchemaTablePrefix("totally_invalid_database_name", "dual")), ImmutableMap.of());
@@ -178,23 +189,23 @@ public void testListUnknownSchema()
@Test
public void testGetRecords()
{
- ConnectorTableHandle tableHandle = getTableHandle(table);
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
+ ConnectorTableHandle tableHandle = getTableHandle(table, metadata);
ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(SESSION, tableHandle);
List columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(SESSION, tableHandle).values());
Map columnIndex = indexColumns(columnHandles);
- ConnectorTransactionHandle transaction = CassandraTransactionHandle.INSTANCE;
-
ConnectorTableLayoutResult layoutResult = metadata.getTableLayoutForConstraint(SESSION, tableHandle, Constraint.alwaysTrue(), Optional.empty());
ConnectorTableLayoutHandle layout = layoutResult.getTableLayout().getHandle();
- List splits = getAllSplits(splitManager.getSplits(transaction, SESSION, layout, new SplitSchedulingContext(UNGROUPED_SCHEDULING, false, WarningCollector.NOOP)));
+ List splits = getAllSplits(splitManager.getSplits(transactionHandle, SESSION, layout, new SplitSchedulingContext(UNGROUPED_SCHEDULING, false, WarningCollector.NOOP)));
long rowNumber = 0;
for (ConnectorSplit split : splits) {
CassandraSplit cassandraSplit = (CassandraSplit) split;
long completedBytes = 0;
- try (RecordCursor cursor = recordSetProvider.getRecordSet(transaction, SESSION, cassandraSplit, columnHandles).cursor()) {
+ try (RecordCursor cursor = recordSetProvider.getRecordSet(transactionHandle, SESSION, cassandraSplit, columnHandles).cursor()) {
while (cursor.advanceNextPosition()) {
try {
assertReadFields(cursor, tableMetadata.getColumns());
@@ -231,6 +242,39 @@ public void testGetRecords()
assertEquals(rowNumber, 9);
}
+ @Test
+ public void testRollbackTables()
+ {
+ ConnectorTableMetadata connectorTableMetadata = new ConnectorTableMetadata(
+ rollbackTable,
+ ImmutableList.of(
+ ColumnMetadata.builder()
+ .setName("test_col")
+ .setType(BIGINT)
+ .build()));
+
+ // start a transaction
+ ConnectorTransactionHandle transactionHandle = connector.beginTransaction(READ_UNCOMMITTED, true);
+ ConnectorMetadata metadata = connector.getMetadata(transactionHandle);
+ ConnectorOutputTableHandle handle = null;
+
+ try {
+ // Begin table creation (STAGING only)
+ handle = metadata.beginCreateTable(SESSION, connectorTableMetadata, Optional.empty());
+ // simulate a failure
+ throw new RuntimeException("Force failure before finish");
+ }
+ catch (RuntimeException e) {
+ if (handle != null) {
+ // table should exist
+ assertTrue(metadata.listTables(SESSION, database).contains(rollbackTable));
+ // rollback table
+ connector.rollback(transactionHandle);
+ }
+ }
+ assertFalse(metadata.listTables(SESSION, database).contains(rollbackTable));
+ }
+
private static void assertReadFields(RecordCursor cursor, List schema)
{
for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) {
@@ -270,7 +314,7 @@ else if (isVarcharType(type) || VARBINARY.equals(type)) {
}
}
- private ConnectorTableHandle getTableHandle(SchemaTableName tableName)
+ private ConnectorTableHandle getTableHandle(SchemaTableName tableName, ConnectorMetadata metadata)
{
ConnectorTableHandle handle = metadata.getTableHandle(SESSION, tableName);
checkArgument(handle != null, "table not found: %s", tableName);
diff --git a/presto-clickhouse/pom.xml b/presto-clickhouse/pom.xml
index f6ece72d07bd9..b143e209efa5c 100644
--- a/presto-clickhouse/pom.xml
+++ b/presto-clickhouse/pom.xml
@@ -196,13 +196,13 @@
org.testcontainers
- clickhouse
+ testcontainers-clickhouse
+ test
 org.testcontainers
- jdbc
+ testcontainers-jdbc
+ test
diff --git a/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/ClickHouseQueryRunner.java b/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/ClickHouseQueryRunner.java
index a0519c2f6855b..1f5847ba8efdc 100755
--- a/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/ClickHouseQueryRunner.java
+++ b/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/ClickHouseQueryRunner.java
@@ -60,6 +60,8 @@ public static DistributedQueryRunner createClickHouseQueryRunner(
connectorProperties = new HashMap<>(ImmutableMap.copyOf(connectorProperties));
connectorProperties.putIfAbsent("clickhouse.connection-url", server.getJdbcUrl());
+ connectorProperties.putIfAbsent("clickhouse.connection-user", server.getClickHouseContainer().getUsername());
+ connectorProperties.putIfAbsent("clickhouse.connection-password", server.getClickHouseContainer().getPassword());
connectorProperties.putIfAbsent("clickhouse.allow-drop-table", String.valueOf(true));
connectorProperties.putIfAbsent("clickhouse.map-string-as-varchar", String.valueOf(true));
diff --git a/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/TestingClickHouseServer.java b/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/TestingClickHouseServer.java
index 87dbb916f1179..1a1c80c312782 100755
--- a/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/TestingClickHouseServer.java
+++ b/presto-clickhouse/src/test/java/com/facebook/presto/plugin/clickhouse/TestingClickHouseServer.java
@@ -13,16 +13,13 @@
*/
package com.facebook.presto.plugin.clickhouse;
-import org.testcontainers.containers.ClickHouseContainer;
+import org.testcontainers.clickhouse.ClickHouseContainer;
import java.io.Closeable;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
-import static java.lang.String.format;
-import static org.testcontainers.containers.ClickHouseContainer.HTTP_PORT;
-
public class TestingClickHouseServer
implements Closeable
{
@@ -44,7 +41,10 @@ public ClickHouseContainer getClickHouseContainer()
}
public void execute(String sql)
{
- try (Connection connection = DriverManager.getConnection(getJdbcUrl());
+ try (Connection connection = DriverManager.getConnection(
+ getJdbcUrl(),
+ dockerContainer.getUsername(),
+ dockerContainer.getPassword());
Statement statement = connection.createStatement()) {
statement.execute(sql);
}
@@ -55,10 +55,7 @@ public void execute(String sql)
public String getJdbcUrl()
{
- String s = format("jdbc:clickhouse://%s:%s/", dockerContainer.getContainerIpAddress(),
- dockerContainer.getMappedPort(HTTP_PORT));
- return format("jdbc:clickhouse://%s:%s/", dockerContainer.getContainerIpAddress(),
- dockerContainer.getMappedPort(HTTP_PORT));
+ return dockerContainer.getJdbcUrl();
}
@Override
diff --git a/presto-common-arrow/pom.xml b/presto-common-arrow/pom.xml
new file mode 100644
index 0000000000000..c2507c9f136d4
--- /dev/null
+++ b/presto-common-arrow/pom.xml
@@ -0,0 +1,87 @@
+
+
+ 4.0.0
+
+
+ com.facebook.presto
+ presto-root
+ 0.297-SNAPSHOT
+
+
+ presto-common-arrow
+ presto-common-arrow
+ Presto - Common Arrow Utilities
+
+
+ ${project.parent.basedir}
+
+
+
+
+ org.apache.arrow
+ arrow-vector
+
+
+ org.slf4j
+ slf4j-api
+
+
+ com.fasterxml.jackson.datatype
+ jackson-datatype-jsr310
+
+
+
+
+
+ com.facebook.presto
+ presto-spi
+
+
+
+ com.facebook.presto
+ presto-common
+
+
+
+ io.airlift
+ slice
+
+
+
+ com.google.guava
+ guava
+
+
+
+ jakarta.inject
+ jakarta.inject-api
+
+
+
+
+
+
+ org.basepom.maven
+ duplicate-finder-maven-plugin
+ 1.2.1
+
+
+ module-info
+ META-INF.versions.9.module-info
+
+
+ arrow-git.properties
+ about.html
+
+
+
+
+
+ check
+
+
+
+
+
+
+
diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java
similarity index 92%
rename from presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java
rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java
index ed703f1b2444f..a6180a18fe8fb 100644
--- a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java
+++ b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowBlockBuilder.java
@@ -54,6 +54,7 @@
import org.apache.arrow.vector.TimeMicroVector;
import org.apache.arrow.vector.TimeMilliVector;
import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
import org.apache.arrow.vector.TimeStampMicroVector;
import org.apache.arrow.vector.TimeStampMilliTZVector;
import org.apache.arrow.vector.TimeStampMilliVector;
@@ -62,6 +63,7 @@
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.StructVector;
@@ -112,7 +114,7 @@ public Block buildBlockFromFieldVector(FieldVector vector, Type type, Dictionary
return builder.build();
}
- protected Type getPrestoTypeFromArrowField(Field field)
+ public Type getPrestoTypeFromArrowField(Field field)
{
switch (field.getType().getTypeID()) {
case Int:
@@ -139,7 +141,8 @@ protected Type getPrestoTypeFromArrowField(Field field)
return BooleanType.BOOLEAN;
case Time:
return TimeType.TIME;
- case List: {
+ case List:
+ case FixedSizeList: {
List children = field.getChildren();
checkArgument(children.size() == 1, "Arrow List expected to have 1 child Field, got: " + children.size());
return new ArrayType(getPrestoTypeFromArrowField(field.getChildren().get(0)));
@@ -292,6 +295,9 @@ else if (vector instanceof TimeStampSecVector) {
else if (vector instanceof TimeMicroVector) {
assignBlockFromTimeMicroVector((TimeMicroVector) vector, type, builder, startIndex, endIndex);
}
+ else if (vector instanceof TimeStampMicroTZVector) {
+ assignBlockFromTimeStampMicroTZVector((TimeStampMicroTZVector) vector, type, builder, startIndex, endIndex);
+ }
else if (vector instanceof TimeStampMilliTZVector) {
assignBlockFromTimeMilliTZVector((TimeStampMilliTZVector) vector, type, builder, startIndex, endIndex);
}
@@ -299,6 +305,9 @@ else if (vector instanceof MapVector) {
// NOTE: MapVector is also instanceof ListVector, so check for Map first
assignBlockFromMapVector((MapVector) vector, type, builder, startIndex, endIndex);
}
+ else if (vector instanceof FixedSizeListVector) {
+ assignBlockFromFixedSizeListVector((FixedSizeListVector) vector, type, builder, startIndex, endIndex);
+ }
else if (vector instanceof ListVector) {
assignBlockFromListVector((ListVector) vector, type, builder, startIndex, endIndex);
}
@@ -666,6 +675,49 @@ public void assignBlockFromListVector(ListVector vector, Type type, BlockBuilder
}
}
+ public void assignBlockFromFixedSizeListVector(FixedSizeListVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex)
+ {
+ if (!(type instanceof ArrayType)) {
+ throw new IllegalArgumentException("Type must be an ArrayType for FixedSizeListVector");
+ }
+
+ ArrayType arrayType = (ArrayType) type;
+ Type elementType = arrayType.getElementType();
+ int listSize = vector.getListSize();
+
+ for (int i = startIndex; i < endIndex; i++) {
+ if (vector.isNull(i)) {
+ builder.appendNull();
+ }
+ else {
+ BlockBuilder elementBuilder = builder.beginBlockEntry();
+ int elementStart = i * listSize;
+ int elementEnd = elementStart + listSize;
+ assignBlockFromValueVector(
+ vector.getDataVector(), elementType, elementBuilder, elementStart, elementEnd);
+ builder.closeEntry();
+ }
+ }
+ }
+
+ public void assignBlockFromTimeStampMicroTZVector(TimeStampMicroTZVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex)
+ {
+ if (!(type instanceof TimestampType)) {
+ throw new IllegalArgumentException("Expected TimestampType but got " + type.getClass().getName());
+ }
+
+ for (int i = startIndex; i < endIndex; i++) {
+ if (vector.isNull(i)) {
+ builder.appendNull();
+ }
+ else {
+ long micros = vector.get(i);
+ long millis = TimeUnit.MICROSECONDS.toMillis(micros);
+ type.writeLong(builder, millis);
+ }
+ }
+ }
+
public void assignBlockFromMapVector(MapVector vector, Type type, BlockBuilder builder, int startIndex, int endIndex)
{
if (!(type instanceof MapType)) {
diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java
similarity index 100%
rename from presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java
rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowErrorCode.java
diff --git a/presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowException.java b/presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowException.java
similarity index 100%
rename from presto-base-arrow-flight/src/main/java/com/facebook/plugin/arrow/ArrowException.java
rename to presto-common-arrow/src/main/java/com/facebook/plugin/arrow/ArrowException.java
diff --git a/presto-common/pom.xml b/presto-common/pom.xml
index 07e80d39313e7..37db32216242d 100644
--- a/presto-common/pom.xml
+++ b/presto-common/pom.xml
@@ -62,6 +62,12 @@
jol-core
+
+ com.facebook.airlift
+ log
+ provided
+
+
com.facebook.presto
@@ -78,7 +84,6 @@
com.google.guava
guava
- test
diff --git a/presto-common/src/main/java/com/facebook/presto/common/Subfield.java b/presto-common/src/main/java/com/facebook/presto/common/Subfield.java
index 5854fff8a8731..4d49c557fcaab 100644
--- a/presto-common/src/main/java/com/facebook/presto/common/Subfield.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/Subfield.java
@@ -81,6 +81,31 @@ public String toString()
}
}
+ public static final class StructureOnly
+ implements PathElement
+ {
+ private static final StructureOnly STRUCTURE_ONLY = new StructureOnly();
+
+ private StructureOnly() {}
+
+ public static StructureOnly getInstance()
+ {
+ return STRUCTURE_ONLY;
+ }
+
+ @Override
+ public boolean isSubscript()
+ {
+ return true;
+ }
+
+ @Override
+ public String toString()
+ {
+ return "[$]";
+ }
+ }
+
public static final class NestedField
implements PathElement
{
@@ -238,6 +263,11 @@ public static PathElement noSubfield()
return NoSubfield.getInstance();
}
+ public static PathElement structureOnly()
+ {
+ return StructureOnly.getInstance();
+ }
+
@JsonCreator
public Subfield(String path)
{
diff --git a/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java b/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java
index fe61bf6e73eaf..562a99e1c151a 100644
--- a/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/SubfieldTokenizer.java
@@ -106,7 +106,7 @@ private Subfield.PathElement computeNext()
}
if (tryMatch(OPEN_BRACKET)) {
- Subfield.PathElement token = tryMatch(QUOTE) ? matchQuotedSubscript() : tryMatch(WILDCARD) ? matchWildcardSubscript() : matchUnquotedSubscript();
+ Subfield.PathElement token = tryMatch(QUOTE) ? matchQuotedSubscript() : tryMatch(WILDCARD) ? matchWildcardSubscript() : tryMatch(DOLLAR) ? matchStructureOnlySubscript() : matchUnquotedSubscript();
match(CLOSE_BRACKET);
firstSegment = false;
@@ -151,6 +151,11 @@ private Subfield.PathElement matchDollarPathElement()
return Subfield.noSubfield();
}
+ private Subfield.PathElement matchStructureOnlySubscript()
+ {
+ return Subfield.structureOnly();
+ }
+
private static boolean isUnquotedPathCharacter(char c)
{
return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' || c == '#' || c == ' ' || c == '<' || c == '>' || isUnquotedSubscriptCharacter(c);
diff --git a/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java b/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java
index 9df746537e007..eda4d7fc9f764 100644
--- a/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java
@@ -18,9 +18,12 @@
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import static java.util.Collections.emptyMap;
+import static java.util.Collections.emptySet;
import static java.util.Collections.unmodifiableMap;
+import static java.util.Collections.unmodifiableSet;
import static java.util.Objects.requireNonNull;
public class SqlFunctionProperties
@@ -38,6 +41,7 @@ public class SqlFunctionProperties
private final Map extraCredentials;
private final boolean warnOnCommonNanPatterns;
private final boolean canonicalizedJsonExtract;
+ private final Set tryCatchableErrorCodes;
private SqlFunctionProperties(
boolean parseDecimalLiteralAsDouble,
@@ -52,7 +56,8 @@ private SqlFunctionProperties(
boolean legacyJsonCast,
Map extraCredentials,
boolean warnOnCommonNanPatterns,
- boolean canonicalizedJsonExtract)
+ boolean canonicalizedJsonExtract,
+ Set tryCatchableErrorCodes)
{
this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble;
this.legacyRowFieldOrdinalAccessEnabled = legacyRowFieldOrdinalAccessEnabled;
@@ -67,6 +72,7 @@ private SqlFunctionProperties(
this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null");
this.warnOnCommonNanPatterns = warnOnCommonNanPatterns;
this.canonicalizedJsonExtract = canonicalizedJsonExtract;
+ this.tryCatchableErrorCodes = requireNonNull(tryCatchableErrorCodes, "tryCatchableErrorCodes is null");
}
public boolean isParseDecimalLiteralAsDouble()
@@ -133,6 +139,11 @@ public boolean shouldWarnOnCommonNanPatterns()
public boolean isCanonicalizedJsonExtract()
{
    return canonicalizedJsonExtract;
}
+ public Set getTryCatchableErrorCodes()
+ {
+ return tryCatchableErrorCodes;
+ }
+
@Override
public boolean equals(Object o)
{
@@ -153,7 +164,8 @@ public boolean equals(Object o)
Objects.equals(sessionUser, that.sessionUser) &&
Objects.equals(extraCredentials, that.extraCredentials) &&
Objects.equals(legacyJsonCast, that.legacyJsonCast) &&
- Objects.equals(canonicalizedJsonExtract, that.canonicalizedJsonExtract);
+ Objects.equals(canonicalizedJsonExtract, that.canonicalizedJsonExtract) &&
+ Objects.equals(tryCatchableErrorCodes, that.tryCatchableErrorCodes);
}
@Override
@@ -161,7 +173,7 @@ public int hashCode()
{
return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey,
legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser,
- extraCredentials, legacyJsonCast, canonicalizedJsonExtract);
+ extraCredentials, legacyJsonCast, canonicalizedJsonExtract, tryCatchableErrorCodes);
}
public static Builder builder()
@@ -184,6 +196,7 @@ public static class Builder
private Map extraCredentials = emptyMap();
private boolean warnOnCommonNanPatterns;
private boolean canonicalizedJsonExtract;
+ private Set tryCatchableErrorCodes = emptySet();
private Builder() {}
@@ -265,6 +278,12 @@ public Builder setCanonicalizedJsonExtract(boolean canonicalizedJsonExtract)
return this;
}
+ public Builder setTryCatchableErrorCodes(Set tryCatchableErrorCodes)
+ {
+ this.tryCatchableErrorCodes = unmodifiableSet(tryCatchableErrorCodes);
+ return this;
+ }
+
public SqlFunctionProperties build()
{
return new SqlFunctionProperties(
@@ -280,7 +299,8 @@ public SqlFunctionProperties build()
legacyJsonCast,
extraCredentials,
warnOnCommonNanPatterns,
- canonicalizedJsonExtract);
+ canonicalizedJsonExtract,
+ tryCatchableErrorCodes);
}
}
}
diff --git a/presto-main-base/src/main/java/com/facebook/presto/type/IpAddressType.java b/presto-common/src/main/java/com/facebook/presto/common/type/IpAddressType.java
similarity index 96%
rename from presto-main-base/src/main/java/com/facebook/presto/type/IpAddressType.java
rename to presto-common/src/main/java/com/facebook/presto/common/type/IpAddressType.java
index dbcc9d46ee587..5d2b1b11380dd 100644
--- a/presto-main-base/src/main/java/com/facebook/presto/type/IpAddressType.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/type/IpAddressType.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package com.facebook.presto.type;
+package com.facebook.presto.common.type;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
@@ -19,9 +19,6 @@
import com.facebook.presto.common.block.Int128ArrayBlockBuilder;
import com.facebook.presto.common.block.PageBuilderStatus;
import com.facebook.presto.common.function.SqlFunctionProperties;
-import com.facebook.presto.common.type.AbstractPrimitiveType;
-import com.facebook.presto.common.type.FixedWidthType;
-import com.facebook.presto.common.type.StandardTypes;
import com.google.common.net.InetAddresses;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
diff --git a/presto-main-base/src/main/java/com/facebook/presto/type/IpPrefixType.java b/presto-common/src/main/java/com/facebook/presto/common/type/IpPrefixType.java
similarity index 96%
rename from presto-main-base/src/main/java/com/facebook/presto/type/IpPrefixType.java
rename to presto-common/src/main/java/com/facebook/presto/common/type/IpPrefixType.java
index 9381ff5c0ea34..dbf624672f903 100644
--- a/presto-main-base/src/main/java/com/facebook/presto/type/IpPrefixType.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/type/IpPrefixType.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package com.facebook.presto.type;
+package com.facebook.presto.common.type;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
@@ -19,9 +19,6 @@
import com.facebook.presto.common.block.PageBuilderStatus;
import com.facebook.presto.common.block.VariableWidthBlockBuilder;
import com.facebook.presto.common.function.SqlFunctionProperties;
-import com.facebook.presto.common.type.AbstractPrimitiveType;
-import com.facebook.presto.common.type.FixedWidthType;
-import com.facebook.presto.common.type.StandardTypes;
import com.google.common.net.InetAddresses;
import io.airlift.slice.Slice;
import io.airlift.slice.XxHash64;
diff --git a/presto-common/src/main/java/com/facebook/presto/common/type/StandardTypes.java b/presto-common/src/main/java/com/facebook/presto/common/type/StandardTypes.java
index 2291fd8dc80e4..f2c1f408969d8 100644
--- a/presto-common/src/main/java/com/facebook/presto/common/type/StandardTypes.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/type/StandardTypes.java
@@ -34,6 +34,7 @@ public final class StandardTypes
public static final String QDIGEST = "qdigest";
public static final String TDIGEST = "tdigest";
public static final String KLL_SKETCH = "kllsketch";
+ public static final String K_HYPER_LOG_LOG = "KHyperLogLog";
public static final String P4_HYPER_LOG_LOG = "P4HyperLogLog";
public static final String INTERVAL_DAY_TO_SECOND = "interval day to second";
public static final String INTERVAL_YEAR_TO_MONTH = "interval year to month";
diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/RebindSafeMBeanServer.java b/presto-common/src/main/java/com/facebook/presto/common/util/RebindSafeMBeanServer.java
similarity index 98%
rename from presto-hive/src/main/java/com/facebook/presto/hive/RebindSafeMBeanServer.java
rename to presto-common/src/main/java/com/facebook/presto/common/util/RebindSafeMBeanServer.java
index c1af771e83253..c11ed61d23f7e 100644
--- a/presto-hive/src/main/java/com/facebook/presto/hive/RebindSafeMBeanServer.java
+++ b/presto-common/src/main/java/com/facebook/presto/common/util/RebindSafeMBeanServer.java
@@ -11,7 +11,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package com.facebook.presto.hive;
+package com.facebook.presto.common.util;
import com.facebook.airlift.log.Logger;
import com.google.errorprone.annotations.ThreadSafe;
@@ -47,7 +47,6 @@
* MBeanServer wrapper that a ignores calls to registerMBean when there is already
* a MBean registered with the specified object name.
*/
-@SuppressWarnings("deprecation")
@ThreadSafe
public class RebindSafeMBeanServer
implements MBeanServer
@@ -261,6 +260,7 @@ public Object instantiate(String className, ObjectName loaderName, Object[] para
@Override
@Deprecated
+ @SuppressWarnings("deprecation")
public ObjectInputStream deserialize(ObjectName name, byte[] data)
throws OperationsException
{
@@ -269,6 +269,7 @@ public ObjectInputStream deserialize(ObjectName name, byte[] data)
@Override
@Deprecated
+ @SuppressWarnings("deprecation")
public ObjectInputStream deserialize(String className, byte[] data)
throws OperationsException, ReflectionException
{
@@ -277,6 +278,7 @@ public ObjectInputStream deserialize(String className, byte[] data)
@Override
@Deprecated
+ @SuppressWarnings("deprecation")
public ObjectInputStream deserialize(String className, ObjectName loaderName, byte[] data)
throws OperationsException, ReflectionException
{
diff --git a/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties b/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties
index 7d53bb63b6ecd..19615ee15f34a 100644
--- a/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties
+++ b/presto-common/src/main/resources/com/facebook/presto/common/type/zone-index.properties
@@ -2240,3 +2240,4 @@
2231 Pacific/Kanton
2232 Europe/Kyiv
2233 America/Ciudad_Juarez
+2234 America/Coyhaique
diff --git a/presto-common/src/test/java/com/facebook/presto/common/type/TestTimeZoneKey.java b/presto-common/src/test/java/com/facebook/presto/common/type/TestTimeZoneKey.java
index 6750e43a34937..263abb0e3c0c8 100644
--- a/presto-common/src/test/java/com/facebook/presto/common/type/TestTimeZoneKey.java
+++ b/presto-common/src/test/java/com/facebook/presto/common/type/TestTimeZoneKey.java
@@ -216,7 +216,7 @@ public int compare(TimeZoneKey left, TimeZoneKey right)
hasher.putString(timeZoneKey.getId(), StandardCharsets.UTF_8);
}
// Zone file should not (normally) be changed, so let's make this more difficult
- assertEquals(hasher.hash().asLong(), 4825838578917475630L, "zone-index.properties file contents changed!");
+ assertEquals(hasher.hash().asLong(), 3765670086753811806L, "zone-index.properties file contents changed!");
}
public void assertTimeZoneNotSupported(String zoneId)
diff --git a/presto-delta/src/main/java/com/facebook/presto/delta/DeltaMetadata.java b/presto-delta/src/main/java/com/facebook/presto/delta/DeltaMetadata.java
index 22e67e9d47f6e..8cdfde27b4f2f 100644
--- a/presto-delta/src/main/java/com/facebook/presto/delta/DeltaMetadata.java
+++ b/presto-delta/src/main/java/com/facebook/presto/delta/DeltaMetadata.java
@@ -46,6 +46,7 @@
import jakarta.inject.Inject;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -331,11 +332,19 @@ private ConnectorTableMetadata getTableMetadata(ConnectorSession session, Schema
return null;
}
- List columnMetadata = tableHandle.getDeltaTable().getColumns().stream()
+ DeltaTable deltaTable = tableHandle.getDeltaTable();
+
+ // External location property
+ Map properties = new HashMap<>(1);
+ if (deltaTable.getTableLocation() != null) {
+ properties.put(DeltaTableProperties.EXTERNAL_LOCATION_PROPERTY, deltaTable.getTableLocation());
+ }
+
+ List columnMetadata = deltaTable.getColumns().stream()
.map(column -> getColumnMetadata(session, column))
.collect(Collectors.toList());
- return new ConnectorTableMetadata(tableName, columnMetadata);
+ return new ConnectorTableMetadata(tableName, columnMetadata, properties);
}
@Override
diff --git a/presto-delta/src/main/java/com/facebook/presto/delta/DeltaModule.java b/presto-delta/src/main/java/com/facebook/presto/delta/DeltaModule.java
index 598eadcdd5bf7..cbbe73ef3ec92 100644
--- a/presto-delta/src/main/java/com/facebook/presto/delta/DeltaModule.java
+++ b/presto-delta/src/main/java/com/facebook/presto/delta/DeltaModule.java
@@ -42,6 +42,7 @@
import com.facebook.presto.hive.metastore.HivePartitionMutator;
import com.facebook.presto.hive.metastore.InMemoryCachingHiveMetastore;
import com.facebook.presto.hive.metastore.InvalidateMetastoreCacheProcedure;
+import com.facebook.presto.hive.metastore.MetastoreCacheSpecProvider;
import com.facebook.presto.hive.metastore.MetastoreCacheStats;
import com.facebook.presto.hive.metastore.MetastoreConfig;
import com.facebook.presto.hive.metastore.thrift.ThriftHiveMetastoreConfig;
@@ -105,6 +106,7 @@ protected void setup(Binder binder)
configBinder(binder).bindConfig(HiveClientConfig.class);
configBinder(binder).bindConfig(MetastoreClientConfig.class);
configBinder(binder).bindConfig(ThriftHiveMetastoreConfig.class);
+ binder.bind(MetastoreCacheSpecProvider.class).in(Scopes.SINGLETON);
binder.bind(MetastoreCacheStats.class).to(HiveMetastoreCacheStats.class).in(Scopes.SINGLETON);
newExporter(binder).export(MetastoreCacheStats.class).as(generatedNameOf(MetastoreCacheStats.class, connectorId));
binder.bind(ExtendedHiveMetastore.class).to(InMemoryCachingHiveMetastore.class).in(Scopes.SINGLETON);
diff --git a/presto-delta/src/test/java/com/facebook/presto/delta/TestDeltaIntegration.java b/presto-delta/src/test/java/com/facebook/presto/delta/TestDeltaIntegration.java
index da535ca4177b0..4c016e734126d 100644
--- a/presto-delta/src/test/java/com/facebook/presto/delta/TestDeltaIntegration.java
+++ b/presto-delta/src/test/java/com/facebook/presto/delta/TestDeltaIntegration.java
@@ -338,4 +338,33 @@ private static void setCommitFileModificationTime(String tableLocation, long com
Paths.get(URI.create(tableLocation)).resolve("_delta_log/").resolve(format("%020d.json", commitId)),
FileTime.from(commitTimeMillis, TimeUnit.MILLISECONDS));
}
+
+ @Test(dataProvider = "deltaReaderVersions")
+ public void testShowCreateTable(String deltaVersion)
+ {
+ String tableName = deltaVersion + "/data-reader-primitives";
+ String fullTableName = format("%s.%s.\"%s\"", DELTA_CATALOG, DELTA_SCHEMA.toLowerCase(), tableName);
+
+ String createTableQueryTemplate = "CREATE TABLE %s (\n" +
+ " \"as_int\" integer,\n" +
+ " \"as_long\" bigint,\n" +
+ " \"as_byte\" tinyint,\n" +
+ " \"as_short\" smallint,\n" +
+ " \"as_boolean\" boolean,\n" +
+ " \"as_float\" real,\n" +
+ " \"as_double\" double,\n" +
+ " \"as_string\" varchar,\n" +
+ " \"as_binary\" varbinary,\n" +
+ " \"as_big_decimal\" decimal(1,0)\n" +
+ ")\n" +
+ "WITH (\n" +
+ " external_location = '%s'\n" +
+ ")";
+
+ String expectedSqlCommand = format(createTableQueryTemplate, fullTableName, goldenTablePath(tableName));
+
+ String showCreateTableCommandResult = (String) computeActual("SHOW CREATE TABLE " + fullTableName).getOnlyValue();
+
+ assertEquals(showCreateTableCommandResult, expectedSqlCommand);
+ }
}
diff --git a/presto-docs/requirements.txt b/presto-docs/requirements.txt
index f5ee358d6d1e4..3203e335054ca 100644
--- a/presto-docs/requirements.txt
+++ b/presto-docs/requirements.txt
@@ -1,3 +1,2 @@
sphinx==8.2.1
sphinx-immaterial==0.13.0
-sphinx-copybutton==0.5.2
diff --git a/presto-docs/src/main/sphinx/admin.rst b/presto-docs/src/main/sphinx/admin.rst
index 9c3bc6f547b5c..38b4e74adf3d2 100644
--- a/presto-docs/src/main/sphinx/admin.rst
+++ b/presto-docs/src/main/sphinx/admin.rst
@@ -22,3 +22,4 @@ Administration
admin/verifier
admin/grafana-cloud
admin/version-support
+ admin/jmx-metrics
diff --git a/presto-docs/src/main/sphinx/admin/jmx-metrics.rst b/presto-docs/src/main/sphinx/admin/jmx-metrics.rst
new file mode 100644
index 0000000000000..983e0fe4dcf17
--- /dev/null
+++ b/presto-docs/src/main/sphinx/admin/jmx-metrics.rst
@@ -0,0 +1,208 @@
+=====================
+JMX Metrics Reference
+=====================
+
+Presto exposes comprehensive metrics via Java Management Extensions (JMX) for monitoring
+cluster health, query performance, and system behavior. This page documents some
+important JMX metrics available for production monitoring.
+
+Overview
+--------
+
+JMX metrics can be accessed through:
+
+* **JMX clients**: JConsole, VisualVM, or jmxterm
+* **SQL queries**: Using the :doc:`/connector/jmx` connector
+* **Monitoring systems**: Prometheus, Grafana, or other JMX exporters
+
+Querying Metrics via SQL
+-------------------------
+
+Once the :doc:`/connector/jmx` connector is configured, you can query metrics using SQL:
+
+.. code-block:: sql
+
+ -- List all available metrics
+ SHOW TABLES FROM jmx.current;
+
+ -- Query specific metrics
+ SELECT * FROM jmx.current."com.facebook.presto.metadata:name=metadatamanagerstats";
+
+Metadata Operation Metrics
+---------------------------
+
+**JMX Table Name:** ``com.facebook.presto.metadata:name=metadatamanagerstats``
+
+Tracks performance and usage of all metadata operations including schema discovery,
+table lookups, and column information retrieval.
+
+Key Metrics
+^^^^^^^^^^^
+
+For each metadata operation such as ``listSchemaNames``, ``listTables``, or ``getTableHandle``:
+
+**Call Counters**
+
+* ``Calls``: Total number of times the operation was called
+* Example: ``listSchemaNamesCalls``, ``listTablesCalls``
+
+**Timing Statistics**
+
+All timing values are in nanoseconds:
+
+* ``time.alltime.avg``: Average execution time across all calls
+* ``time.alltime.min``: Fastest execution time
+* ``time.alltime.max``: Slowest execution time
+* ``time.alltime.count``: Number of samples collected
+* ``time.alltime.p50``: Median (50th percentile)
+* ``time.alltime.p75``: 75th percentile
+* ``time.alltime.p90``: 90th percentile
+* ``time.alltime.p95``: 95th percentile
+* ``time.alltime.p99``: 99th percentile
+
+**Time Windows**
+
+Statistics are also available for recent time windows:
+
+* ``time.oneminute.*``: Last 1 minute
+* ``time.fiveminutes.*``: Last 5 minutes
+* ``time.fifteenminutes.*``: Last 15 minutes
+
+Common Operations
+^^^^^^^^^^^^^^^^^
+
+**Schema Operations**
+
+* ``listSchemaNames``: List all schemas in a catalog
+* ``getSchemaProperties``: Get schema-level properties
+
+**Table Operations**
+
+* ``listTables``: List tables in a schema
+* ``getTableHandle``: Get table metadata handle
+* ``getTableMetadata``: Get detailed table information
+* ``getTableStatistics``: Get table statistics
+
+**Column Operations**
+
+* ``getColumnHandles``: Get column information
+* ``getColumnMetadata``: Get detailed column metadata
+
+**View Operations**
+
+* ``listViews``: List views in a schema
+* ``getView``: Get view definition
+
+Example Queries
+^^^^^^^^^^^^^^^
+
+**Query Lifecycle Metrics**
+
+Track query begin and completion times:
+
+.. code-block:: sql
+
+ -- Query begin operation metrics
+ SELECT
+ "beginquerytime.alltime.count" as total_queries,
+ "beginquerytime.alltime.avg" / 1000.0 as avg_microseconds,
+ "beginquerytime.alltime.min" / 1000.0 as min_microseconds,
+ "beginquerytime.alltime.max" / 1000.0 as max_microseconds
+ FROM jmx.current."com.facebook.presto.metadata:name=metadatamanagerstats";
+
+ -- Example output:
+ -- total_queries | avg_microseconds | min_microseconds | max_microseconds
+ -- 3.0 | 49.42 | 28.63 | 75.38
+
+**Insert Operation Metrics**
+
+Track data insertion performance:
+
+.. code-block:: sql
+
+ -- Begin insert operation metrics
+ SELECT
+ "begininserttime.alltime.count" as insert_operations,
+ "begininserttime.alltime.avg" / 1000000000.0 as avg_seconds,
+ "begininserttime.alltime.min" / 1000000000.0 as min_seconds,
+ "begininserttime.alltime.max" / 1000000000.0 as max_seconds
+ FROM jmx.current."com.facebook.presto.metadata:name=metadatamanagerstats";
+
+ -- Example output:
+ -- insert_operations | avg_seconds | min_seconds | max_seconds
+ -- 1.0 | 0.82 | 0.82 | 0.82
+
+ -- Finish insert operation metrics
+ SELECT
+ "finishinserttime.alltime.count" as completed_inserts,
+ "finishinserttime.alltime.avg" / 1000000000.0 as avg_seconds,
+ "finishinserttime.alltime.min" / 1000000000.0 as min_seconds,
+ "finishinserttime.alltime.max" / 1000000000.0 as max_seconds
+ FROM jmx.current."com.facebook.presto.metadata:name=metadatamanagerstats";
+
+ -- Example output:
+ -- completed_inserts | avg_seconds | min_seconds | max_seconds
+ -- 1.0 | 11.47 | 11.47 | 11.47
+
+System Access Control Metrics
+------------------------------
+
+**JMX Table Name:** ``com.facebook.presto.security:name=accesscontrolmanager``
+
+Tracks performance of access control checks.
+
+Key Metrics
+^^^^^^^^^^^
+
+Similar structure to metadata metrics, tracking operations like:
+
+* ``checkCanSetUser``: User impersonation checks
+* ``checkCanAccessCatalog``: Catalog access checks
+* ``checkCanSelectFromColumns``: Column-level access checks
+* ``checkCanCreateTable``: Table creation permission checks
+
+Query Execution Metrics
+-----------------------
+
+**Task Metrics**
+
+* ``com.facebook.presto.execution:name=taskmanager``: Task execution statistics
+* ``com.facebook.presto.execution.executor:name=taskexecutor``: Task executor pool metrics
+
+**Memory Metrics**
+
+* ``com.facebook.presto.memory:name=general,type=memorypool``: General memory pool usage
+* ``com.facebook.presto.memory:name=reserved,type=memorypool``: Reserved memory pool usage
+
+**Query Manager Metrics**
+
+* ``com.facebook.presto.dispatcher:name=dispatchmanager``: Query dispatch statistics
+* ``com.facebook.presto.execution:name=querymanager``: Query execution statistics
+
+Connector-Specific Metrics
+---------------------------
+
+Hive Connector
+^^^^^^^^^^^^^^
+
+* ``com.facebook.presto.hive:name=*``: Hive metastore and file system metrics.
+  Example:
+  ``com.facebook.presto.hive:name=hive,type=cachingdirectorylister``
+
+Iceberg Connector
+^^^^^^^^^^^^^^^^^
+
+* ``com.facebook.presto.iceberg:name=*``: Iceberg-specific caching and I/O metrics
+
+Examples:
+
+* ``com.facebook.presto.iceberg:name=iceberg,type=icebergsplitmanager``
+* ``com.facebook.presto.iceberg:name=iceberg,type=manifestfilecache``
+* ``com.facebook.presto.iceberg:name=icebergfilewriterfactory``
+
+See Also
+--------
+
+* :doc:`/connector/jmx` - JMX Connector documentation
+* :doc:`web-interface` - Web UI monitoring
+* :doc:`tuning` - Performance tuning guide
diff --git a/presto-docs/src/main/sphinx/admin/materialized-views.rst b/presto-docs/src/main/sphinx/admin/materialized-views.rst
index 9e330c5509511..318397b481057 100644
--- a/presto-docs/src/main/sphinx/admin/materialized-views.rst
+++ b/presto-docs/src/main/sphinx/admin/materialized-views.rst
@@ -91,6 +91,250 @@ The following permissions are required for materialized view operations when
non-owners query the view to prevent privilege escalation.
* For INVOKER mode: User needs ``SELECT`` permission on all underlying base tables
+Data Consistency Modes
+----------------------
+
+Materialized views support three data consistency modes that control how queries are optimized
+when the view's data may be stale:
+
+**USE_STITCHING** (default)
+ Reads fresh data from storage, recomputes stale data from base tables,
+ and combines results via UNION.
+
+**FAIL**
+ Fails the query if the materialized view is stale.
+
+**USE_VIEW_QUERY**
+ Executes the view query against base tables. Always fresh but highest cost.
+
+Set via session property::
+
+ SET SESSION materialized_view_skip_storage = 'USE_STITCHING';
+
+Predicate Stitching (USE_STITCHING Mode)
+----------------------------------------
+
+Overview
+^^^^^^^^
+
+Predicate stitching recomputes only stale data rather than the entire view. When base
+tables change, Presto identifies which data is affected and generates a UNION query
+that combines:
+
+* **Storage scan**: Reads unchanged (fresh) data from the materialized view's storage
+* **Recompute branch**: Recomputes changed (stale) data from base tables using the view's
+ defining query
+
+This avoids full recomputation when only a subset of data is stale, though there is
+overhead from the UNION operation and predicate-based filtering.
+
+How It Works
+^^^^^^^^^^^^
+
+**Staleness Detection**
+
+For each base table referenced in the materialized view, a connector may track which data
+has changed since the last refresh and return predicates identifying the stale data. The
+specific mechanism depends on the connector:
+
+1. At refresh time, metadata is recorded (implementation varies by connector)
+2. When the view is queried, the current state is compared with the recorded state
+3. Predicates are built that identify exactly which data is stale
+
+See the connector-specific documentation for details on how staleness is tracked.
+For Iceberg tables, see :ref:`connector/iceberg:materialized views`.
+
+**Query Rewriting**
+
+When a query uses a materialized view with stale data, the optimizer rewrites the query
+to use UNION::
+
+ -- Original query
+ SELECT * FROM my_materialized_view WHERE order_date >= '2024-01-01'
+
+ -- Rewritten with predicate stitching (example using partition predicates)
+ SELECT * FROM (
+ -- Fresh partitions from storage
+ SELECT * FROM my_materialized_view_storage
+ WHERE order_date >= '2024-01-01'
+ AND order_date NOT IN ('2024-01-15', '2024-01-16') -- Exclude stale
+ UNION ALL
+ -- Stale partitions recomputed
+ SELECT o.order_id, c.customer_name, o.order_date
+ FROM orders o
+ JOIN customers c ON o.customer_id = c.customer_id
+ AND o.order_date = c.reg_date
+ WHERE o.order_date IN ('2024-01-15', '2024-01-16') -- Stale partition filter
+ AND c.reg_date IN ('2024-01-15', '2024-01-16') -- Propagated via equivalence
+ AND o.order_date >= '2024-01-01' -- Original filter preserved
+ )
+
+The partition predicate is propagated to equivalent columns in joined tables (in this case,
+``c.reg_date``), allowing partition pruning on the ``customers`` table as well.
+
+Requirements
+^^^^^^^^^^^^
+
+For predicate stitching to work effectively, the following requirements must be met:
+
+**Predicate Mapping Requirement**
+
+The connector must be able to express staleness as predicates that can be mapped to the
+materialized view's columns. The specific requirements depend on the connector implementation.
+For partition-based connectors (like Iceberg), this typically means:
+
+* Base table partition columns must appear in the SELECT list or be equivalent to columns that do
+* The materialized view should be partitioned on the same or equivalent columns
+* Partition columns must use compatible data types
+
+See connector-specific documentation for details on staleness tracking requirements.
+
+**Unsupported Query Patterns**
+
+Predicate stitching does not work with:
+
+* **Outer joins**: LEFT, RIGHT, and FULL OUTER joins
+* **Non-deterministic functions**: ``RANDOM()``, ``NOW()``, ``UUID()``, etc.
+
+**Security Constraints**
+
+For SECURITY INVOKER materialized views, predicate stitching requires that:
+
+* No column masks are defined on base tables (or the view is treated as fully stale)
+* No row filters are defined on base tables (or the view is treated as fully stale)
+
+This is because column masks and row filters can vary by user, making it impossible to
+determine staleness in a user-independent way.
+
+Column Equivalences and Passthrough Columns
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Predicate stitching supports **passthrough columns** through **column equivalences**,
+which allows tracking staleness even when predicate columns from base tables
+are not directly in the materialized view's output.
+
+**Column Equivalence**
+
+When tables are joined with equality predicates, those columns become equivalent for
+predicate propagation purposes. This applies to any type of staleness predicate
+(partition-based, snapshot-based, etc.). For example with partition predicates::
+
+ CREATE TABLE orders (order_id BIGINT, customer_id BIGINT, order_date VARCHAR)
+ WITH (partitioning = ARRAY['order_date']);
+
+ CREATE TABLE customers (customer_id BIGINT, name VARCHAR, reg_date VARCHAR)
+ WITH (partitioning = ARRAY['reg_date']);
+
+ -- Materialized view with equivalence: order_date = reg_date
+ CREATE MATERIALIZED VIEW order_summary
+ WITH (partitioning = ARRAY['order_date'])
+ AS
+ SELECT o.order_id, c.name, o.order_date
+ FROM orders o
+ JOIN customers c ON o.customer_id = c.customer_id
+ AND o.order_date = c.reg_date; -- Creates equivalence
+
+In this example:
+
+* ``orders.order_date`` and ``customers.reg_date`` are equivalent due to the equality join condition
+* Even though ``reg_date`` is not in the SELECT list, staleness can be tracked through the equivalence to ``order_date``
+* When ``customers`` table changes in partition ``reg_date='2024-01-15'``, this maps to ``order_date='2024-01-15'`` for recomputation
+
+**How Passthrough Mapping Works**
+
+1. **Equivalence Extraction**: During materialized view creation, Presto analyzes JOIN conditions to identify
+ column equivalences
+
+2. **Staleness Detection**: When a base table changes:
+
+ * The connector detects which data changed in the base table and returns predicates
+ * For passthrough columns, predicates are mapped through equivalences
+ * Example: ``customers.reg_date='2024-01-15'`` → ``orders.order_date='2024-01-15'``
+
+3. **Predicate Application**: The mapped predicates are used in:
+
+ * Storage scan: Exclude data where equivalent columns match stale values
+ * Recompute branch: Filter the stale table using the staleness predicate
+ * Joined tables: Propagate the predicate to equivalent columns in joined
+ tables, enabling pruning on those tables as well
+
+**Requirements for Passthrough Columns**
+
+* Join must be an INNER JOIN (not LEFT, RIGHT, or FULL OUTER)
+* Equality must be direct (``col1 = col2``), not through expressions like ``col1 = col2 + 1``
+* At least one column in the equivalence class must be in the materialized view's output
+* Data types must be compatible
+
+**Transitive Equivalences**
+
+Multiple equivalences can be chained together. If ``A.x = B.y`` and ``B.y = C.z``, then
+``A.x``, ``B.y``, and ``C.z`` are all equivalent for predicate propagation.
+
+Unsupported Patterns
+^^^^^^^^^^^^^^^^^^^^
+
+Predicate stitching is **not** applied in the following cases:
+
+* **No staleness predicates available**: If the connector cannot provide staleness predicates
+* **Predicate columns not preserved**: If predicate columns are transformed or not mappable to the materialized view's output
+* **Outer joins with passthrough**: LEFT, RIGHT, and FULL OUTER joins invalidate passthrough equivalences due to null handling
+* **Expression-based equivalences**: ``CAST(col1 AS DATE) = col2`` or ``col1 = col2 + 1``
+
+When predicate stitching cannot be applied, the behavior falls back to the configured consistency mode:
+
+* If ``USE_STITCHING`` is set but stitching is not possible, the query falls back to full
+ recompute (equivalent to ``USE_VIEW_QUERY``)
+* A warning may be logged indicating why stitching was not possible
+
+Performance Considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+**When Stitching is Most Effective**
+
+* **Large materialized views**: More benefit from avoiding full recomputation
+* **Localized changes**: When only a small fraction of data is stale
+* **Frequently refreshed**: When most data remains fresh between queries
+* **Well-structured data**: When staleness predicates align with data modification patterns
+
+**Cost Trade-offs**
+
+Predicate stitching introduces a UNION operation, which has overhead:
+
+* **Storage scan overhead**: Reading from storage + filtering fresh data
+* **Recompute overhead**: Querying base tables + filtering stale data
+* **Union overhead**: Combining results from both branches
+
+However, this is typically much cheaper than:
+
+* **Full recompute**: Reading all base table data
+* **Stale data**: Returning incorrect results
+
+**Optimization Tips**
+
+1. **Predicate granularity**: For partition-based connectors, choose partition columns that align
+ with data modification patterns
+
+ * Too coarse (e.g., partitioning by year): Recomputes too much data
+ * Too fine (e.g., partitioning by second): Too many partitions to manage
+
+2. **Refresh frequency**: Balance freshness needs with refresh costs
+
+ * More frequent refreshes: Less recomputation per query, but higher refresh costs
+ * Less frequent refreshes: More recomputation per query, but lower refresh costs
+
+3. **Query filters**: Include predicate columns in query filters when possible::
+
+ -- Good: Limits scan to relevant data
+ SELECT * FROM mv WHERE order_date >= '2024-01-01'
+
+ -- Less optimal: Scans all data
+ SELECT * FROM mv WHERE customer_id = 12345
+
+4. **Monitor metrics**: Track the ratio of storage scan vs recompute:
+
+ * High recompute ratio: Consider more frequent refreshes or better staleness granularity
+ * High storage scan ratio: Stitching is working efficiently
+
See Also
--------
diff --git a/presto-docs/src/main/sphinx/admin/properties-session.rst b/presto-docs/src/main/sphinx/admin/properties-session.rst
index f8a46b4063e30..dad07f4e654eb 100644
--- a/presto-docs/src/main/sphinx/admin/properties-session.rst
+++ b/presto-docs/src/main/sphinx/admin/properties-session.rst
@@ -50,7 +50,7 @@ The corresponding configuration property is :ref:`admin/properties:\`\`join-dist
^^^^^^^^^^^^^^^^^^^^^^^
* **Type:** ``boolean``
-* **Default value:** ``true``
+* **Default value:** ``false``
This property enables redistribution of data before writing. This can
eliminate the performance impact of data skew when writing by hashing it
@@ -58,8 +58,27 @@ across nodes in the cluster. It can be disabled when it is known that the
output data set is not skewed in order to avoid the overhead of hashing and
redistributing all the data across the network.
+When both ``scale_writers`` and ``redistribute_writes`` are set to ``true``,
+``scale_writers`` takes precedence.
+
The corresponding configuration property is :ref:`admin/properties:\`\`redistribute-writes\`\``.
+``scale_writers``
+^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``true``
+
+This property enables dynamic scaling of writer tasks based on throughput. When enabled,
+Presto automatically adjusts the number of writer tasks to use the minimum necessary
+for optimal performance. This can improve resource utilization by scaling out writers
+only when needed based on data throughput.
+
+When both ``scale_writers`` and ``redistribute_writes`` are set to ``true``,
+``scale_writers`` takes precedence.
+
+The corresponding configuration property is :ref:`admin/properties:\`\`scale-writers\`\``.
+
``task_writer_count``
^^^^^^^^^^^^^^^^^^^^^
@@ -148,6 +167,27 @@ If it’s below the limit, the generated prefixes are used.
The corresponding configuration property is :ref:`admin/properties:\`\`max-prefixes-count\`\``.
+``try_function_catchable_errors``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Default value:** ``""`` (empty string)
+
+A comma-separated list of error code names that the ``TRY()`` function should catch
+and return ``NULL`` for, in addition to the default catchable errors (such as
+``DIVISION_BY_ZERO``, ``INVALID_CAST_ARGUMENT``, ``INVALID_FUNCTION_ARGUMENT``,
+and ``NUMERIC_VALUE_OUT_OF_RANGE``).
+
+This allows users to specify exactly which additional errors ``TRY()`` should suppress.
+Error codes are matched by their name (such as ``GENERIC_INTERNAL_ERROR``, ``INVALID_ARGUMENTS``).
+
+Example usage::
+
+ SET SESSION try_function_catchable_errors = 'GENERIC_INTERNAL_ERROR,INVALID_ARGUMENTS';
+ SELECT TRY(my_function(x)) FROM table;
+
+The corresponding configuration property is :ref:`admin/properties:\`\`try-function-catchable-errors\`\``.
+
Spilling Properties
-------------------
@@ -160,9 +200,8 @@ Spilling Properties
Try spilling memory to disk to avoid exceeding memory limits for the query.
Spilling works by offloading memory to disk. This process can allow a query with a large memory
-footprint to pass at the cost of slower execution times. Currently, spilling is supported only for
-aggregations and joins (inner and outer), so this property will not reduce memory usage required for
-window functions, sorting and other join types.
+footprint to pass at the cost of slower execution times. See :ref:`spill-operations`
+for a list of operations that support spilling.
Be aware that this is an experimental feature and should be used with care.
@@ -331,6 +370,19 @@ queries that have very selective joins.
The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.push-aggregation-through-join\`\``.
+``push_partial_aggregation_through_join``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+When a partial aggregation is above an inner join and all aggregation inputs come from
+only one side of the join, the partial aggregation is pushed below the join to that side.
+This reduces the amount of data flowing into the join operator, which can improve
+performance by allowing the aggregation to pre-reduce data before the join is performed.
+
+The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.push-partial-aggregation-through-join\`\``.
+
``push_table_write_through_union``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -447,6 +499,17 @@ Use this to optimize the ``map_filter()`` and ``map_subset()`` function.
It controls if subfields access is executed at the data source or not.
+``pushdown_subfields_for_cardinality``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+Enable subfield pruning for the ``cardinality()`` function to skip reading keys and values.
+
+When enabled, the query optimizer can push down subfield pruning for cardinality operations,
+allowing the data source to skip reading the actual keys and values when only the cardinality
+(count of elements) is needed.
+
``schedule_splits_based_on_task_load``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* **Type:** ``boolean``
@@ -486,6 +549,59 @@ parallelism factor is below the ``table_scan_shuffle_parallelism_threshold``.
The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.table-scan-shuffle-strategy\`\``.
+``remote_function_names_for_fixed_parallelism``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Default value:** ``""`` (empty string, disabled)
+
+A regular expression pattern to match fully qualified remote function names, such as ``catalog.schema.function_name``,
+that should use fixed parallelism. When a remote function matches this pattern, the optimizer inserts
+round-robin shuffle exchanges before and after the projection containing the remote function call.
+This ensures that the remote function executes with a fixed degree of parallelism, which can be useful
+for controlling resource usage when calling external services.
+
+This property only applies to external/remote functions (functions where ``isExternalExecution()`` returns ``true``,
+such as functions using THRIFT, GRPC, or REST implementation types).
+
+Example patterns:
+
+* ``myschema.myfunction`` - matches an exact function name
+* ``catalog.schema.remote_.*`` - matches all functions starting with ``remote_`` in the specified catalog and schema
+* ``.*remote.*`` - matches any function containing ``remote`` in its fully qualified name
+
+The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.remote-function-names-for-fixed-parallelism\`\``.
+
+``remote_function_fixed_parallelism_task_count``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Default value:** ``null`` (uses the default hash partition count)
+
+The number of tasks to use for remote functions matching the ``remote_function_names_for_fixed_parallelism`` pattern.
+When set, this value determines the degree of parallelism for the round-robin shuffle exchanges inserted
+around matching remote function projections. If not set, the default hash partition count will be used.
+
+This property is only effective when ``remote_function_names_for_fixed_parallelism`` is set to a non-empty pattern.
+
+The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.remote-function-fixed-parallelism-task-count\`\``.
+
+``local_exchange_parent_preference_strategy``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Allowed values:** ``ALWAYS``, ``NEVER``, ``AUTOMATIC``
+* **Default value:** ``ALWAYS``
+
+Strategy to consider parent preferences when adding local exchange partitioning for aggregations.
+When set to ``ALWAYS``, the optimizer always uses parent preferences for local exchange partitioning.
+When set to ``NEVER``, it never uses parent preferences and instead uses the aggregation's own
+grouping keys. When set to ``AUTOMATIC``, the optimizer makes a cost-based decision, using parent
+preferences only when the estimated partition cardinality is greater than or equal to the task
+concurrency.
+
+The corresponding configuration property is :ref:`admin/properties:\`\`optimizer.local-exchange-parent-preference-strategy\`\``.
+
JDBC Properties
---------------
diff --git a/presto-docs/src/main/sphinx/admin/properties.rst b/presto-docs/src/main/sphinx/admin/properties.rst
index bb84818e9645f..4a89a9c229ed3 100644
--- a/presto-docs/src/main/sphinx/admin/properties.rst
+++ b/presto-docs/src/main/sphinx/admin/properties.rst
@@ -2,12 +2,12 @@
Presto Configuration Properties
===============================
-This section describes configuration properties that may be used to tune
+This section describes configuration properties that may be used to tune
Presto or alter its behavior when required.
-The following is not a complete list of all configuration properties
+The following is not a complete list of all configuration properties
available in Presto, and does not include any connector-specific
-catalog configuration properties.
+catalog configuration properties.
For information on catalog configuration properties, see the :doc:`connector documentation `.
@@ -40,25 +40,44 @@ only need to fit in distributed memory across all nodes. When set to ``AUTOMATIC
Presto will make a cost based decision as to which distribution type is optimal.
It will also consider switching the left and right inputs to the join. In ``AUTOMATIC``
mode, Presto will default to hash distributed joins if no cost could be computed, such as if
-the tables do not have statistics.
+the tables do not have statistics.
-The corresponding session property is :ref:`admin/properties-session:\`\`join_distribution_type\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`join_distribution_type\`\``.
``redistribute-writes``
^^^^^^^^^^^^^^^^^^^^^^^
* **Type:** ``boolean``
-* **Default value:** ``true``
+* **Default value:** ``false``
This property enables redistribution of data before writing. This can
eliminate the performance impact of data skew when writing by hashing it
across nodes in the cluster. It can be disabled when it is known that the
output data set is not skewed in order to avoid the overhead of hashing and
-redistributing all the data across the network.
+redistributing all the data across the network.
+
+When both ``scale-writers`` and ``redistribute-writes`` are set to ``true``,
+``scale-writers`` takes precedence.
The corresponding session property is :ref:`admin/properties-session:\`\`redistribute_writes\`\``.
+``scale-writers``
+^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``true``
+
+This property enables dynamic scaling of writer tasks based on throughput. When enabled,
+Presto automatically adjusts the number of writer tasks to use the minimum necessary
+for optimal performance. This can improve resource utilization by scaling out writers
+only when needed based on data throughput.
+
+When both ``scale-writers`` and ``redistribute-writes`` are set to ``true``,
+``scale-writers`` takes precedence.
+
+The corresponding session property is :ref:`admin/properties-session:\`\`scale_writers\`\``.
+
``check-access-control-on-utilized-columns-only``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -124,8 +143,8 @@ session properties are included.
* **Minimum value:** ``0``
* **Default value:** ``0``
-The number of times that a query is automatically retried in the case of a transient query or communications failure.
-The default value ``0`` means that retries are disabled.
+The number of times that a query is automatically retried in the case of a transient query or communications failure.
+The default value ``0`` means that retries are disabled.
``http-server.max-request-header-size``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -133,10 +152,10 @@ The default value ``0`` means that retries are disabled.
* **Type:** ``data size``
* **Default value:** ``8 kB``
-The maximum size of the request header from the HTTP server.
+The maximum size of the request header from the HTTP server.
-Note: The default value can cause errors when large session properties
-or other large session information is involved.
+Note: The default value can cause errors when large session properties
+or other large session information is involved.
See :ref:`troubleshoot/query:\`\`Request Header Fields Too Large\`\``.
``offset-clause-enabled``
@@ -147,7 +166,7 @@ See :ref:`troubleshoot/query:\`\`Request Header Fields Too Large\`\``.
To enable the ``OFFSET`` clause in SQL query expressions, set this property to ``true``.
-The corresponding session property is :ref:`admin/properties-session:\`\`offset_clause_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`offset_clause_enabled\`\``.
``max-serializable-object-size``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -178,9 +197,26 @@ The corresponding session property is :ref:`admin/properties-session:\`\`max_pre
* **Type:** ``string``
* **Default value:** (none)
-An optional identifier for the cluster. When set, this tag is included in the response from the
+An optional identifier for the cluster. When set, this tag is included in the response from the
``/v1/cluster`` REST API endpoint, allowing clients to identify which cluster provided the response.
+``try-function-catchable-errors``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Default value:** ``""`` (empty string)
+
+A comma-separated list of error code names that the ``TRY()`` function should catch
+and return ``NULL`` for, in addition to the default catchable errors (such as
+``DIVISION_BY_ZERO``, ``INVALID_CAST_ARGUMENT``, ``INVALID_FUNCTION_ARGUMENT``,
+and ``NUMERIC_VALUE_OUT_OF_RANGE``).
+
+This allows administrators to configure which additional errors ``TRY()`` should suppress
+at the server level. Error codes are matched by their name (such as ``GENERIC_INTERNAL_ERROR``,
+``INVALID_ARGUMENTS``).
+
+The corresponding session property is :ref:`admin/properties-session:\`\`try_function_catchable_errors\`\``.
+
Memory Management Properties
----------------------------
@@ -274,13 +310,12 @@ Spilling Properties
Try spilling memory to disk to avoid exceeding memory limits for the query.
Spilling works by offloading memory to disk. This process can allow a query with a large memory
-footprint to pass at the cost of slower execution times. Currently, spilling is supported only for
-aggregations and joins (inner and outer), so this property will not reduce memory usage required for
-window functions, sorting and other join types.
+footprint to pass at the cost of slower execution times. See :ref:`spill-operations`
+for a list of operations that support spilling.
Be aware that this is an experimental feature and should be used with care.
-The corresponding session property is :ref:`admin/properties-session:\`\`spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`spill_enabled\`\``.
``experimental.join-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -291,7 +326,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`spill_e
When ``spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for joins to
avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`join_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`join_spill_enabled\`\``.
``experimental.aggregation-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -302,7 +337,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`join_sp
When ``spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for aggregations to
avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`aggregation_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`aggregation_spill_enabled\`\``.
``experimental.distinct-aggregation-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -313,7 +348,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`aggrega
When ``aggregation_spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for distinct
aggregations to avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`distinct_aggregation_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`distinct_aggregation_spill_enabled\`\``.
``experimental.order-by-aggregation-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -324,7 +359,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`distinc
When ``aggregation_spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for order by
aggregations to avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`order_by_aggregation_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`order_by_aggregation_spill_enabled\`\``.
``experimental.window-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -335,7 +370,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`order_b
When ``spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for window functions to
avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`window_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`window_spill_enabled\`\``.
``experimental.order-by-spill-enabled``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -346,7 +381,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`window_
When ``spill_enabled`` is ``true``, this determines whether Presto will try spilling memory to disk for order by to
avoid exceeding memory limits for the query.
-The corresponding session property is :ref:`admin/properties-session:\`\`order_by_spill_enabled\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`order_by_spill_enabled\`\``.
``experimental.spiller.task-spilling-strategy``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -472,7 +507,7 @@ Max spill space to be used by a single query on a single node.
Limit for memory used for unspilling a single aggregation operator instance.
-The corresponding session property is :ref:`admin/properties-session:\`\`aggregation_operator_unspill_memory_limit\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`aggregation_operator_unspill_memory_limit\`\``.
``experimental.spill-compression-codec``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -599,16 +634,16 @@ network has high latency or if there are many nodes in the cluster.
* **Type:** ``boolean``
* **Default value:** ``false``
-Enables the use of custom connector-provided serialization codecs for handles.
+Enables the use of custom connector-provided serialization codecs for handles.
This feature allows connectors to use their own serialization format for
handle objects (such as table handles, column handles, and splits) instead
of standard JSON serialization.
-When enabled, connectors that provide a ``ConnectorCodecProvider`` with
-appropriate codecs will have their handles serialized using custom binary
-formats, which are then Base64-encoded for transport. Connectors without
-codec support automatically fall back to standard JSON serialization.
-Internal Presto handles (prefixed with ``$``) always use JSON serialization
+When enabled, connectors that provide a ``ConnectorCodecProvider`` with
+appropriate codecs will have their handles serialized using custom binary
+formats, which are then Base64-encoded for transport. Connectors without
+codec support automatically fall back to standard JSON serialization.
+Internal Presto handles (prefixed with ``$``) always use JSON serialization
regardless of this setting.
.. _task-properties:
@@ -629,9 +664,9 @@ resource utilization. Lower values are better for clusters that run many queries
concurrently because the cluster will already be utilized by all the running
queries, so adding more concurrency will result in slow downs due to context
switching and other overhead. Higher values are better for clusters that only run
-one or a few queries at a time.
+one or a few queries at a time.
-The corresponding session property is :ref:`admin/properties-session:\`\`task_concurrency\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`task_concurrency\`\``.
``task.http-response-threads``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -691,7 +726,7 @@ can improve throughput if worker CPU utilization is low and all the threads are
but will cause increased heap space usage. Setting the value too high may cause a drop
in performance due to a context switching. The number of active threads is available
via the ``RunningSplits`` property of the
-``com.facebook.presto.execution.executor:name=TaskExecutor.RunningSplits`` JXM object.
+``com.facebook.presto.execution.executor:name=TaskExecutor.RunningSplits`` JMX object.
The number of threads can be configured using either an absolute value (for example, ``10``)
or a value relative to the number of available CPU cores (for example, ``1.5C``). When
@@ -722,9 +757,9 @@ The number of concurrent writer threads per worker per query. Increasing this va
increase write speed, especially when a query is not I/O bound and can take advantage
of additional CPU for parallel writes (some connectors can be bottlenecked on CPU when
writing due to compression or other factors). Setting this too high may cause the cluster
-to become overloaded due to excessive resource utilization.
+to become overloaded due to excessive resource utilization.
-The corresponding session property is :ref:`admin/properties-session:\`\`task_writer_count\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`task_writer_count\`\``.
``task.interrupt-runaway-splits-timeout``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -840,9 +875,9 @@ Optimizer Properties
* **Type:** ``boolean``
* **Default value:** ``false``
-Enables optimization for aggregations on dictionaries.
+Enables optimization for aggregations on dictionaries.
-The corresponding session property is :ref:`admin/properties-session:\`\`dictionary_aggregation\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`dictionary_aggregation\`\``.
``optimizer.optimize-hash-generation``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -854,12 +889,12 @@ Compute hash codes for distribution, joins, and aggregations early during execut
allowing result to be shared between operations later in the query. This can reduce
CPU usage by avoiding computing the same hash multiple times, but at the cost of
additional network transfer for the hashes. In most cases it will decrease overall
-query processing time.
+query processing time.
It is often helpful to disable this property when using :doc:`/sql/explain` in order
to make the query plan easier to read.
-The corresponding session property is :ref:`admin/properties-session:\`\`optimize_hash_generation\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`optimize_hash_generation\`\``.
``optimizer.optimize-metadata-queries``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -905,9 +940,22 @@ over an outer join. For example::
Enabling this optimization can substantially speed up queries by reducing
the amount of data that needs to be processed by the join. However, it may slow down some
-queries that have very selective joins.
+queries that have very selective joins.
+
+The corresponding session property is :ref:`admin/properties-session:\`\`push_aggregation_through_join\`\``.
-The corresponding session property is :ref:`admin/properties-session:\`\`push_aggregation_through_join\`\``.
+``optimizer.push-partial-aggregation-through-join``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+When a partial aggregation is above an inner join and all aggregation inputs come from
+only one side of the join, the partial aggregation is pushed below the join to that side.
+This reduces the amount of data flowing into the join operator, which can improve
+performance by allowing the aggregation to pre-reduce data before the join is performed.
+
+The corresponding session property is :ref:`admin/properties-session:\`\`push_partial_aggregation_through_join\`\``.
``optimizer.push-table-write-through-union``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -919,9 +967,9 @@ Parallelize writes when using ``UNION ALL`` in queries that write data. This imp
speed of writing output tables in ``UNION ALL`` queries because these writes do not require
additional synchronization when collecting results. Enabling this optimization can improve
``UNION ALL`` speed when write speed is not yet saturated. However, it may slow down queries
-in an already heavily loaded system.
+in an already heavily loaded system.
-The corresponding session property is :ref:`admin/properties-session:\`\`push_table_write_through_union\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`push_table_write_through_union\`\``.
``optimizer.join-reordering-strategy``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -935,9 +983,9 @@ query. ``ELIMINATE_CROSS_JOINS`` reorders joins to eliminate cross joins where
otherwise maintains the original query order. When reordering joins it also strives to maintain the
original table order as much as possible. ``AUTOMATIC`` enumerates possible orders and uses
statistics-based cost estimation to determine the least cost order. If stats are not available or if
-for any reason a cost could not be computed, the ``ELIMINATE_CROSS_JOINS`` strategy is used.
+for any reason a cost could not be computed, the ``ELIMINATE_CROSS_JOINS`` strategy is used.
-The corresponding session property is :ref:`admin/properties-session:\`\`join_reordering_strategy\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`join_reordering_strategy\`\``.
``optimizer.max-reordered-joins``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1020,7 +1068,7 @@ Enable broadcasting based on the confidence of the statistics that are being use
broadcasting the side of a joinNode which has the highest (``HIGH`` or ``FACT``) confidence statistics.
If both sides have the same confidence statistics, then the original behavior will be followed.
-The corresponding session property is :ref:`admin/properties-session:\`\`confidence_based_broadcast\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`confidence_based_broadcast\`\``.
``optimizer.treat-low-confidence-zero-estimation-as-unknown``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1028,9 +1076,9 @@ The corresponding session property is :ref:`admin/properties-session:\`\`confide
* **Type:** ``boolean``
* **Default value:** ``false``
-Enable treating ``LOW`` confidence, zero estimations as ``UNKNOWN`` during joins.
+Enable treating ``LOW`` confidence, zero estimations as ``UNKNOWN`` during joins.
-The corresponding session property is :ref:`admin/properties-session:\`\`treat-low-confidence-zero-estimation-as-unknown\`\``.
+The corresponding session property is :ref:`admin/properties-session:\`\`treat-low-confidence-zero-estimation-as-unknown\`\``.
``optimizer.retry-query-with-history-based-optimization``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1038,7 +1086,7 @@ The corresponding session property is :ref:`admin/properties-session:\`\`treat-l
* **Type:** ``boolean``
* **Default value:** ``false``
-Enable retry for failed queries who can potentially be helped by HBO.
+Enable retry for failed queries that can potentially be helped by HBO.
The corresponding session property is :ref:`admin/properties-session:\`\`retry-query-with-history-based-optimization\`\``.
@@ -1100,6 +1148,59 @@ parallelism factor is below the ``optimizer.table-scan-shuffle-parallelism-thres
The corresponding session property is :ref:`admin/properties-session:\`\`table_scan_shuffle_strategy\`\``.
+``optimizer.remote-function-names-for-fixed-parallelism``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Default value:** ``""`` (empty string, disabled)
+
+A regular expression pattern to match fully qualified remote function names, such as ``catalog.schema.function_name``,
+that should use fixed parallelism. When a remote function matches this pattern, the optimizer inserts
+round-robin shuffle exchanges before and after the projection containing the remote function call.
+This ensures that the remote function executes with a fixed degree of parallelism, which can be useful
+for controlling resource usage when calling external services.
+
+This property only applies to external/remote functions (functions where ``isExternalExecution()`` returns ``true``,
+such as functions using THRIFT, GRPC, or REST implementation types).
+
+Example patterns:
+
+* ``myschema.myfunction`` - matches an exact function name
+* ``catalog.schema.remote_.*`` - matches all functions starting with ``remote_`` in the specified catalog and schema
+* ``.*remote.*`` - matches any function containing ``remote`` in its fully qualified name
+
+The corresponding session property is :ref:`admin/properties-session:\`\`remote_function_names_for_fixed_parallelism\`\``.
+
+``optimizer.remote-function-fixed-parallelism-task-count``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Default value:** ``null`` (uses the default hash partition count)
+
+The number of tasks to use for remote functions matching the ``optimizer.remote-function-names-for-fixed-parallelism`` pattern.
+When set, this value determines the degree of parallelism for the round-robin shuffle exchanges inserted
+around matching remote function projections. If not set, the default hash partition count will be used.
+
+This property is only effective when ``optimizer.remote-function-names-for-fixed-parallelism`` is set to a non-empty pattern.
+
+The corresponding session property is :ref:`admin/properties-session:\`\`remote_function_fixed_parallelism_task_count\`\``.
+
+``optimizer.local-exchange-parent-preference-strategy``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``string``
+* **Allowed values:** ``ALWAYS``, ``NEVER``, ``AUTOMATIC``
+* **Default value:** ``ALWAYS``
+
+Strategy to consider parent preferences when adding local exchange partitioning for aggregations.
+When set to ``ALWAYS``, the optimizer always uses parent preferences for local exchange partitioning.
+When set to ``NEVER``, it never uses parent preferences and instead uses the aggregation's own
+grouping keys. When set to ``AUTOMATIC``, the optimizer makes a cost-based decision, using parent
+preferences only when the estimated partition cardinality is greater than or equal to the task
+concurrency.
+
+The corresponding session property is :ref:`admin/properties-session:\`\`local_exchange_parent_preference_strategy\`\``.
+
Planner Properties
------------------
@@ -1248,6 +1349,72 @@ Use to configure how long a query can be queued before it is terminated.
The corresponding session property is :ref:`admin/properties-session:\`\`query_max_queued_time\`\``.
+``query-manager.query-pacing.max-queries-per-second``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Minimum value:** ``1``
+* **Default value:** ``2147483647`` (unlimited)
+
+Maximum number of queries that can be admitted per second globally across
+all resource groups. This property enables query admission pacing to prevent
+worker overload when many queries start simultaneously. Pacing only activates
+when the number of running queries exceeds the threshold configured by
+``query-manager.query-pacing.min-running-queries``.
+
+Set to a lower value such as ``10`` to limit query admission rate during
+periods of high cluster load. The default value effectively disables pacing.
+
+``query-manager.query-pacing.min-running-queries``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Minimum value:** ``0``
+* **Default value:** ``30``
+
+Minimum number of running queries required before query admission pacing
+is applied. When the total number of running queries is below this threshold,
+queries are admitted immediately without rate limiting, regardless of the
+``query-manager.query-pacing.max-queries-per-second`` setting.
+
+This allows the cluster to quickly ramp up when idle while still providing
+protection against overload when the cluster is busy. Set to ``0`` to always
+apply pacing when ``max-queries-per-second`` is configured.
+
+``max-total-running-task-count-to-not-execute-new-query``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Minimum value:** ``1``
+* **Default value:** ``2147483647`` (unlimited)
+
+Maximum total running task count across all queries on the coordinator. When
+this threshold is exceeded, new queries are held in the queue rather than
+being scheduled for execution. This helps prevent coordinator overload by
+limiting the number of concurrent tasks being managed.
+
+Unlike ``max-total-running-task-count-to-kill-query`` which kills queries when
+the limit is exceeded, this property proactively prevents new queries from
+starting while allowing existing queries to complete normally.
+
+This property works in conjunction with query admission pacing
+(``query-manager.query-pacing.max-queries-per-second``) to provide
+comprehensive coordinator load management. When both are configured:
+
+1. Pacing controls the rate at which queries are admitted
+2. This property provides a hard cap on total concurrent tasks
+
+Without query pacing, the cluster can admit many queries at once, which
+can cause the number of concurrent tasks to significantly exceed this limit.
+
+Set to a lower value (e.g., ``50000``) to limit coordinator task management
+overhead. The default value effectively disables this feature.
+
+.. note::
+
+ For backwards compatibility, this property can also be configured using the
+ legacy name ``experimental.max-total-running-task-count-to-not-execute-new-query``.
+
Query Retry Properties
----------------------
diff --git a/presto-docs/src/main/sphinx/admin/spill.rst b/presto-docs/src/main/sphinx/admin/spill.rst
index f0c51e751dd21..3af869c61f877 100644
--- a/presto-docs/src/main/sphinx/admin/spill.rst
+++ b/presto-docs/src/main/sphinx/admin/spill.rst
@@ -2,11 +2,6 @@
Spill to Disk
=============
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Overview
--------
@@ -30,7 +25,7 @@ of memory to queries and prevents deadlock caused by memory allocation.
It is efficient when there are a lot of small queries in the cluster, but
leads to killing large queries that don't stay within the limits.
-To overcome this inefficiency, the concept of revocable memory was introduced. A
+To overcome this limitation, the concept of revocable memory was introduced. A
query can request memory that does not count toward the limits, but this memory
can be revoked by the memory manager at any time. When memory is revoked, the
query runner spills intermediate data from memory to disk and continues to
@@ -107,6 +102,8 @@ When spill encryption is enabled (``spill-encryption-enabled`` property in
(per spill file) secret key. Enabling this will decrease the performance of spilling
to disk but can protect spilled data from being recovered from the files written to disk.
+.. _spill-operations:
+
Supported Operations
--------------------
diff --git a/presto-docs/src/main/sphinx/cache/local.rst b/presto-docs/src/main/sphinx/cache/local.rst
index 5b8efba0f8cb1..98a458d3edd32 100644
--- a/presto-docs/src/main/sphinx/cache/local.rst
+++ b/presto-docs/src/main/sphinx/cache/local.rst
@@ -2,11 +2,6 @@
Alluxio SDK Cache
=================
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Overview
--------
diff --git a/presto-docs/src/main/sphinx/cache/service.rst b/presto-docs/src/main/sphinx/cache/service.rst
index 4b11cee30e703..4313d1fb2a6f0 100644
--- a/presto-docs/src/main/sphinx/cache/service.rst
+++ b/presto-docs/src/main/sphinx/cache/service.rst
@@ -2,11 +2,6 @@
Alluxio Cache Service
=====================
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Overview
--------
diff --git a/presto-docs/src/main/sphinx/conf.py b/presto-docs/src/main/sphinx/conf.py
index ea71636098d47..0ba35583db294 100644
--- a/presto-docs/src/main/sphinx/conf.py
+++ b/presto-docs/src/main/sphinx/conf.py
@@ -64,7 +64,7 @@ def get_version():
needs_sphinx = '8.2.1'
extensions = [
- 'sphinx_immaterial', 'sphinx_copybutton', 'download', 'issue', 'pr', 'sphinx.ext.autosectionlabel'
+ 'sphinx_immaterial', 'download', 'issue', 'pr', 'sphinx.ext.autosectionlabel'
]
copyright = 'The Presto Foundation. All rights reserved. Presto is a registered trademark of LF Projects, LLC'
@@ -106,13 +106,8 @@ def get_version():
html_logo = 'images/logo.png'
html_favicon = 'images/favicon.ico'
-# doesn't seem to do anything
-# html_baseurl = 'overview.html'
-
html_static_path = ['.']
-templates_path = ['_templates']
-
# Set the primary domain to js because if left as the default python
# the theme errors when functions aren't available in a python module
primary_domain = 'js'
diff --git a/presto-docs/src/main/sphinx/connector.rst b/presto-docs/src/main/sphinx/connector.rst
index d337fe4ed12d1..00221c91e86a7 100644
--- a/presto-docs/src/main/sphinx/connector.rst
+++ b/presto-docs/src/main/sphinx/connector.rst
@@ -27,6 +27,7 @@ from different data sources.
connector/kafka
connector/kafka-tutorial
connector/kudu
+ connector/lance
connector/larksheets
connector/localfile
connector/memory
diff --git a/presto-docs/src/main/sphinx/connector/hive.rst b/presto-docs/src/main/sphinx/connector/hive.rst
index e3c0c22a7576d..76daed9d6f773 100644
--- a/presto-docs/src/main/sphinx/connector/hive.rst
+++ b/presto-docs/src/main/sphinx/connector/hive.rst
@@ -164,9 +164,18 @@ Property Name Description
absolutely necessary to access HDFS.
Example: ``/etc/hdfs-site.xml``
-``hive.storage-format`` The default file format used when creating new tables. ``ORC``
-
-``hive.compression-codec`` The compression codec to use when writing files. ``GZIP``
+``hive.storage-format`` The default file format used when creating new tables. The ``ORC``
+ available values are ``ORC``, ``PARQUET``, ``AVRO``,
+ ``RCBINARY``, ``RCTEXT``, ``SEQUENCEFILE``, ``JSON``,
+ and ``TEXTFILE``.
+
+``hive.compression-codec`` The compression codec to use when writing files. The ``GZIP``
+ available values are ``NONE``, ``SNAPPY``, ``GZIP``,
+ ``LZ4``, and ``ZSTD``.
+
+ Note: ``LZ4`` is only available when
+ ``hive.storage-format=ORC``. ``ZSTD`` is available
+ for both ``ORC`` and ``PARQUET`` formats.
``hive.force-local-scheduling`` Force splits to be scheduled on the same node as the Hadoop ``false``
DataNode process serving the split data. This is useful for
@@ -236,6 +245,18 @@ Property Name Description
.. _constructor: https://github.com/apache/hadoop/blob/02a9190af5f8264e25966a80c8f9ea9bb6677899/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java#L844-L875
+Hive Session Properties
+-----------------------
+
+======================================================== ============================================================ ============
+Property Name Description Default
+======================================================== ============================================================ ============
+``native_max_target_file_size`` Native Execution only. Maximum target file size. When a ``0B``
+ file exceeds this size during writing, the writer will
+ close the current file and start writing to a new file.
+ Zero means no limit.
+======================================================== ============================================================ ============
+
Avro Configuration Properties
-----------------------------
@@ -279,6 +300,33 @@ Add the ``metastore.storage.schema.reader.impl`` property to ``hive-site.xml`` w
You must restart the metastore service for this configuration to take effect. This setting allows the metastore to read storage schemas for Avro tables and avoids ``Storage schema reading not supported`` errors.
+Textfile Configuration Properties
+---------------------------------
+
+Table Properties
+^^^^^^^^^^^^^^^^
+
+These properties can be used when creating TEXTFILE tables in Presto:
+
+======================================================== ============================================================================== =============================
+Property Name Description Default
+======================================================== ============================================================================== =============================
+``textfile_field_delim`` A custom single-character delimiter to separate fields. NONE
+
+``textfile_escape_delim`` A custom single-character delimiter to escape characters. NONE
+
+``textfile_collection_delim`` A custom single-character delimiter to separate collection elements. NONE
+
+``textfile_mapkey_delim`` A custom single-character delimiter to separate map keys. NONE
+
+======================================================== ============================================================================== =============================
+
+.. note::
+   These properties are mapped to the corresponding properties in Hive ``LazySerDeParameters`` during serialization
+   and follow the same behavior as ``LazySimpleSerDe``.
+   If they are not defined, the Hive defaults are used, which are typically ``\001`` for the field delimiter,
+   ``\002`` for the collection delimiter, ``\003`` for the map key delimiter; the escape character is disabled.
+
Metastore Configuration Properties
----------------------------------
@@ -289,15 +337,29 @@ Property Name Descriptio
======================================================== ============================================================= ============
``hive.metastore-timeout`` Timeout for Hive metastore requests. ``10s``
-``hive.metastore-cache-ttl`` Duration how long cached metastore data should be considered ``0s``
+``hive.metastore.cache.enabled-caches`` Comma-separated list of metastore cache types to enable. NONE
+                                                         The value should be a valid ``CACHE_TYPE``.
+
+``hive.metastore.cache.disabled-caches`` Comma-separated list of metastore cache types to disable. NONE
+                                                         The value should be a valid ``CACHE_TYPE``.
+
+``hive.metastore.cache.ttl.default`` Duration how long cached metastore data should be considered ``0s``
valid.
+``hive.metastore.cache.ttl-by-type`` Per-cache time-to-live (TTL) overrides for Hive metastore NONE
+ caches. The value is a comma-separated list of
+                                                         ``CACHE_TYPE:ttl`` pairs.
+
``hive.metastore-cache-maximum-size`` Hive metastore cache maximum size. 10000
-``hive.metastore-refresh-interval`` Asynchronously refresh cached metastore data after access ``0s``
+``hive.metastore.cache.refresh-interval.default`` Asynchronously refresh cached metastore data after access ``0s``
if it is older than this but is not yet expired, allowing
subsequent accesses to see fresh data.
+``hive.metastore.cache.refresh-interval-by-type`` Per-cache refresh interval overrides for Hive metastore NONE
+ caches. The value is a comma-separated list of
+                                                         ``CACHE_TYPE:refresh-interval`` pairs.
+
``hive.metastore-refresh-max-threads`` Maximum threads used to refresh cached metastore data. 100
``hive.invalidate-metastore-cache-procedure-enabled`` When enabled, users will be able to invalidate metastore false
@@ -315,6 +377,26 @@ Property Name Descriptio
======================================================== ============================================================= ============
+.. note::
+
+ The supported values for ``CACHE_TYPE`` when enabling Hive Metastore Cache are:
+
+ * ``ALL``: Represents all supported Hive metastore cache types.
+ * ``DATABASE``: Caches metadata for individual Hive databases.
+ * ``DATABASE_NAMES``: Caches the list of all database names in the metastore.
+ * ``TABLE``: Caches metadata for individual Hive tables.
+ * ``TABLE_NAMES``: Caches the list of table names within a database.
+ * ``TABLE_STATISTICS``: Caches column-level statistics for Hive tables.
+ * ``TABLE_CONSTRAINTS``: Caches table constraint metadata such as primary and unique keys.
+ * ``PARTITION``: Caches metadata for individual Hive partitions.
+ * ``PARTITION_STATISTICS``: Caches column-level statistics for individual partitions.
+ * ``PARTITION_FILTER``: Caches partition name lookups based on partition filter predicates.
+ * ``PARTITION_NAMES``: Caches the list of partition names for a table.
+ * ``VIEW_NAMES``: Caches the list of view names within a database.
+ * ``TABLE_PRIVILEGES``: Caches table-level privilege information for users and roles.
+ * ``ROLES``: Caches the list of available Hive roles.
+ * ``ROLE_GRANTS``: Caches role grant mappings for principals.
+
AWS Glue Catalog Configuration Properties
-----------------------------------------
@@ -1262,4 +1344,4 @@ Example::
CAST(id AS BIGINT) AS id,
CAST(value AS INT) AS value,
CAST(date_col AS DATE) AS date_col
- FROM hive.csv.csv_data;
\ No newline at end of file
+ FROM hive.csv.csv_data;
diff --git a/presto-docs/src/main/sphinx/connector/hudi.rst b/presto-docs/src/main/sphinx/connector/hudi.rst
index 6d6ef20b17187..4d928ad6832ef 100644
--- a/presto-docs/src/main/sphinx/connector/hudi.rst
+++ b/presto-docs/src/main/sphinx/connector/hudi.rst
@@ -6,9 +6,9 @@ Overview
--------
The Hudi connector enables querying `Hudi `_ tables
-synced to Hive metastore. The connector usesthe metastore only to track partition locations.
+synced to Hive metastore. The connector uses the metastore only to track partition locations.
It makes use of the underlying Hudi filesystem and input formats to list data files. To learn
-more about the design of the connector, please check out `RFC-40 `_.
Requirements
diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst
index 5783c3e4c7cad..391b3dba70a73 100644
--- a/presto-docs/src/main/sphinx/connector/iceberg.rst
+++ b/presto-docs/src/main/sphinx/connector/iceberg.rst
@@ -557,6 +557,8 @@ Property Name Description
names. Default: ``__mv_storage__``
``materialized_view_missing_base_table_behavior`` Behavior when a base table referenced by a materialized view is Yes No
missing. Valid values: ``FAIL``, ``IGNORE``. Default: ``FAIL``
+``max_partitions_per_writer`` Overrides the behavior of the connector property Yes No
+ ``iceberg.max-partitions-per-writer`` in the current session.
===================================================== ======================================================================= =================== =============================================
Caching Support
@@ -679,7 +681,21 @@ File and stripe footer cache is not applicable for Presto C++.
Metastore Cache
^^^^^^^^^^^^^^^
-Iceberg Connector does not support Metastore Caching.
+Iceberg Connector supports Metastore Caching with some exceptions. Iceberg Connector does not allow enabling TABLE cache.
+Metastore Caching is only supported when ``iceberg.catalog.type`` is ``HIVE``.
+
+The Iceberg connector supports the same configuration properties for
+`Hive Metastore Caching `_
+as a Hive connector.
+
+The following configuration properties are the minimum set of configurations required to be added in the Iceberg catalog file ``catalog/iceberg.properties``:
+
+.. code-block:: none
+
+ # Hive Metastore Cache
+ hive.metastore.cache.disabled-caches=TABLE
+ hive.metastore.cache.ttl.default=10m
+ hive.metastore.cache.refresh-interval.default=5m
Extra Hidden Metadata Columns
-----------------------------
@@ -968,7 +984,7 @@ Register Table
Iceberg tables for which table data and metadata already exist in the
file system can be registered with the catalog. Use the ``register_table``
procedure on the catalog's ``system`` schema to register a table which
-already exists but does not known by the catalog.
+already exists but is not known by the catalog.
The following arguments are available:
@@ -1589,6 +1605,42 @@ Alter table operations are supported in the Iceberg connector::
ALTER TABLE iceberg.web.page_views DROP TAG 'tag1';
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH 'audit-branch';
+
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH IF NOT EXISTS 'audit-branch';
+
+ ALTER TABLE iceberg.default.mytable CREATE OR REPLACE BRANCH 'audit-branch';
+
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH 'audit-branch-system' FOR SYSTEM_VERSION AS OF 4176642711908913940;
+
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH IF NOT EXISTS 'audit-branch-system' FOR SYSTEM_VERSION AS OF 4176642711908913940;
+
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH 'audit-branch-retain' FOR SYSTEM_VERSION AS OF 4176642711908913940 RETAIN 7 DAYS;
+
+ ALTER TABLE iceberg.default.mytable CREATE BRANCH 'audit-branch-snap-retain' FOR SYSTEM_VERSION AS OF 4176642711908913940 RETAIN 7 DAYS WITH SNAPSHOT RETENTION 2 SNAPSHOTS 2 DAYS;
+
+ ALTER TABLE iceberg.default.mytable CREATE OR REPLACE BRANCH 'audit-branch-time' FOR SYSTEM_TIME AS OF TIMESTAMP '2026-01-02 17:30:35.247 Asia/Kolkata';
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG 'audit-tag';
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG IF NOT EXISTS 'audit-tag';
+
+ ALTER TABLE iceberg.default.mytable CREATE OR REPLACE TAG 'audit-tag';
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG 'audit-tag-system' FOR SYSTEM_VERSION AS OF 4176642711908913940;
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG IF NOT EXISTS 'audit-tag-system' FOR SYSTEM_VERSION AS OF 4176642711908913940;
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG 'audit-tag-retain' FOR SYSTEM_VERSION AS OF 4176642711908913940 RETAIN 7 DAYS;
+
+ ALTER TABLE iceberg.default.mytable CREATE TAG 'audit-tag-snap-retain' FOR SYSTEM_VERSION AS OF 4176642711908913940 RETAIN 7 DAYS WITH SNAPSHOT RETENTION 2 SNAPSHOTS 2 DAYS;
+
+ ALTER TABLE iceberg.default.mytable CREATE OR REPLACE TAG 'audit-tag-time' FOR SYSTEM_TIME AS OF TIMESTAMP '2026-01-02 17:30:35.247 Asia/Kolkata';
+
+**Presto C++ Support**
+
+Creating and dropping tags and branches with ``ALTER TABLE`` statements is fully supported in Presto C++.
+
To add a new column as a partition column, identify the transform functions for the column.
The table is partitioned by the transformed value of the column::
@@ -1874,6 +1926,73 @@ Iceberg tables do not support running multiple :doc:`../sql/merge` statements on
Failed to commit Iceberg update to table:
Found conflicting files that can contain records matching true
+Transaction support
+^^^^^^^^^^^^^^^^^^^
+
+The Iceberg connector supports explicit multi-statement transactions with writes
+to a single Iceberg table. To run transaction statements, use
+:doc:`/sql/start-transaction` with :doc:`/sql/commit` or :doc:`/sql/rollback`.
+
+The Iceberg connector provides snapshot isolation at ``REPEATABLE READ`` level.
+This also satisfies ``READ COMMITTED`` and ``READ UNCOMMITTED``, so these
+isolation levels are supported as well. For snapshot semantics, use
+``REPEATABLE READ``.
+
+Within a transaction, reads can access multiple tables, while write operations are
+restricted to a single Iceberg table. All operations execute under snapshot isolation.
+The transaction therefore behaves as a **multi-table read, single-table write** transaction::
+
+ START TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+ INSERT INTO iceberg.default.test_table
+ SELECT id, status
+ FROM iceberg.source.source_table1
+ WHERE status = 'pending';
+ INSERT INTO iceberg.default.test_table
+ SELECT * FROM iceberg.source.source_table2;
+ INSERT INTO iceberg.default.test_table (id, status) VALUES (1, 'pending');
+ UPDATE iceberg.default.test_table
+ SET status = 'committed'
+      WHERE id < 100 AND status = 'pending';
+ COMMIT;
+
+Statements executed within the same transaction follow **read-your-writes**
+semantics. This behavior is important for standard SQL interactive transactions.
+Data modifications performed earlier in the transaction are visible to subsequent
+statements before the transaction is committed::
+
+ START TRANSACTION;
+ INSERT INTO iceberg.default.test_table (id, status) VALUES (1, 'pending'), (2, 'pending');
+ UPDATE iceberg.default.test_table SET status = 'committed' WHERE id = 1;
+ SELECT * FROM iceberg.default.test_table; -- (1, 'committed'), (2, 'pending')
+
+ DELETE FROM iceberg.default.test_table WHERE status = 'pending';
+ SELECT * FROM iceberg.default.test_table; -- (1, 'committed')
+ COMMIT;
+
+Limitations:
+
+* Writes in the same transaction can target only one Iceberg table. Attempts
+ to write to another table fail with ``Not allowed to open write transactions on multiple tables``.
+* ``SERIALIZABLE`` isolation is not supported by the Iceberg connector.
+* The following statements are only supported in autocommit mode:
+ ``MERGE INTO``, ``CREATE/DROP/RENAME TABLE``,
+ ``CREATE/DROP/RENAME SCHEMA``, ``CREATE/DROP/RENAME VIEW``,
+ ``CREATE/DROP/REFRESH MATERIALIZED VIEW``, ``TRUNCATE TABLE``, and
+ ``ANALYZE``.
+* ``CALL`` statements are only supported in autocommit mode.
+* If concurrent transactions change table metadata, commit may fail and require
+ retrying the transaction (for example, ``Table metadata refresh is required``).
+
+.. _iceberg_analyze:
+
+Collecting table and column statistics
+--------------------------------------
+
+The Iceberg connector supports collection of table and column statistics
+with the :doc:`/sql/analyze` statement::
+
+ ANALYZE iceberg.tpch.orders;
+
Schema Evolution
----------------
@@ -2176,7 +2295,7 @@ Querying branches and tags
Iceberg supports branches and tags which are named references to snapshots.
-Query Iceberg table by specifying the branch name:
+Query Iceberg table by specifying the branch name using ``FOR SYSTEM_VERSION AS OF``:
.. code-block:: sql
@@ -2191,6 +2310,21 @@ Query Iceberg table by specifying the branch name:
30 | mexico | 3 | comment
(3 rows)
+Alternatively, you can query a branch using the dot notation syntax with quoted identifiers:
+
+.. code-block:: sql
+
+ SELECT * FROM "nation.branch_testBranch";
+
+.. code-block:: text
+
+ nationkey | name | regionkey | comment
+ -----------+---------------+-----------+---------
+ 10 | united states | 1 | comment
+ 20 | canada | 2 | comment
+ 30 | mexico | 3 | comment
+ (3 rows)
+
Query Iceberg table by specifying the tag name:
.. code-block:: sql
@@ -2205,6 +2339,110 @@ Query Iceberg table by specifying the tag name:
20 | canada | 2 | comment
(3 rows)
+**Note:** The dot notation syntax ``"<table_name>.branch_<branch_name>"`` requires double quotes to prevent the SQL parser from interpreting the dot as a schema.table separator. This syntax works for both querying (SELECT) and mutating (INSERT, UPDATE, DELETE, MERGE) branch data.
+
+**Presto C++ Support**
+
+Querying tags and branches is fully supported in Presto C++.
+
+Mutating Iceberg Branches
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Iceberg supports performing INSERT, UPDATE, DELETE, and MERGE operations directly on branches,
+allowing you to make changes to a branch without affecting the main table or other branches.
+
+To perform mutations on a branch, use the quoted identifier syntax ``"<table_name>.branch_<branch_name>"`` (for example, ``"orders.branch_audit_branch"``).
+The quotes are required to prevent the SQL parser from interpreting the dot as a schema.table separator.
+
+**Insert into a branch:**
+
+.. code-block:: sql
+
+ -- Create a branch first
+ ALTER TABLE orders CREATE BRANCH 'audit_branch';
+
+ -- Insert data into the branch
+ INSERT INTO "orders.branch_audit_branch" VALUES (1, 'Product A', 100.00);
+ INSERT INTO "orders.branch_audit_branch" VALUES (2, 'Product B', 200.00);
+
+**Update data in a branch:**
+
+.. code-block:: sql
+
+ -- Update specific rows in the branch
+ UPDATE "orders.branch_audit_branch" SET price = 120.00 WHERE id = 1;
+
+ -- Update with complex expressions
+ UPDATE "orders.branch_audit_branch"
+ SET price = price * 1.1
+ WHERE category = 'electronics';
+
+**Delete from a branch:**
+
+.. code-block:: sql
+
+ -- Delete specific rows from the branch
+ DELETE FROM "orders.branch_audit_branch" WHERE id = 2;
+
+ -- Delete with complex predicates
+ DELETE FROM "orders.branch_audit_branch"
+ WHERE created_date < DATE '2024-01-01';
+
+**Merge into a branch:**
+
+.. code-block:: sql
+
+ -- Merge data from source table into branch
+ MERGE INTO "orders.branch_audit_branch" t
+ USING source_table s
+ ON t.id = s.id
+ WHEN MATCHED THEN UPDATE SET price = s.price
+ WHEN NOT MATCHED THEN INSERT (id, product, price) VALUES (s.id, s.product, s.price);
+
+**Verify branch isolation:**
+
+After performing mutations on a branch, you can verify that the main table remains unchanged:
+
+.. code-block:: sql
+
+ -- Query the branch to see changes
+ SELECT * FROM orders FOR SYSTEM_VERSION AS OF 'audit_branch';
+
+ -- Query the main table (unchanged)
+ SELECT * FROM orders;
+
+**Supported operations:**
+
+The following DML operations are supported with branch-specific table names:
+
+* ``INSERT`` - Add new rows to a branch
+* ``UPDATE`` - Modify existing rows in a branch
+* ``DELETE`` - Remove rows from a branch (including metadata delete optimization)
+* ``MERGE`` - Conditionally insert, update, or delete rows in a branch
+* ``TRUNCATE TABLE`` - Remove all rows from a branch
+* ``SELECT`` - Query branch data using ``FOR SYSTEM_VERSION AS OF 'branch_name'``
+
+**Unsupported operations:**
+
+The following operations are **not supported** with branch-specific table names and will result in an error:
+
+* ``ALTER TABLE`` DDL operations (``ADD COLUMN``, ``DROP COLUMN``, ``RENAME COLUMN``, ``SET PROPERTIES``) - Schema changes must be applied to the main table
+* ``CREATE VIEW`` / ``CREATE MATERIALIZED VIEW`` - Views cannot be created from branch-specific tables
+
+**Important notes:**
+
+* Branch mutations require quoted identifiers (double quotes) around the table name with branch suffix
+* The branch must exist before performing mutations (create it with ``ALTER TABLE ... CREATE BRANCH``)
+* Changes are isolated to the specified branch and do not affect the main table or other branches
+* All standard SQL features work with branch mutations such as WHERE clauses, column lists, INSERT from SELECT, and others
+* For MERGE operations, the table must have format version 2 or higher and update mode set to ``merge-on-read``
+
+**Presto C++ Support**
+
+Branch mutations are partially supported in Presto C++.
+
+* **Supported:** ``INSERT``, ``TRUNCATE TABLE``
+
Presto C++ Support
^^^^^^^^^^^^^^^^^^
@@ -2427,27 +2665,36 @@ The storage table inherits standard Iceberg table properties for partitioning, s
Freshness and Refresh
^^^^^^^^^^^^^^^^^^^^^
-Materialized views track the snapshot IDs of their base tables to determine staleness. When base tables are modified, the materialized view becomes stale and returns results by querying the base tables directly. After running ``REFRESH MATERIALIZED VIEW``, queries read from the pre-computed storage table.
-
-The refresh operation uses a full refresh strategy, replacing all data in the storage table with the current query results.
+After running ``REFRESH MATERIALIZED VIEW``, queries read from the pre-computed storage table. The refresh operation uses a full refresh strategy, replacing all data in the storage table with the current query results and recording the new snapshot IDs for all base tables.
.. _iceberg-stale-data-handling:
Stale Data Handling
^^^^^^^^^^^^^^^^^^^
+The Iceberg connector automatically detects staleness by comparing current base table
+snapshots against the snapshots recorded at the last refresh. A materialized view is
+considered stale if base tables have changed AND the time since the last base table
+modification exceeds the configured staleness window.
+
By default, when no staleness properties are configured, queries against a stale materialized
view will fall back to executing the underlying view query against the base tables. You can
change this default using the ``materialized_view_stale_read_behavior`` session property.
To configure staleness handling per view, set both of these properties together:
-- ``stale_read_behavior``: What to do when reading stale data (``FAIL`` or ``USE_VIEW_QUERY``)
+- ``stale_read_behavior``: What to do when reading stale data (``FAIL``, ``USE_VIEW_QUERY``, or ``USE_STITCHING``)
- ``staleness_window``: How much staleness to tolerate (e.g., ``1h``, ``30m``, ``0s``)
-The Iceberg connector automatically detects staleness based on base table modifications.
-A materialized view is considered stale if base tables have changed AND the time since
-the last base table modification exceeds the staleness window.
+When ``USE_STITCHING`` is configured, the Iceberg connector tracks staleness at the
+partition level, enabling predicate stitching to recompute only affected partitions
+rather than the entire view. See :doc:`/admin/materialized-views` for details on how
+predicate stitching works.
+
+.. note::
+ Partition-level staleness detection only works for append-only changes (INSERT).
+ DELETE or UPDATE operations on base tables cause the entire view to be treated
+ as stale, requiring full recomputation.
Example with staleness handling:
@@ -2464,8 +2711,8 @@ Example with staleness handling:
Limitations
^^^^^^^^^^^
-- All refreshes recompute the entire result set
-- REFRESH does not provide snapshot isolation across multiple base tables
+- All refreshes recompute the entire result set (incremental refresh not supported)
+- REFRESH does not provide snapshot isolation across multiple base tables (each base table's current snapshot is used independently)
- Querying materialized views at specific snapshots or timestamps is not supported
Example
diff --git a/presto-docs/src/main/sphinx/connector/lance.rst b/presto-docs/src/main/sphinx/connector/lance.rst
new file mode 100644
index 0000000000000..266a26a7a3997
--- /dev/null
+++ b/presto-docs/src/main/sphinx/connector/lance.rst
@@ -0,0 +1,236 @@
+===============
+Lance Connector
+===============
+
+Overview
+--------
+
+The Lance connector allows querying and writing data stored in
+`Lance <https://lancedb.github.io/lance/>`_ format from Presto. Lance is a modern columnar
+data format optimized for machine learning workloads and fast random access.
+
+The connector uses the Lance Java SDK to read and write Lance datasets.
+Each Lance dataset is organized into **fragments**, and the connector maps each fragment to a
+Presto split for parallel processing across workers.
+
+Configuration
+-------------
+
+To configure the Lance connector, create a catalog properties file
+``etc/catalog/lance.properties`` with the following contents,
+replacing the properties as appropriate:
+
+.. code-block:: none
+
+ connector.name=lance
+ lance.root-url=/path/to/lance/data
+
+Configuration Properties
+------------------------
+
+The following configuration properties are available:
+
+=============================== ============================================================= ===============
+Property Name Description Default
+=============================== ============================================================= ===============
+``lance.impl`` Namespace implementation: ``dir`` ``dir``
+``lance.root-url`` Root storage path for Lance datasets. ``""``
+``lance.single-level-ns`` When ``true``, uses a single-level namespace with a ``true``
+ virtual ``default`` schema.
+``lance.read-batch-size`` Number of rows per Arrow batch during reads. ``8192``
+``lance.max-rows-per-file`` Maximum number of rows per Lance data file. ``1000000``
+``lance.max-rows-per-group`` Maximum number of rows per row group. ``100000``
+``lance.write-batch-size`` Number of rows to batch before writing to Arrow. ``10000``
+=============================== ============================================================= ===============
+
+``lance.impl``
+^^^^^^^^^^^^^^
+
+Namespace implementation to use. The default ``dir`` uses a directory-based
+table store where each table is a ``.lance`` directory under the root.
+
+``lance.root-url``
+^^^^^^^^^^^^^^^^^^
+
+Root storage path for Lance datasets. All tables are stored as subdirectories
+named ``<table_name>.lance`` under this path. For example, if ``lance.root-url``
+is set to ``/data/lance``, a table named ``my_table`` is stored at
+``/data/lance/my_table.lance``.
+
+``lance.single-level-ns``
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When set to ``true`` (the default), the connector exposes a single ``default``
+schema that maps directly to the root directory. All tables are accessed as
+``lance.default.<table_name>``.
+
+``lance.read-batch-size``
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Controls the number of rows read per Arrow batch from Lance. Larger values may
+improve read throughput at the cost of higher memory usage. The default is
+``8192``.
+
+``lance.max-rows-per-file``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Maximum number of rows per Lance data file. The default is ``1000000``.
+
+.. note::
+
+ This property is reserved for future use and is not yet wired into the
+ write path.
+
+``lance.max-rows-per-group``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Maximum number of rows per row group within a Lance data file. The default is
+``100000``.
+
+.. note::
+
+ This property is reserved for future use and is not yet wired into the
+ write path.
+
+``lance.write-batch-size``
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Number of rows to batch before converting to Arrow format during writes. The
+default is ``10000``.
+
+.. note::
+
+ This property is reserved for future use and is not yet wired into the
+ write path.
+
+Data Types
+----------
+
+The following table lists the supported data type mappings between Lance
+(Arrow) types and Presto types:
+
+================= =============== ======================================
+Lance (Arrow) Presto Notes
+================= =============== ======================================
+``Bool`` ``BOOLEAN``
+``Int(8)`` ``TINYINT``
+``Int(16)`` ``SMALLINT``
+``Int(32)`` ``INTEGER``
+``Int(64)`` ``BIGINT``
+``Float(SINGLE)`` ``REAL``
+``Float(DOUBLE)`` ``DOUBLE``
+``Utf8`` ``VARCHAR``
+``LargeUtf8`` ``VARCHAR``
+``Binary`` ``VARBINARY``
+``LargeBinary`` ``VARBINARY``
+``Date(DAY)`` ``DATE``
+``Timestamp`` ``TIMESTAMP`` Microsecond precision; reads support
+ both with and without timezone
+``List`` ``ARRAY`` Read only; element type mapped
+ recursively
+``FixedSizeList`` ``ARRAY`` Read only; element type mapped
+ recursively
+================= =============== ======================================
+
+.. note::
+
+ Arrow types not listed above are unsupported and will cause an error.
+
+SQL Support
+-----------
+
+The Lance connector supports the following SQL operations.
+
+CREATE TABLE
+^^^^^^^^^^^^
+
+Create a new Lance table:
+
+.. code-block:: sql
+
+ CREATE TABLE lance.default.my_table (
+ id BIGINT,
+ name VARCHAR,
+ score DOUBLE
+ );
+
+CREATE TABLE AS
+^^^^^^^^^^^^^^^
+
+Create a Lance table from a query:
+
+.. code-block:: sql
+
+ CREATE TABLE lance.default.my_table AS
+ SELECT * FROM tpch.tiny.nation;
+
+INSERT INTO
+^^^^^^^^^^^
+
+Append data to an existing Lance table:
+
+.. code-block:: sql
+
+ INSERT INTO lance.default.my_table
+ SELECT * FROM tpch.tiny.nation;
+
+SELECT
+^^^^^^
+
+Query data from a Lance table:
+
+.. code-block:: sql
+
+ SELECT * FROM lance.default.my_table;
+
+Column projection is pushed down to Lance, so queries that select a subset
+of columns only read those columns from disk:
+
+.. code-block:: sql
+
+ SELECT id, name FROM lance.default.my_table;
+
+DROP TABLE
+^^^^^^^^^^
+
+Drop a Lance table and delete all its data:
+
+.. code-block:: sql
+
+ DROP TABLE lance.default.my_table;
+
+SHOW TABLES
+^^^^^^^^^^^
+
+List all tables in the catalog:
+
+.. code-block:: sql
+
+ SHOW TABLES FROM lance.default;
+
+DESCRIBE
+^^^^^^^^
+
+Show the columns and types of a Lance table:
+
+.. code-block:: sql
+
+ DESCRIBE lance.default.my_table;
+
+Limitations
+-----------
+
+* Only a single schema (``default``) is supported when ``lance.single-level-ns``
+ is ``true``.
+* The following SQL statements are not supported:
+
+ * :doc:`/sql/alter-table`
+ * :doc:`/sql/delete`
+ * :doc:`/sql/update`
+
+* Predicate pushdown is not supported. Only column projection is pushed down
+ to the Lance reader.
+* ``ARRAY`` types are supported for reads but cannot be written.
+* Only local filesystem paths are supported in the current ``dir`` implementation.
+* Data written by one Presto cluster is not visible to another cluster until the
+ write transaction commits.
diff --git a/presto-docs/src/main/sphinx/develop.rst b/presto-docs/src/main/sphinx/develop.rst
index 8f58863395ad8..2ef0136f8a6ee 100644
--- a/presto-docs/src/main/sphinx/develop.rst
+++ b/presto-docs/src/main/sphinx/develop.rst
@@ -17,6 +17,7 @@ This guide is intended for Presto contributors and plugin developers.
develop/system-access-control
develop/password-authenticator
develop/event-listener
+ develop/openlineage-event-listener
develop/client-protocol
develop/worker-protocol
develop/serialized-page
diff --git a/presto-docs/src/main/sphinx/develop/openlineage-event-listener.rst b/presto-docs/src/main/sphinx/develop/openlineage-event-listener.rst
new file mode 100644
index 0000000000000..45b11a2b7b598
--- /dev/null
+++ b/presto-docs/src/main/sphinx/develop/openlineage-event-listener.rst
@@ -0,0 +1,163 @@
+==========================
+OpenLineage Event Listener
+==========================
+
+The OpenLineage event listener plugin emits query events in the
+`OpenLineage <https://openlineage.io/>`_ format, enabling integration with
+lineage tracking systems such as `Marquez <https://marquezproject.ai/>`_,
+`Atlan <https://atlan.com/>`_, and `DataHub <https://datahubproject.io/>`_.
+
+The plugin captures:
+
+* Query start events (``START``)
+* Query completion events (``COMPLETE`` or ``FAIL``)
+* Input and output dataset information including column-level lineage
+
+Installation
+------------
+
+The OpenLineage event listener plugin is bundled with Presto and requires
+no additional installation.
+
+Configuration
+-------------
+
+Create an ``etc/event-listener.properties`` file on the coordinator with the
+following required properties:
+
+.. code-block:: none
+
+ event-listener.name=openlineage-event-listener
+ openlineage-event-listener.presto.uri=http://presto-coordinator:8080
+ openlineage-event-listener.transport.type=CONSOLE
+
+Transport Types
+^^^^^^^^^^^^^^^
+
+The plugin supports two transport types for emitting OpenLineage events:
+
+**Console Transport**
+
+Writes OpenLineage events as JSON to stdout. Useful for debugging and
+development.
+
+.. code-block:: none
+
+ event-listener.name=openlineage-event-listener
+ openlineage-event-listener.presto.uri=http://presto-coordinator:8080
+ openlineage-event-listener.transport.type=CONSOLE
+
+**HTTP Transport**
+
+Sends OpenLineage events to an HTTP endpoint such as the Marquez API.
+
+.. code-block:: none
+
+ event-listener.name=openlineage-event-listener
+ openlineage-event-listener.presto.uri=http://presto-coordinator:8080
+ openlineage-event-listener.transport.type=HTTP
+ openlineage-event-listener.transport.url=http://marquez:5000
+ openlineage-event-listener.transport.endpoint=/api/v1/lineage
+
+Configuration Properties
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+ :widths: 40 10 10 40
+ :header-rows: 1
+
+ * - Property
+ - Required
+ - Default
+ - Description
+ * - ``openlineage-event-listener.presto.uri``
+ - Yes
+ -
+ - URI of the Presto server. Used for namespace rendering in OpenLineage events.
+ * - ``openlineage-event-listener.transport.type``
+ - No
+ - ``CONSOLE``
+ - Transport type for emitting events. Supported values: ``CONSOLE``, ``HTTP``.
+ * - ``openlineage-event-listener.namespace``
+ - No
+ -
+ - Override the default namespace for OpenLineage jobs. Defaults to the Presto URI with ``presto://`` scheme.
+ * - ``openlineage-event-listener.job.name-format``
+ - No
+ - ``$QUERY_ID``
+ - Format string for the OpenLineage job name. Supported placeholders: ``$QUERY_ID``, ``$USER``, ``$SOURCE``, ``$CLIENT_IP``.
+ * - ``openlineage-event-listener.presto.include-query-types``
+ - No
+ - ``DELETE,INSERT,MERGE,UPDATE,DATA_DEFINITION``
+ - Comma-separated list of query types that generate OpenLineage events. Other query types are filtered out on completion.
+ * - ``openlineage-event-listener.disabled-facets``
+ - No
+ -
+ - Comma-separated list of facets to exclude from events. Supported values: ``PRESTO_METADATA``, ``PRESTO_QUERY_STATISTICS``, ``PRESTO_QUERY_CONTEXT``.
+
+HTTP Transport Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These properties apply when ``openlineage-event-listener.transport.type`` is set to ``HTTP``.
+
+.. list-table::
+ :widths: 40 10 10 40
+ :header-rows: 1
+
+ * - Property
+ - Required
+ - Default
+ - Description
+ * - ``openlineage-event-listener.transport.url``
+ - Yes
+ -
+ - URL of the OpenLineage API server.
+ * - ``openlineage-event-listener.transport.endpoint``
+ - No
+ -
+ - Custom API path for receiving events.
+ * - ``openlineage-event-listener.transport.api-key``
+ - No
+ -
+ - API key for authentication. Sent as a ``Bearer`` token.
+ * - ``openlineage-event-listener.transport.timeout``
+ - No
+ - ``5s``
+ - HTTP request timeout. Accepts duration strings. For example: ``5s``, ``30s``, ``1m``.
+ * - ``openlineage-event-listener.transport.headers``
+ - No
+ -
+ - Custom HTTP headers as comma-separated ``key:value`` pairs.
+ * - ``openlineage-event-listener.transport.url-params``
+ - No
+ -
+ - Custom URL query parameters as comma-separated ``key:value`` pairs.
+ * - ``openlineage-event-listener.transport.compression``
+ - No
+ - ``NONE``
+ - HTTP body compression. Supported values: ``NONE``, ``GZIP``.
+
+Event Details
+-------------
+
+The plugin emits the following OpenLineage facets:
+
+**Run Facets**
+
+* ``processing_engine`` - Presto server version information
+* ``presto_metadata`` - Query ID, transaction ID, and query plan
+* ``presto_query_context`` - User, server address, environment, source, client info
+* ``presto_query_statistics`` - Detailed query execution statistics (on completion only)
+* ``nominalTime`` - Query start and end times (on completion only)
+* ``errorMessage`` - Failure message (on failure only)
+
+**Job Facets**
+
+* ``jobType`` - ``BATCH`` / ``PRESTO`` / ``QUERY``
+* ``sql`` - The SQL query text with dialect ``presto``
+
+**Dataset Facets**
+
+* ``schema`` - Column names and types for input and output datasets
+* ``dataSource`` - Catalog and schema information
+* ``columnLineage`` - Column-level lineage mapping from input to output columns
diff --git a/presto-docs/src/main/sphinx/ecosystem/list.rst b/presto-docs/src/main/sphinx/ecosystem/list.rst
index 48d0fede5decf..34c72ec9721c8 100644
--- a/presto-docs/src/main/sphinx/ecosystem/list.rst
+++ b/presto-docs/src/main/sphinx/ecosystem/list.rst
@@ -2,11 +2,6 @@
Ecosystem
=========
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Overview
--------
diff --git a/presto-docs/src/main/sphinx/functions.rst b/presto-docs/src/main/sphinx/functions.rst
index 8ba846f7d7285..9717a1455d6f1 100644
--- a/presto-docs/src/main/sphinx/functions.rst
+++ b/presto-docs/src/main/sphinx/functions.rst
@@ -38,3 +38,4 @@ Functions and Operators
functions/setdigest
functions/sketch
functions/pinot
+ functions/plugin-loaded-functions
diff --git a/presto-docs/src/main/sphinx/functions/array.rst b/presto-docs/src/main/sphinx/functions/array.rst
index 0f7a2fb2d5c2b..339cc9a8bdfc5 100644
--- a/presto-docs/src/main/sphinx/functions/array.rst
+++ b/presto-docs/src/main/sphinx/functions/array.rst
@@ -21,6 +21,8 @@ The ``||`` operator is used to concatenate an array with an array or an element
Array Functions
---------------
+For plugin-loaded array functions, see :ref:`functions/plugin-loaded-functions:array functions`.
+
.. function:: all_match(array(T), function(T,boolean)) -> boolean
Returns whether all elements of an array match the given predicate. Returns ``true`` if all the elements
@@ -35,11 +37,6 @@ Array Functions
array is empty); ``NULL`` if the predicate function returns ``NULL`` for one or more elements and ``false``
for all other elements.
-.. function:: array_average(array(double)) -> double
-
- Returns the average of all non-null elements of the ``array``. If there is no non-null elements, returns
- ``null``.
-
.. function:: array_cum_sum(array(T)) -> array(T)
Returns the array whose elements are the cumulative sum of the input array, i.e. result[i] = input[1]+input[2]+...+input[i].
@@ -55,14 +52,6 @@ Array Functions
SELECT array_distinct(ARRAY [1, 2, null, null, 2]) -- ARRAY[1, 2, null]
SELECT array_distinct(ARRAY [ROW(1, null), ROW (1, null)] -- ARRAY[ROW(1, null)
-.. function:: array_duplicates(array(T)) -> array(bigint/varchar)
-
- Returns a set of elements that occur more than once in ``array``.
- Throws an exception if any of the elements are rows or arrays that contain nulls. ::
-
- SELECT array_duplicates(ARRAY[1, 2, null, 1, null, 3]) -- ARRAY[1, null]
- SELECT array_duplicates(ARRAY[ROW(1, null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
-
.. function:: array_except(x, y) -> array
Returns an array of elements in ``x`` but not in ``y``, without duplicates.
@@ -70,19 +59,6 @@ Array Functions
SELECT array_except(ARRAY[1, 3, 3, 2, null], ARRAY[1,2, 2, 4]) -- ARRAY[3, null]
-.. function:: array_frequency(array(E)) -> map(E, int)
-
- Returns a map: keys are the unique elements in the ``array``, values are how many times the key appears.
- Ignores null elements. Empty array returns empty map.
-
-.. function:: array_has_duplicates(array(T)) -> boolean
-
- Returns a boolean: whether ``array`` has any elements that occur more than once.
- Throws an exception if any of the elements are rows or arrays that contain nulls. ::
-
- SELECT array_has_duplicates(ARRAY[1, 2, null, 1, null, 3]) -- true
- SELECT array_has_duplicates(ARRAY[ROW(1, null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
-
.. function:: array_intersect(x, y) -> array
Returns an array of the elements in the intersection of ``x`` and ``y``, without duplicates.
@@ -90,36 +66,10 @@ Array Functions
SELECT array_intersect(ARRAY[1, 2, 3, 2, null], ARRAY[1,2, 2, 4, null]) -- ARRAY[1, 2, null]
-.. function:: array_intersect(array(array(E))) -> array(E)
-
- Returns an array of the elements in the intersection of all arrays in the given array, without duplicates.
- This function uses ``IS NOT DISTINCT FROM`` to determine which elements are the same. ::
-
- SELECT array_intersect(ARRAY[ARRAY[1, 2, 3, 2, null], ARRAY[1,2,2, 4, null], ARRAY [1, 2, 3, 4 null]]) -- ARRAY[1, 2, null]
-
.. function:: array_join(x, delimiter, null_replacement) -> varchar
Concatenates the elements of the given array using the delimiter and an optional string to replace nulls.
-.. function:: array_least_frequent(array(T)) -> array(T)
-
- Returns the least frequent non-null element of an array. If there are multiple elements with the same frequency, the function returns the smallest element.
- If the array has more than one element and any elements are ``ROWS`` with null fields or ``ARRAYS`` with null elements, an exception is returned. ::
-
- SELECT array_least_frequent(ARRAY[1, 0 , 5]) -- ARRAY[0]
- select array_least_frequent(ARRAY[1, null, 1]) -- ARRAY[1]
- select array_least_frequent(ARRAY[ROW(1,null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
-
-.. function:: array_least_frequent(array(T), n) -> array(T)
-
- Returns ``n`` least frequent non-null elements of an array. The elements are ordered in increasing order of their frequencies.
- If two elements have the same frequency, smaller elements will appear first.
- If the array has more than one element and any elements are ``ROWS`` with null fields or ``ARRAYS`` with null elements, an exception is returned. ::
-
- SELECT array_least_frequent(ARRAY[3, 2, 2, 6, 6, 1, 1], 3) -- ARRAY[3, 1, 2]
- select array_least_frequent(ARRAY[1, null, 1], 2) -- ARRAY[1]
- select array_least_frequent(ARRAY[ROW(1,null), ROW(1, null)], 2) -- "map key cannot be null or contain nulls"
-
.. function:: array_max(x) -> x
Returns the maximum value of input array.
@@ -128,20 +78,6 @@ Array Functions
Returns the minimum value of input array.
-.. function:: array_max_by(array(T), function(T, U)) -> T
-
- Applies the provided function to each element, and returns the element that gives the maximum value.
- ``U`` can be any orderable type. ::
-
- SELECT array_max_by(ARRAY ['a', 'bbb', 'cc'], x -> LENGTH(x)) -- 'bbb'
-
-.. function:: array_min_by(array(T), function(T, U)) -> T
-
- Applies the provided function to each element, and returns the element that gives the minimum value.
- ``U`` can be any orderable type. ::
-
- SELECT array_min_by(ARRAY ['a', 'bbb', 'cc'], x -> LENGTH(x)) -- 'a'
-
.. function:: array_normalize(x, p) -> array
Normalizes array ``x`` by dividing each element by the p-norm of the array.
@@ -210,15 +146,6 @@ Array Functions
SELECT array_sort(ARRAY[CAST(0.0 AS DOUBLE), CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE)], x -> x); -- [-Infinity, 0.0, Infinity, NaN]
SELECT array_sort(ARRAY[ROW('a', 3), ROW('b', 1), ROW('c', 2)], x -> x[2]); -- [ROW('b', 1), ROW('c', 2), ROW('a', 3)]
-.. function:: array_sort_desc(x) -> array
-
- Returns the ``array`` sorted in the descending order. Elements of the ``array`` must be orderable.
- Null elements are placed at the end of the returned array. ::
-
- SELECT array_sort_desc(ARRAY [100, 1, 10, 50]); -- [100, 50, 10, 1]
- SELECT array_sort_desc(ARRAY [null, 100, null, 1, 10, 50]); -- [100, 50, 10, 1, null, null]
- SELECT array_sort_desc(ARRAY [ARRAY ["a", null], null, ARRAY ["a"]); -- [["a", null], ["a"], null]
-
.. function:: array_sort_desc(array(T), function(T,U)) -> array(T)
Sorts and returns the ``array`` in descending order using a lambda function to extract sorting keys.
@@ -231,16 +158,6 @@ Array Functions
SELECT array_sort_desc(ARRAY[CAST(0.0 AS DOUBLE), CAST('NaN' AS DOUBLE), CAST('Infinity' AS DOUBLE), CAST('-Infinity' AS DOUBLE)], x -> x); -- [NaN, Infinity, 0.0, -Infinity]
SELECT array_sort_desc(ARRAY[ROW('a', 3), ROW('b', 1), ROW('c', 2)], x -> x[2]); -- [ROW('a', 3), ROW('c', 2), ROW('b', 1)]
-.. function:: array_split_into_chunks(array(T), int) -> array(array(T))
-
- Returns an ``array`` of arrays splitting the input ``array`` into chunks of given length.
- The last chunk will be shorter than the chunk length if the array's length is not an integer multiple of
- the chunk length. Ignores null inputs, but not elements.
-
- SELECT array_split_into_chunks(ARRAY [1, 2, 3, 4], 3); -- [[1, 2, 3], [4]]
- SELECT array_split_into_chunks(null, null); -- null
- SELECT array_split_into_chunks(array[1, 2, 3, cast(null as int)], 2]); -- [[1, 2], [3, null]]
-
.. function:: array_sum(array(T)) -> bigint/double
Returns the sum of all non-null elements of the ``array``. If there is no non-null elements, returns ``0``.
@@ -249,26 +166,6 @@ Array Functions
``T`` must be coercible to ``double``.
Returns ``bigint`` if T is coercible to ``bigint``. Otherwise, returns ``double``.
-.. function:: array_top_n(array(T), int) -> array(T)
-
- Returns an array of the top ``n`` elements from a given ``array``, sorted according to its natural descending order.
- If ``n`` is larger than the size of the given ``array``, the returned list will be the same size as the input instead of ``n``. ::
-
- SELECT array_top_n(ARRAY [1, 100, 2, 5, 3], 3); -- [100, 5, 3]
- SELECT array_top_n(ARRAY [1, 100], 5); -- [100, 1]
- SELECT array_top_n(ARRAY ['a', 'zzz', 'zz', 'b', 'g', 'f'], 3); -- ['zzz', 'zz', 'g']
-
-.. function:: array_transpose(array(array(T))) -> array(array(T))
-
- Returns a transpose of a 2D array (matrix), where rows become columns and columns become rows.
- Converts ``a[x][y]`` to ``transpose(a)[y][x]``. All rows in the input array must have the same length, otherwise the function will fail with an error.
- Returns an empty array if the input is empty or if all rows are empty. ::
-
- SELECT array_transpose(ARRAY [ARRAY [1, 2, 3], ARRAY [4, 5, 6]]) -- [[1, 4], [2, 5], [3, 6]]
- SELECT array_transpose(ARRAY [ARRAY ['a', 'b'], ARRAY ['c', 'd'], ARRAY ['e', 'f']]) -- [['a', 'c', 'e'], ['b', 'd', 'f']]
- SELECT array_transpose(ARRAY [ARRAY [1]]) -- [[1]]
- SELECT array_transpose(ARRAY []) -- []
-
.. function:: arrays_overlap(x, y) -> boolean
Tests if arrays ``x`` and ``y`` have any non-null elements in common.
@@ -401,10 +298,6 @@ Array Functions
(s, x) -> CAST(ROW(x + s.sum, s.count + 1) AS ROW(sum DOUBLE, count INTEGER)),
s -> IF(s.count = 0, NULL, s.sum / s.count));
-.. function:: remove_nulls(array(T)) -> array
-
- Remove all null elements in the array.
-
.. function:: repeat(element, count) -> array
Repeat ``element`` for ``count`` times.
diff --git a/presto-docs/src/main/sphinx/functions/map.rst b/presto-docs/src/main/sphinx/functions/map.rst
index 8205869e504db..dcb7ee6e30519 100644
--- a/presto-docs/src/main/sphinx/functions/map.rst
+++ b/presto-docs/src/main/sphinx/functions/map.rst
@@ -12,23 +12,7 @@ The ``[]`` operator is used to retrieve the value corresponding to a given key f
Map Functions
-------------
-.. function:: all_keys_match(x(K,V), function(K, boolean)) -> boolean
-
- Returns whether all keys of a map match the given predicate. Returns true if all the keys match the predicate (a special case is when the map is empty); false if one or more keys don’t match; NULL if the predicate function returns NULL for one or more keys and true for all other keys. ::
-
- SELECT all_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> length(x) = 1); -- true
-
-.. function:: any_keys_match(x(K,V), function(K, boolean)) -> boolean
-
- Returns whether any keys of a map match the given predicate. Returns true if one or more keys match the predicate; false if none of the keys match (a special case is when the map is empty); NULL if the predicate function returns NULL for one or more keys and false for all other keys. ::
-
- SELECT any_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'a'); -- true
-
-.. function:: any_values_match(x(K,V), function(V, boolean)) -> boolean
-
- Returns whether any values of a map matches the given predicate. Returns true if one or more values match the predicate; false if none of the values match (a special case is when the map is empty); NULL if the predicate function returns NULL for one or more values and false for all other values. ::
-
- SELECT ANY_VALUES_MATCH(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), x -> x = 1); -- true
+For plugin-loaded map functions, see :ref:`functions/plugin-loaded-functions:map functions`.
.. function:: cardinality(x) -> bigint
:noindex:
@@ -85,10 +69,6 @@ Map Functions
SELECT map_filter(MAP(ARRAY[10, 20, 30], ARRAY['a', NULL, 'c']), (k, v) -> v IS NOT NULL); -- {10 -> a, 30 -> c}
SELECT map_filter(MAP(ARRAY['k1', 'k2', 'k3'], ARRAY[20, 3, 15]), (k, v) -> v > 10); -- {k1 -> 20, k3 -> 15}
-.. function:: map_remove_null_values(x(K,V)) -> map(K, V)
-
- Removes all the entries where the value is null from the map ``x``.
-
.. function:: map_subset(map(K,V), array(k)) -> map(K,V)
Constructs a map from those entries of ``map`` for which the key is in the array given::
@@ -99,73 +79,14 @@ Map Functions
SELECT map_subset(MAP(ARRAY[1,2], ARRAY['a','b']), ARRAY[]); -- {}
SELECT map_subset(MAP(ARRAY[], ARRAY[]), ARRAY[1,2]); -- {}
-.. function:: map_key_exists(x(K, V), k) -> boolean
-
- Returns whether the given key exists in the map. Returns ``true`` if key is present in the input map, returns ``false`` if not present.::
-
- SELECT map_key_exists(MAP(ARRAY['x','y'], ARRAY[100,200]), 'x'); -- TRUE
-
.. function:: map_keys(x(K,V)) -> array(K)
Returns all the keys in the map ``x``.
-.. function:: map_top_n_keys(x(K,V), n) -> array(K)
-
- Returns top ``n`` keys in the map ``x`` by sorting its keys in descending order.
- ``n`` must be a non-negative integer.
-
- For bottom ``n`` keys, use the function with lambda operator to perform custom sorting ::
-
- SELECT map_top_n_keys(map(ARRAY['a', 'b', 'c'], ARRAY[3, 2, 1]), 2) --- ['c', 'b']
-
-.. function:: map_top_n_keys(x(K,V), n, function(K,K,int)) -> array(K)
-
- Returns top ``n`` keys in the map ``x`` by sorting its keys using the given comparator ``function``. The comparator takes
- two non-nullable arguments representing two keys of the ``map``. It returns -1, 0, or 1
- as the first key is less than, equal to, or greater than the second key.
- If the comparator function returns other values (including ``NULL``), the query will fail and raise an error ::
-
- SELECT map_top_n_keys(map(ARRAY['a', 'b', 'c'], ARRAY[3, 2, 1]), 2, (x, y) -> IF(x < y, -1, IF(x = y, 0, 1))) --- ['c', 'b']
-
-.. function:: map_keys_by_top_n_values(x(K,V), n) -> array(K)
-
- Returns top ``n`` keys in the map ``x`` by sorting its values in descending order. If two or more keys have equal values, the higher key takes precedence.
- ``n`` must be a non-negative integer.::
-
- SELECT map_keys_by_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[2, 1, 3]), 2) --- ['c', 'a']
-
-.. function:: map_top_n(x(K,V), n) -> map(K, V)
-
- Truncates map items. Keeps only the top ``n`` elements by value. Keys are used to break ties with the max key being chosen. Both keys and values should be orderable.
- ``n`` must be a non-negative integer. ::
-
- SELECT map_top_n(map(ARRAY['a', 'b', 'c'], ARRAY[2, 3, 1]), 2) --- {'b' -> 3, 'a' -> 2}
-
-.. function:: map_normalize(x(varchar,double)) -> map(varchar,double)
-
- Returns the map with the same keys but all non-null values are scaled proportionally so that the sum of values becomes 1.
- Map entries with null values remain unchanged.
-
.. function:: map_values(x(K,V)) -> array(V)
Returns all the values in the map ``x``.
-.. function:: map_top_n_values(x(K,V), n) -> array(V)
-
- Returns top ``n`` values in the map ``x`` by sorting its values in descending order.
- ``n`` must be a non-negative integer. ::
-
- SELECT map_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), 2) --- [3, 2]
-
-.. function:: map_top_n_values(x(K,V), n, function(V,V,int)) -> array(V)
-
- Returns top n values in the map ``x`` based on the given comparator ``function``. The comparator will take
- two nullable arguments representing two values of the ``map``. It returns -1, 0, or 1
- as the first value is less than, equal to, or greater than the second value.
- If the comparator function returns other values (including ``NULL``), the query will fail and raise an error ::
-
- SELECT map_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), 2, (x, y) -> IF(x < y, -1, IF(x = y, 0, 1))) --- [3, 2]
-
.. function:: map_zip_with(map(K,V1), map(K,V2), function(K,V1,V2,V3)) -> map(K,V3)
Merges the two given maps into a single map by applying ``function`` to the pair of values with the same key.
@@ -181,18 +102,6 @@ Map Functions
MAP(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]),
(k, v1, v2) -> k || CAST(v1/v2 AS VARCHAR));
-.. function:: no_keys_match(x(K,V), function(K, boolean)) -> boolean
-
- Returns whether no keys of a map match the given predicate. Returns true if none of the keys match the predicate (a special case is when the map is empty); false if one or more keys match; NULL if the predicate function returns NULL for one or more keys and false for all other keys. ::
-
- SELECT no_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'd'); -- true
-
-.. function:: no_values_match(x(K,V), function(V, boolean)) -> boolean
-
- Returns whether no values of a map match the given predicate. Returns true if none of the values match the predicate (a special case is when the map is empty); false if one or more values match; NULL if the predicate function returns NULL for one or more values and false for all other values. ::
-
- SELECT no_values_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'd'); -- true
-
.. function:: transform_keys(map(K1,V), function(K1,V,K2)) -> map(K2,V)
Returns a map that applies ``function`` to each entry of ``map`` and transforms the keys::
@@ -214,14 +123,3 @@ Map Functions
SELECT transform_values(MAP(ARRAY ['a', 'b'], ARRAY [1, 2]), (k, v) -> k || CAST(v as VARCHAR)); -- {a -> a1, b -> b2}
SELECT transform_values(MAP(ARRAY [1, 2], ARRAY [1.0, 1.4]), -- {1 -> one_1.0, 2 -> two_1.4}
(k, v) -> MAP(ARRAY[1, 2], ARRAY['one', 'two'])[k] || '_' || CAST(v AS VARCHAR));
-
-.. function:: map_int_keys_to_array(map(int,V)) -> array(V)
- Returns an ``array`` of values from the ``map`` with value at indexed by the original keys from ``map``::
- SELECT MAP_INT_KEYS_TO_ARRAY(MAP(ARRAY[3, 5, 6, 9], ARRAY['a', 'b', 'c', 'd'])) -> ARRAY[null, null, 'a', null, 'b', 'c', null, null, 'd']
- SELECT MAP_INT_KEYS_TO_ARRAY(MAP(ARRAY[3, 5, 6, 9], ARRAY['a', null, 'c', 'd'])) -> ARRAY[null, null, 'a', null, null, 'c', 'd']
-
-.. function:: array_to_map_int_keys(array(v)) -> map(int, v)
- Returns an ``map`` with indices of all non-null values from the ``array`` as keys and element at the specified index as the value::
- SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, 6, 9] AS ARRAY)) -> MAP(ARRAY[1, 2, 3,4], ARRAY[3, 5, 6, 9])
- SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, null, 6, 9] AS ARRAY)) -> MAP(ARRAY[1, 2, 4, 5], ARRAY[3, 5, 6, 9])
- SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, null, 6, 9, null, null, 1] AS ARRAY)) -> MAP(ARRAY[1, 2, 4, 5, 8], ARRAY[3, 5, 6, 9, 1])
\ No newline at end of file
diff --git a/presto-docs/src/main/sphinx/functions/plugin-loaded-functions.rst b/presto-docs/src/main/sphinx/functions/plugin-loaded-functions.rst
new file mode 100644
index 0000000000000..0492c7041bc31
--- /dev/null
+++ b/presto-docs/src/main/sphinx/functions/plugin-loaded-functions.rst
@@ -0,0 +1,261 @@
+=======================
+Plugin Loaded Functions
+=======================
+
+These functions are optional, opt-in functions that can be loaded as needed.
+For more details on loading these functions, refer to the
+`presto-sql-helpers README <https://github.com/prestodb/presto/blob/master/presto-sql-helpers/README.md>`_.
+
+Array Functions
+---------------
+
+.. function:: array_intersect(array(array(E))) -> array(E)
+
+ Returns an array of the elements in the intersection of all arrays in the given array, without duplicates.
+ This function uses ``IS NOT DISTINCT FROM`` to determine which elements are the same. ::
+
+ SELECT array_intersect(ARRAY[ARRAY[1, 2, 3, 2, null], ARRAY[1, 2, 2, 4, null], ARRAY [1, 2, 3, 4, null]]) -- ARRAY[1, 2, null]
+
+.. function:: array_average(array(double)) -> double
+
+ Returns the average of all non-null elements of the ``array``. If there are no non-null elements, returns
+ ``null``.
+
+.. function:: array_split_into_chunks(array(T), int) -> array(array(T))
+
+ Returns an ``array`` of arrays splitting the input ``array`` into chunks of given length.
+ The last chunk will be shorter than the chunk length if the array's length is not an integer multiple of
+ the chunk length. Ignores null inputs, but not elements. ::
+
+ SELECT array_split_into_chunks(ARRAY [1, 2, 3, 4], 3); -- [[1, 2, 3], [4]]
+ SELECT array_split_into_chunks(null, null); -- null
+ SELECT array_split_into_chunks(array[1, 2, 3, cast(null as int)], 2); -- [[1, 2], [3, null]]
+
+.. function:: array_frequency(array(E)) -> map(E, int)
+
+ Returns a map: keys are the unique elements in the ``array``, values are how many times the key appears.
+ Ignores null elements. Empty array returns empty map.
+
+.. function:: array_duplicates(array(T)) -> array(bigint/varchar)
+
+ Returns a set of elements that occur more than once in ``array``.
+ Throws an exception if any of the elements are rows or arrays that contain nulls. ::
+
+ SELECT array_duplicates(ARRAY[1, 2, null, 1, null, 3]) -- ARRAY[1, null]
+ SELECT array_duplicates(ARRAY[ROW(1, null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
+
+.. function:: array_has_duplicates(array(T)) -> boolean
+
+ Returns a boolean: whether ``array`` has any elements that occur more than once.
+ Throws an exception if any of the elements are rows or arrays that contain nulls. ::
+
+ SELECT array_has_duplicates(ARRAY[1, 2, null, 1, null, 3]) -- true
+ SELECT array_has_duplicates(ARRAY[ROW(1, null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
+
+.. function:: array_least_frequent(array(T)) -> array(T)
+
+ Returns the least frequent non-null element of an array. If there are multiple elements with the same frequency, the function returns the smallest element.
+ If the array has more than one element and any elements are ``ROWS`` with null fields or ``ARRAYS`` with null elements, an exception is returned. ::
+
+ SELECT array_least_frequent(ARRAY[1, 0 , 5]) -- ARRAY[0]
+ select array_least_frequent(ARRAY[1, null, 1]) -- ARRAY[1]
+ select array_least_frequent(ARRAY[ROW(1,null), ROW(1, null)]) -- "map key cannot be null or contain nulls"
+
+.. function:: array_least_frequent(array(T), n) -> array(T)
+
+ Returns ``n`` least frequent non-null elements of an array. The elements are ordered in increasing order of their frequencies.
+ If two elements have the same frequency, smaller elements will appear first.
+ If the array has more than one element and any elements are ``ROWS`` with null fields or ``ARRAYS`` with null elements, an exception is returned. ::
+
+ SELECT array_least_frequent(ARRAY[3, 2, 2, 6, 6, 1, 1], 3) -- ARRAY[3, 1, 2]
+ select array_least_frequent(ARRAY[1, null, 1], 2) -- ARRAY[1]
+ select array_least_frequent(ARRAY[ROW(1,null), ROW(1, null)], 2) -- "map key cannot be null or contain nulls"
+
+.. function:: array_max_by(array(T), function(T, U)) -> T
+
+ Applies the provided function to each element, and returns the element that gives the maximum value.
+ ``U`` can be any orderable type. ::
+
+ SELECT array_max_by(ARRAY ['a', 'bbb', 'cc'], x -> LENGTH(x)) -- 'bbb'
+
+.. function:: array_min_by(array(T), function(T, U)) -> T
+
+ Applies the provided function to each element, and returns the element that gives the minimum value.
+ ``U`` can be any orderable type. ::
+
+ SELECT array_min_by(ARRAY ['a', 'bbb', 'cc'], x -> LENGTH(x)) -- 'a'
+
+.. function:: array_sort_desc(x) -> array
+
+ Returns the ``array`` sorted in the descending order. Elements of the ``array`` must be orderable.
+ Null elements are placed at the end of the returned array. ::
+
+ SELECT array_sort_desc(ARRAY [100, 1, 10, 50]); -- [100, 50, 10, 1]
+ SELECT array_sort_desc(ARRAY [null, 100, null, 1, 10, 50]); -- [100, 50, 10, 1, null, null]
+ SELECT array_sort_desc(ARRAY [ARRAY ["a", null], null, ARRAY ["a"]]); -- [["a", null], ["a"], null]
+
+.. function:: remove_nulls(array(T)) -> array
+
+ Remove all null elements in the array.
+
+.. function:: array_top_n(array(T), int) -> array(T)
+
+ Returns an array of the top ``n`` elements from a given ``array``, sorted according to its natural descending order.
+ If ``n`` is larger than the size of the given ``array``, the returned list will be the same size as the input instead of ``n``. ::
+
+ SELECT array_top_n(ARRAY [1, 100, 2, 5, 3], 3); -- [100, 5, 3]
+ SELECT array_top_n(ARRAY [1, 100], 5); -- [100, 1]
+ SELECT array_top_n(ARRAY ['a', 'zzz', 'zz', 'b', 'g', 'f'], 3); -- ['zzz', 'zz', 'g']
+
+.. function:: array_top_n(array(T), int, function(T,T,int)) -> array(T)
+
+ Returns an array of the top ``n`` elements from a given ``array`` using the specified comparator ``function``.
+ The comparator will take two nullable arguments representing two nullable elements of the ``array``. It returns -1, 0, or 1
+ as the first nullable element is less than, equal to, or greater than the second nullable element.
+ If the comparator function returns other values (including ``NULL``), the query will fail and raise an error.
+ If ``n`` is larger than the size of the given ``array``, the returned list will be the same size as the input instead of ``n``. ::
+
+ SELECT array_top_n(ARRAY [100, 1, 3, -10, 6, -5], 3, (x, y) -> IF(abs(x) < abs(y), -1, IF(abs(x) = abs(y), 0, 1))); -- [100, -10, 6]
+ SELECT array_top_n(ARRAY [CAST(ROW(1, 2) AS ROW(x INT, y INT)), CAST(ROW(0, 11) AS ROW(x INT, y INT)), CAST(ROW(5, 10) AS ROW(x INT, y INT))], 2, (a, b) -> IF(a.x*a.y < b.x*b.y, -1, IF(a.x*a.y = b.x*b.y, 0, 1))); -- [ROW(5, 10), ROW(1, 2)]
+
+.. function:: array_transpose(array(array(T))) -> array(array(T))
+
+ Returns a transpose of a 2D array (matrix), where rows become columns and columns become rows.
+ Converts ``a[x][y]`` to ``transpose(a)[y][x]``. All rows in the input array must have the same length, otherwise the function will fail with an error.
+ Returns an empty array if the input is empty or if all rows are empty. ::
+
+ SELECT array_transpose(ARRAY [ARRAY [1, 2, 3], ARRAY [4, 5, 6]]) -- [[1, 4], [2, 5], [3, 6]]
+ SELECT array_transpose(ARRAY [ARRAY ['a', 'b'], ARRAY ['c', 'd'], ARRAY ['e', 'f']]) -- [['a', 'c', 'e'], ['b', 'd', 'f']]
+ SELECT array_transpose(ARRAY [ARRAY [1]]) -- [[1]]
+ SELECT array_transpose(ARRAY []) -- []
+
+Map Functions
+--------------
+
+.. function:: map_normalize(x(varchar,double)) -> map(varchar,double)
+
+ Returns the map with the same keys but all non-null values are scaled proportionally so that the sum of values becomes 1.
+ Map entries with null values remain unchanged.
+
+.. function:: map_keys_by_top_n_values(x(K,V), n) -> array(K)
+
+ Returns top ``n`` keys in the map ``x`` by sorting its values in descending order. If two or more keys have equal values, the higher key takes precedence.
+ ``n`` must be a non-negative integer. ::
+
+ SELECT map_keys_by_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[2, 1, 3]), 2) --- ['c', 'a']
+
+.. function:: map_key_exists(x(K, V), k) -> boolean
+
+ Returns whether the given key exists in the map. Returns ``true`` if the key is present in the input map, returns ``false`` if not present. ::
+
+ SELECT map_key_exists(MAP(ARRAY['x','y'], ARRAY[100,200]), 'x'); -- TRUE
+
+.. function:: map_top_n(x(K,V), n) -> map(K, V)
+
+ Truncates map items. Keeps only the top ``n`` elements by value. Keys are used to break ties with the max key being chosen. Both keys and values should be orderable.
+ ``n`` must be a non-negative integer. ::
+
+ SELECT map_top_n(map(ARRAY['a', 'b', 'c'], ARRAY[2, 3, 1]), 2) --- {'b' -> 3, 'a' -> 2}
+
+.. function:: map_top_n_keys(x(K,V), n) -> array(K)
+
+ Returns top ``n`` keys in the map ``x`` by sorting its keys in descending order.
+ ``n`` must be a non-negative integer.
+
+ For bottom ``n`` keys, use the function with lambda operator to perform custom sorting. ::
+
+ SELECT map_top_n_keys(map(ARRAY['a', 'b', 'c'], ARRAY[3, 2, 1]), 2) --- ['c', 'b']
+
+.. function:: map_top_n_keys(x(K,V), n, function(K,K,int)) -> array(K)
+
+ Returns top ``n`` keys in the map ``x`` by sorting its keys using the given comparator ``function``. The comparator takes
+ two non-nullable arguments representing two keys of the ``map``. It returns -1, 0, or 1
+ as the first key is less than, equal to, or greater than the second key.
+ If the comparator function returns other values (including ``NULL``), the query will fail and raise an error. ::
+
+ SELECT map_top_n_keys(map(ARRAY['a', 'b', 'c'], ARRAY[3, 2, 1]), 2, (x, y) -> IF(x < y, -1, IF(x = y, 0, 1))) --- ['c', 'b']
+
+.. function:: map_top_n_values(x(K,V), n) -> array(V)
+
+ Returns top ``n`` values in the map ``x`` by sorting its values in descending order.
+ ``n`` must be a non-negative integer. ::
+
+ SELECT map_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), 2) --- [3, 2]
+
+.. function:: map_top_n_values(x(K,V), n, function(V,V,int)) -> array(V)
+
+ Returns top ``n`` values in the map ``x`` based on the given comparator ``function``. The comparator will take
+ two nullable arguments representing two values of the ``map``. It returns -1, 0, or 1
+ as the first value is less than, equal to, or greater than the second value.
+ If the comparator function returns other values (including ``NULL``), the query will fail and raise an error. ::
+
+ SELECT map_top_n_values(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), 2, (x, y) -> IF(x < y, -1, IF(x = y, 0, 1))) --- [3, 2]
+
+.. function:: map_remove_null_values(x(K,V)) -> map(K, V)
+
+ Removes all the entries where the value is null from the map ``x``.
+
+.. function:: all_keys_match(x(K,V), function(K, boolean)) -> boolean
+
+ Returns whether all keys of a map match the given predicate. Returns true if all the keys match the predicate (a special case is when the map is empty); false if one or more keys don’t match; NULL if the predicate function returns NULL for one or more keys and true for all other keys. ::
+
+ SELECT all_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> length(x) = 1); -- true
+
+.. function:: any_keys_match(x(K,V), function(K, boolean)) -> boolean
+
+ Returns whether any keys of a map match the given predicate. Returns true if one or more keys match the predicate; false if none of the keys match (a special case is when the map is empty); NULL if the predicate function returns NULL for one or more keys and false for all other keys. ::
+
+ SELECT any_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'a'); -- true
+
+.. function:: any_values_match(x(K,V), function(V, boolean)) -> boolean
+
+ Returns whether any values of a map matches the given predicate. Returns true if one or more values match the predicate; false if none of the values match (a special case is when the map is empty); NULL if the predicate function returns NULL for one or more values and false for all other values. ::
+
+ SELECT ANY_VALUES_MATCH(map(ARRAY['a', 'b', 'c'], ARRAY[1, 2, 3]), x -> x = 1); -- true
+
+.. function:: no_keys_match(x(K,V), function(K, boolean)) -> boolean
+
+ Returns whether no keys of a map match the given predicate. Returns true if none of the keys match the predicate (a special case is when the map is empty); false if one or more keys match; NULL if the predicate function returns NULL for one or more keys and false for all other keys. ::
+
+ SELECT no_keys_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'd'); -- true
+
+.. function:: no_values_match(x(K,V), function(V, boolean)) -> boolean
+
+ Returns whether no values of a map match the given predicate. Returns true if none of the values match the predicate (a special case is when the map is empty); false if one or more values match; NULL if the predicate function returns NULL for one or more values and false for all other values. ::
+
+ SELECT no_values_match(map(array['a', 'b', 'c'], array[1, 2, 3]), x -> x = 'd'); -- true
+
+.. function:: map_int_keys_to_array(map(int,V)) -> array(V)
+
+ Returns an ``array`` of values from the ``map``, where each value is placed at the index given by its original key in the ``map``. ::
+
+ SELECT MAP_INT_KEYS_TO_ARRAY(MAP(ARRAY[3, 5, 6, 9], ARRAY['a', 'b', 'c', 'd'])) -> ARRAY[null, null, 'a', null, 'b', 'c', null, null, 'd']
+ SELECT MAP_INT_KEYS_TO_ARRAY(MAP(ARRAY[3, 5, 6, 9], ARRAY['a', null, 'c', 'd'])) -> ARRAY[null, null, 'a', null, null, 'c', 'd']
+
+
+.. function:: array_to_map_int_keys(array(v)) -> map(int, v)
+
+ Returns a ``map`` with indices of all non-null values from the ``array`` as keys and the element at the specified index as the value. ::
+
+ SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, 6, 9] AS ARRAY)) -> MAP(ARRAY[1, 2, 3,4], ARRAY[3, 5, 6, 9])
+ SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, null, 6, 9] AS ARRAY)) -> MAP(ARRAY[1, 2, 4, 5], ARRAY[3, 5, 6, 9])
+ SELECT ARRAY_TO_MAP_INT_KEYS(CAST(ARRAY[3, 5, null, 6, 9, null, null, 1] AS ARRAY)) -> MAP(ARRAY[1, 2, 4, 5, 8], ARRAY[3, 5, 6, 9, 1])
+
+String Functions
+----------------
+
+.. function:: replace_first(string, search, replace) -> varchar
+
+ Replaces the first instance of ``search`` with ``replace`` in ``string``.
+
+ If ``search`` is an empty string, it inserts ``replace`` at the beginning of the ``string``.
+
+.. function:: trail(string, N) -> varchar
+
+ Returns the last N characters of the input string.
+
+.. function:: key_sampling_percent(varchar) -> double
+
+ Generates a double value between 0.0 and 1.0 based on the hash of the given ``varchar``.
+ This function is useful for deterministic sampling of data.
+
diff --git a/presto-docs/src/main/sphinx/functions/string.rst b/presto-docs/src/main/sphinx/functions/string.rst
index 62e94acb7ae48..a0f3d7cb42c37 100644
--- a/presto-docs/src/main/sphinx/functions/string.rst
+++ b/presto-docs/src/main/sphinx/functions/string.rst
@@ -10,6 +10,8 @@ The ``||`` operator performs concatenation.
String Functions
----------------
+For plugin-loaded string functions, see :ref:`functions/plugin-loaded-functions:string functions`.
+
.. note::
These functions assume that the input strings contain valid UTF-8 encoded
@@ -109,11 +111,6 @@ String Functions
If ``search`` is an empty string, inserts ``replace`` in front of every
character and at the end of the ``string``.
-.. function:: replace_first(string, search, replace) -> varchar
- Replaces the first instances of ``search`` with ``replace`` in ``string``.
-
- If ``search`` is an empty string, it inserts ``replace`` at the beginning of the ``string``.
-
.. function:: reverse(string) -> varchar
Returns ``string`` with the characters in reverse order.
@@ -224,10 +221,6 @@ String Functions
position ``start``. Positions start with ``1``. A negative starting
position is interpreted as being relative to the end of the string.
-.. function:: trail(string, N) -> varchar
-
- Returns the last N characters of the input string.
-
.. function:: trim(string) -> varchar
Removes leading and trailing whitespace from ``string``.
@@ -317,7 +310,3 @@ Unicode Functions
be a single character or empty (in which case invalid characters are
removed).
-.. function:: key_sampling_percent(varchar) -> double
-
- Generates a double value between 0.0 and 1.0 based on the hash of the given ``varchar``.
- This function is useful for deterministic sampling of data.
diff --git a/presto-docs/src/main/sphinx/installation.rst b/presto-docs/src/main/sphinx/installation.rst
index c67f47b4324c4..daef03acb5288 100644
--- a/presto-docs/src/main/sphinx/installation.rst
+++ b/presto-docs/src/main/sphinx/installation.rst
@@ -6,6 +6,6 @@ Installation
:maxdepth: 1
installation/deployment
- installation/deploy-docker
installation/deploy-brew
+ installation/deploy-docker
installation/deploy-helm
diff --git a/presto-docs/src/main/sphinx/installation/deploy-brew.rst b/presto-docs/src/main/sphinx/installation/deploy-brew.rst
index bed202f4bd4d0..3f7b768535753 100644
--- a/presto-docs/src/main/sphinx/installation/deploy-brew.rst
+++ b/presto-docs/src/main/sphinx/installation/deploy-brew.rst
@@ -1,6 +1,6 @@
-============================
-Deploy Presto using Homebrew
-============================
+===========================
+Deploy Presto with Homebrew
+===========================
This guide explains how to install and get started with Presto on macOS, Linux or WSL2 using the Homebrew package manager.
diff --git a/presto-docs/src/main/sphinx/installation/deploy-docker.rst b/presto-docs/src/main/sphinx/installation/deploy-docker.rst
index b6a824916c40e..d1a7882c7fced 100644
--- a/presto-docs/src/main/sphinx/installation/deploy-docker.rst
+++ b/presto-docs/src/main/sphinx/installation/deploy-docker.rst
@@ -1,60 +1,66 @@
-=================================
-Deploy Presto From a Docker Image
-=================================
+=========================
+Deploy Presto with Docker
+=========================
+
+This guide explains how to install and get started with Presto using Docker.
+
+.. note::
+
+ These steps were developed and tested on Mac OS X, on both Intel and Apple Silicon chips.
-These steps were developed and tested on Mac OS X, on both Intel and Apple Silicon chips.
+Prepare the container environment
+=================================
-Follow these steps to:
+If Docker is already installed, skip to step 4 to verify the setup.
+Otherwise, follow the instructions below to install Docker and Colima using Homebrew or choose an alternative method.
-- install the command line tools for brew, docker, and `Colima `_
-- verify your Docker setup
-- pull the Docker image of the Presto server
-- start your local Presto server
+1. Install `Homebrew `_ if it is not already present on the system.
-Installing brew, Docker, and Colima
-===================================
+2. Install the Docker command line and `Colima `_ tools via the following command:
-This task shows how to install brew, then to use brew to install Docker and Colima.
+ .. code-block:: shell
-Note: If you have Docker installed you can skip steps 1-3, but you should
-verify your Docker setup by running the command in step 4.
+ brew install docker colima
-1. If you do not have brew installed, run the following command:
+3. Run the following command to start Colima with defaults:
- ``/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)"``
+ .. code-block:: shell
-2. To install the Docker command line and `Colima `_ tools, run the following command:
+ colima start
- ``brew install docker colima``
+ .. note::
-3. Run the following command:
+ The default VM created by Colima uses 2 CPUs, 2GiB memory and 100GiB storage. To customize the VM resources,
+ see the Colima README for `Customizing the VM `_.
- ``colima start``
+4. Verify the local setup by running the following command:
- *Note*: The default VM created by Colima uses 2 CPUs, 2GB memory and 60GB storage. To customize the VM resources,
- see the Colima README for `Customizing the VM `_.
+ .. code-block:: shell
-4. To verify your local setup, run the following command:
+ docker run hello-world
- ``docker run hello-world``
+ The following output confirms a successful installation.
- If you see a response similar to the following, you are ready.
+ .. code-block:: shell
+ :class: no-copy
- ``Hello from Docker!``
- ``This message shows that your installation appears to be working correctly.``
+ Hello from Docker!
+ This message shows that your installation appears to be working correctly.
Installing and Running the Presto Docker container
==================================================
-1. Download the latest non-edge Presto container from `Presto on DockerHub `_. Run the following command:
+1. Download the latest non-edge Presto container from `Presto on DockerHub `_:
+
+ .. code-block:: shell
- ``docker pull prestodb/presto:latest``
+ docker pull prestodb/presto:latest
Downloading the container may take a few minutes. When the download completes, go on to the next step.
-2. On your local system, create a file named ``config.properties`` containing the following text:
+2. On the local system, create a file named ``config.properties`` containing the following text:
- .. code-block:: none
+ .. code-block:: properties
coordinator=true
node-scheduler.include-coordinator=true
@@ -62,7 +68,7 @@ Installing and Running the Presto Docker container
discovery-server.enabled=true
discovery.uri=http://localhost:8080
-3. On your local system, create a file named ``jvm.config`` containing the following text:
+3. On the local system, create a file named ``jvm.config`` containing the following text:
.. code-block:: none
@@ -78,20 +84,26 @@ Installing and Running the Presto Docker container
4. To start the Presto server in the Docker container, run the command:
- ``docker run -p 8080:8080 -it -v ./config.properties:/opt/presto-server/etc/config.properties -v ./jvm.config:/opt/presto-server/etc/jvm.config --name presto prestodb/presto:latest``
+ .. code-block:: shell
+
+ docker run -p 8080:8080 -it -v ./config.properties:/opt/presto-server/etc/config.properties -v ./jvm.config:/opt/presto-server/etc/jvm.config --name presto prestodb/presto:latest
This command assigns the name ``presto`` for the newly-created container that uses the downloaded image ``prestodb/presto:latest``.
- The Presto server logs startup information in the terminal window. Once you see a response similar to the following, the Presto server is running in the Docker container.
+ The Presto server logs startup information in the terminal window. The following output confirms the Presto server is running in the Docker container.
+
+ .. code-block:: shell
+ :class: no-copy
- ``======== SERVER STARTED ========``
+ ======== SERVER STARTED ========
Removing the Presto Docker container
====================================
-To remove the Presto Docker container, run the following two commands:
+To stop and remove the Presto Docker container, run the following commands:
-``docker stop presto``
+.. code-block:: shell
-``docker rm presto``
+ docker stop presto
+ docker rm presto
These commands return the name of the container ``presto`` when they succeed.
diff --git a/presto-docs/src/main/sphinx/installation/deploy-helm.rst b/presto-docs/src/main/sphinx/installation/deploy-helm.rst
index 5fe7d0225e8c2..1fefb226fd142 100644
--- a/presto-docs/src/main/sphinx/installation/deploy-helm.rst
+++ b/presto-docs/src/main/sphinx/installation/deploy-helm.rst
@@ -1,5 +1,5 @@
-===============================
-Deploy Presto Using Helm Charts
-===============================
+=======================
+Deploy Presto with Helm
+=======================
-To deploy Presto using Helm charts, see the `Presto Helm Charts README `_.
\ No newline at end of file
+To deploy Presto using Helm, see the `Presto Helm Charts README `_.
diff --git a/presto-docs/src/main/sphinx/plugin/native-sidecar-plugin.rst b/presto-docs/src/main/sphinx/plugin/native-sidecar-plugin.rst
index c2557dd2f94a6..f64a36d58485c 100644
--- a/presto-docs/src/main/sphinx/plugin/native-sidecar-plugin.rst
+++ b/presto-docs/src/main/sphinx/plugin/native-sidecar-plugin.rst
@@ -26,7 +26,7 @@ Property Name Description
``coordinator-sidecar-enabled`` Enables sidecar in the coordinator true
``native-execution-enabled`` Enables native execution true
``presto.default-namespace`` Sets the default function namespace `native.default`
-``plugin.dir`` Specifies which directory under installation root `{root-directory}/native-plugins/`
+``plugin.dir`` Specifies which directory under installation root `{root-directory}/native-plugin/`
to scan for plugins at startup.
============================================ ===================================================================== ==============================
diff --git a/presto-docs/src/main/sphinx/plugin/redis-hbo-provider.rst b/presto-docs/src/main/sphinx/plugin/redis-hbo-provider.rst
index 85079d719736f..423e9e4db9cc1 100644
--- a/presto-docs/src/main/sphinx/plugin/redis-hbo-provider.rst
+++ b/presto-docs/src/main/sphinx/plugin/redis-hbo-provider.rst
@@ -9,27 +9,27 @@ Redis HBO Provider supports loading a custom configured Redis Client for storing
Configuration
-------------
-Create ``etc/catalog/redis-provider.properties`` to mount the Redis HBO Provider Plugin.
+Create ``etc/redis-provider.properties`` to mount the Redis HBO Provider Plugin.
Edit the configuration properties as appropriate:
Configuration properties
------------------------
-The following configuration properties are available for use in ``etc/catalog/redis-provider.properties``:
+The following configuration properties are available for use in ``etc/redis-provider.properties``:
============================================ =====================================================================
Property Name Description
============================================ =====================================================================
-``coordinator`` Boolean property whether Presto server is a coordinator
+``coordinator`` Boolean property to decide whether Presto server is a coordinator
``hbo.redis-provider.server_uri`` Redis Server URI
``hbo.redis-provider.total-fetch-timeoutms`` Maximum timeout in ms for Redis fetch requests
``hbo.redis-provider.total-set-timeoutms`` Maximum timeout in ms for Redis set requests
``hbo.redis-provider.default-ttl-seconds`` TTL in seconds of the Redis data to be stored
-``hbo.redis-provider.enabled`` Boolean property whether this plugin is enabled in production
+``hbo.redis-provider.enabled`` Boolean property to enable this plugin
``credentials-path`` Path for Redis credentials
-``hbo.redis-provider.cluster-mode-enabled`` Boolean property whether cluster mode is enabled
+``hbo.redis-provider.cluster-mode-enabled`` Boolean property to enable cluster mode
============================================ =====================================================================
Coordinator Configuration for Historical Based Optimization
@@ -80,29 +80,18 @@ You can place the plugin JARs in the production's ``plugins`` directory.
Alternatively, follow this method to ensure that the plugin is loaded during the Presto build.
-1. Add the following to register the plugin in ```` in ``presto-server/src/main/assembly/presto.xml``:
+1. Add the following to register the plugin in ``presto-server/src/main/provisio/presto.xml``:
.. code-block:: text
-
-
- ${project.build.directory}/dependency/redis-hbo-provider-${project.version}
- plugin/redis-hbo-provider
-
+
+
+
+
+
+
2. In ``redis-hbo-provider/src/main/resources``, create the file ``META-INF.services`` with the Plugin entry class ``com.facebook.presto.statistic.RedisProviderPlugin``.
-3. Add the dependency on the module in ``presto-server/pom.xml``:
-
- .. code-block:: text
-
-
- com.facebook.presto
- redis-hbo-provider
- ${project.version}
- zip
- provided
-
-
-4. (Optional) Add your custom Redis client connection login in ``com.facebook.presto.statistic.RedisClusterAsyncCommandsFactory``.
+3. (Optional) Add your custom Redis client connection logic in ``com.facebook.presto.statistic.RedisClusterAsyncCommandsFactory``.
Note: The AsyncCommands must be provided properly.
diff --git a/presto-docs/src/main/sphinx/presto-cpp.rst b/presto-docs/src/main/sphinx/presto-cpp.rst
index bd71e95daf6ed..f826d6beebafb 100644
--- a/presto-docs/src/main/sphinx/presto-cpp.rst
+++ b/presto-docs/src/main/sphinx/presto-cpp.rst
@@ -7,12 +7,15 @@ Note: Presto C++ is in active development. See :doc:`Limitations `_.
+
+ For comprehensive documentation of all available runtime metrics, see :doc:`metrics`.
diff --git a/presto-docs/src/main/sphinx/presto_cpp/functions.rst b/presto-docs/src/main/sphinx/presto_cpp/functions.rst
new file mode 100644
index 0000000000000..1c5134b08d8ae
--- /dev/null
+++ b/presto-docs/src/main/sphinx/presto_cpp/functions.rst
@@ -0,0 +1,8 @@
+********************
+Presto C++ Functions
+********************
+
+.. toctree::
+ :maxdepth: 1
+
+ functions/sketch.rst
\ No newline at end of file
diff --git a/presto-docs/src/main/sphinx/presto_cpp/functions/sketch.rst b/presto-docs/src/main/sphinx/presto_cpp/functions/sketch.rst
new file mode 100644
index 0000000000000..a3165ddb32fb3
--- /dev/null
+++ b/presto-docs/src/main/sphinx/presto_cpp/functions/sketch.rst
@@ -0,0 +1,38 @@
+================
+Sketch Functions
+================
+
+Sketches are data structures that can approximately answer particular questions
+about a dataset when full accuracy is not required. Approximate answers are
+often faster and more efficient to compute than functions which result in full
+accuracy.
+
+Presto C++ provides support for computing some sketches available in the `Apache
+DataSketches`_ library.
+
+Theta Sketches
+--------------
+
+Theta sketches enable distinct value counting on datasets and also provide the
+ability to perform set operations. For more information on Theta sketches,
+please see the Apache DataSketches `Theta sketch documentation`_.
+
+.. function:: sketch_theta(x) -> varbinary
+
+ Computes a theta sketch from an input dataset. The output from
+ this function can be used as an input to any of the other ``sketch_theta_*``
+ family of functions.
+
+.. function:: sketch_theta_estimate(sketch) -> double
+
+ Returns the estimate of distinct values from the input sketch.
+
+.. function:: sketch_theta_summary(sketch) -> row(estimate double, theta double, upper_bound_std double, lower_bound_std double, retained_entries int)
+
+ Returns a summary of the input sketch which includes the distinct values
+ estimate alongside other useful information such as the sketch theta
+ parameter, current error bounds corresponding to 1 standard deviation, and
+ the number of retained entries in the sketch.
+
+.. _Apache DataSketches: https://datasketches.apache.org/
+.. _Theta sketch documentation: https://datasketches.apache.org/docs/Theta/ThetaSketches.html#theta-sketch-framework
diff --git a/presto-docs/src/main/sphinx/presto_cpp/installation.rst b/presto-docs/src/main/sphinx/presto_cpp/installation.rst
new file mode 100644
index 0000000000000..9182c16ae043c
--- /dev/null
+++ b/presto-docs/src/main/sphinx/presto_cpp/installation.rst
@@ -0,0 +1,254 @@
+=======================
+Presto C++ Installation
+=======================
+
+.. contents::
+ :local:
+ :backlinks: none
+ :depth: 1
+
+This guide shows how to install and run a lightweight Presto cluster utilizing a PrestoDB Java Coordinator and Prestissimo (Presto C++) Workers using Docker.
+
+For more information about Presto C++, see the :ref:`presto-cpp:overview`.
+
+The setup uses Meta's high-performance Velox engine for worker-side query execution. You will configure a cluster and run a test query with the built-in TPC-H connector.
+
+Prerequisites
+-------------
+
+To follow this tutorial, you need:
+
+* Docker installed.
+* Basic familiarity with the terminal and shell commands.
+
+Create a Working Directory
+--------------------------
+The recommended directory structure uses ``presto-lab`` as the root directory.
+
+Create a clean root directory to hold all necessary configuration files and the ``docker-compose.yml`` file.
+
+.. code-block:: bash
+
+ mkdir -p ~/presto-lab
+ cd ~/presto-lab
+
+Configure the Presto Java Coordinator
+-------------------------------------
+
+The Coordinator requires configuration to define its role, enable the discovery service, and set up a catalog for querying.
+
+1. Create Configuration Directory
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To create the necessary directories for the coordinator and its catalogs, run the following command:
+
+.. code-block:: bash
+
+ mkdir -p coordinator/etc/catalog
+
+
+2. Create the Coordinator Configuration File
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create the file ``coordinator/etc/config.properties`` with the following contents. This file enables the coordinator mode, the discovery server, and sets the HTTP port to ``8080``.
+
+.. code-block:: properties
+
+ # coordinator/etc/config.properties
+ coordinator=true
+ node-scheduler.include-coordinator=true
+ http-server.http.port=8080
+ discovery-server.enabled=true
+ discovery.uri=http://localhost:8080
+
+* ``coordinator=true``: Enables the coordinator mode.
+* ``discovery-server.enabled=true``: Designates the coordinator as the host for the worker discovery service.
+* ``http-server.http.port=8080``: Starts the HTTP server on port 8080 for the coordinator (and workers, if enabled).
+
+3. Create the JVM Configuration File
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create the file ``coordinator/etc/jvm.config`` with the following content. These are standard Java 17 flags for Presto that ensure compatibility with Java 17's module system, provide stable garbage collection and memory behavior, and enforce safe failure handling.
+
+.. code-block:: properties
+
+ # coordinator/etc/jvm.config
+ -server
+ -Xmx1G
+ -XX:+UseG1GC
+ -XX:G1HeapRegionSize=32M
+ -XX:+UseGCOverheadLimit
+ -XX:+ExplicitGCInvokesConcurrent
+ -XX:+HeapDumpOnOutOfMemoryError
+ -XX:+ExitOnOutOfMemoryError
+ -Djdk.attach.allowAttachSelf=true
+ --add-opens=java.base/java.io=ALL-UNNAMED
+ --add-opens=java.base/java.lang=ALL-UNNAMED
+ --add-opens=java.base/java.lang.ref=ALL-UNNAMED
+ --add-opens=java.base/java.lang.reflect=ALL-UNNAMED
+ --add-opens=java.base/java.net=ALL-UNNAMED
+ --add-opens=java.base/java.nio=ALL-UNNAMED
+ --add-opens=java.base/java.security=ALL-UNNAMED
+ --add-opens=java.base/javax.security.auth=ALL-UNNAMED
+ --add-opens=java.base/javax.security.auth.login=ALL-UNNAMED
+ --add-opens=java.base/java.text=ALL-UNNAMED
+ --add-opens=java.base/java.util=ALL-UNNAMED
+ --add-opens=java.base/java.util.concurrent=ALL-UNNAMED
+ --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
+ --add-opens=java.base/java.util.regex=ALL-UNNAMED
+ --add-opens=java.base/jdk.internal.loader=ALL-UNNAMED
+ --add-opens=java.base/sun.security.action=ALL-UNNAMED
+ --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED
+
+4. Create the Node Properties File
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create the file ``coordinator/etc/node.properties`` with the following content to set the node environment and the data directory.
+
+.. code-block:: properties
+
+ # coordinator/etc/node.properties
+ node.id=${ENV:HOSTNAME}
+ node.environment=test
+ node.data-dir=/var/lib/presto/data
+
+5. Create the TPC-H Catalog Configuration File
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create the file ``coordinator/etc/catalog/tpch.properties`` with the following content. The TPC-H catalog enables running test queries against an in-memory dataset.
+
+.. code-block:: properties
+
+ # coordinator/etc/catalog/tpch.properties
+ connector.name=tpch
+
+Configure the Prestissimo (C++) Worker
+--------------------------------------
+
+Configure the Worker to locate the Coordinator or Discovery service and identify itself within the network.
+
+1. Create Worker Configuration Directory
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: bash
+
+ mkdir -p worker-1/etc/catalog
+
+2. Create ``worker-1/etc/config.properties``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Configure the worker to point to the discovery service running on the coordinator.
+
+Note: You can repeat this step to add more workers, such as ``worker-2``.
+
+.. code-block:: properties
+
+ # worker-1/etc/config.properties
+ discovery.uri=http://coordinator:8080
+ presto.version=0.288-15f14bb
+ http-server.http.port=7777
+ shutdown-onset-sec=1
+ runtime-metrics-collection-enabled=true
+
+* ``discovery.uri=http://coordinator:8080``: This uses the coordinator service name as defined in the ``docker-compose.yml`` file for network communication within Docker.
+
+3. Configure ``worker-1/etc/node.properties``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Define the worker’s internal address to ensure reliable registration.
+
+.. code-block:: properties
+
+ # worker-1/etc/node.properties
+ node.environment=test
+ node.internal-address=worker-1
+ node.location=docker
+ node.id=worker-1
+
+* ``node.internal-address=worker-1``: This setting matches the service name defined in :ref:`Docker Compose `.
+
+4. Add TPC-H Catalog Configuration
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Configure the worker with the same catalog definitions as the coordinator to execute query stages.
+
+.. code-block:: properties
+
+ # worker-1/etc/catalog/tpch.properties
+ connector.name=tpch
+
+.. _create-docker-compose-yml:
+
+Create ``docker-compose.yml``
+-----------------------------
+
+Create a ``docker-compose.yml`` file in the ``~/presto-lab`` directory to orchestrate both the Java Coordinator and the C++ Worker containers.
+
+.. code-block:: yaml
+
+ # docker-compose.yml
+ services:
+ coordinator:
+ image: public.ecr.aws/oss-presto/presto:latest
+ platform: linux/amd64
+ container_name: presto-coordinator
+ hostname: coordinator
+ ports:
+ - "8080:8080"
+ volumes:
+ - ./coordinator/etc:/opt/presto-server/etc:ro
+ restart: unless-stopped
+
+ worker-1:
+ image: public.ecr.aws/oss-presto/presto-native:latest
+ platform: linux/amd64
+ container_name: prestissimo-worker-1
+ hostname: worker-1
+ depends_on:
+ - coordinator
+ volumes:
+ - ./worker-1/etc:/opt/presto-server/etc:ro
+ restart: unless-stopped
+
+ worker-2:
+ image: public.ecr.aws/oss-presto/presto-native:latest
+ platform: linux/amd64
+ container_name: prestissimo-worker-2
+ hostname: worker-2
+ depends_on:
+ - coordinator
+ volumes:
+ - ./worker-2/etc:/opt/presto-server/etc:ro
+ restart: unless-stopped
+
+* The coordinator service uses the standard Java Presto image (presto:latest).
+* The worker-1 and worker-2 services use the Prestissimo (C++ Native) image (presto-native:latest).
+* The setting ``platform: linux/amd64`` is essential for users running on Apple Silicon Macs.
+* The ``volumes`` section mounts your local configuration directories (``./coordinator/etc``, ``./worker-1/etc``) into the container's expected path (``/opt/presto-server/etc``).
+
+Start the Cluster and Verify
+----------------------------
+
+1. Start the Cluster
+^^^^^^^^^^^^^^^^^^^^
+
+Use Docker Compose to start the cluster in detached mode (``-d``).
+
+.. code-block:: bash
+
+ docker compose up -d
+
+2. Verify
+^^^^^^^^^
+
+1. **Check the Web UI:** Open the Presto Web UI at http://localhost:8080.
+
+ * You should see the UI displaying 3 Active Workers (1 Coordinator and 2 Workers).
+
+2. **Check Detailed Node Status** : Run the following SQL query to check the detailed status and metadata about every node (Coordinator and Workers).
+
+ .. code-block:: sql
+
+ select * from system.runtime.nodes;
+
+ This confirms the cluster nodes are registered and active.
\ No newline at end of file
diff --git a/presto-docs/src/main/sphinx/presto_cpp/limitations.rst b/presto-docs/src/main/sphinx/presto_cpp/limitations.rst
index d86dedb95e2c5..ea957b5e60cea 100644
--- a/presto-docs/src/main/sphinx/presto_cpp/limitations.rst
+++ b/presto-docs/src/main/sphinx/presto_cpp/limitations.rst
@@ -7,6 +7,7 @@ Presto C++ Limitations
:backlinks: none
:depth: 1
+
General Limitations
===================
@@ -38,13 +39,369 @@ The C++ evaluation engine has a number of limitations:
* The reserved pool is not supported.
* In general, queries may use more memory than they are allowed to through memory arbitration. See `Memory Management `_.
+
Functions
=========
-reduce_agg
-----------
+Aggregate Functions
+-------------------
+
+reduce_agg
+^^^^^^^^^^
In C++ based Presto, ``reduce_agg`` is not permitted to return ``null`` in either the
``inputFunction`` or the ``combineFunction``. In Presto (Java), this is permitted
but undefined behavior. For more information about ``reduce_agg`` in Presto,
-see `reduce_agg <../functions/aggregate.html#reduce_agg>`_.
\ No newline at end of file
+see `reduce_agg <../functions/aggregate.html#reduce_agg>`_.
+
+reduce lambda
+^^^^^^^^^^^^^
+For the reduce lambda function, the array size is controlled by the session property
+``native_expression_max_array_size_in_reduce``, as it is inefficient to support such
+cases for arbitrarily large arrays. This property is set at ``100K``. Queries that
+fail due to this limit must be revised to stay within it.
+
+
+Array Functions
+---------------
+
+Array sort with lambda comparator
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+``Case`` is not supported for the lambda comparator. Use ``If`` instead. The following
+example is not supported in Presto C++:
+
+.. code-block:: sql
+
+ (x, y) ->
+ CASE
+ WHEN x.event_time < y.event_time THEN
+ -1
+ WHEN x.event_time > y.event_time THEN
+ 1
+ ELSE 0
+ END
+
+To work with Presto C++, the best option is to use a transform lambda whenever possible.
+For example:
+
+.. code-block:: sql
+
+ (x) -> x.event_time
+
+Or, rewrite using ``if`` as in the following example:
+
+.. code-block:: sql
+
+ (x, y) -> IF (x.event_time < y.event_time, -1,
+ IF (x.event_time > y.event_time, 1, 0))
+
+When using ``If``, follow these rules when using a lambda in array sort:
+
+* The lambda should use ``if else``. Case is not supported.
+* The lambda should return ``1``, ``0``, ``-1``. Cover all the cases.
+* The lambda should use the same expression when doing the comparison.
+ For example, in the above case ``event_time`` is used for comparison throughout the lambda.
+  If the expression is rewritten as follows, where ``x`` and ``y`` have different fields, it will fail:
+ ``(x, y) -> if (x.event_time < y.event_start_time, -1, if (x.event_time > y.event_start_time, 1, 0))``
+* Any additional nesting other than the two ``if`` uses shown above will fail.
+
+``Array_sort`` can support any transformation lambda that returns a comparable type.
+This example is not supported in Presto C++:
+
+.. code-block:: sql
+
+ "array_sort"("map_values"(m), (a, b) -> (
+ CASE WHEN (a[1] [2] > b[1] [2]) THEN 1
+ WHEN (a[1] [2] < b[1] [2]) THEN -1
+ WHEN (a[1] [2] = b[1] [2]) THEN
+ IF((a[3] > b[3]), 1, -1) END)
+
+To run in Presto C++, rewrite the query as shown in this example:
+
+.. code-block:: sql
+
+ "array_sort"("map_values"(m), (a) -> ROW(a[1][2], a[3]))
+
+
+Casting
+-------
+
+Casting of Unicode strings to digits is not supported. The following example is not supported in Presto C++:
+
+.. code-block:: sql
+
+   CAST ('Ⅶ' as integer)
+
+
+Date and Time Functions
+-----------------------
+The maximum date range supported by ``from_unixtime`` is between (292 Million BCE, 292 Million CE).
+The exact values corresponding to this are [292,275,055-05-16 08:54:06.192 BC, +292,278,994-08-17 00:12:55.807 CE],
+corresponding to a UNIX time between [-9223372036854775, 9223372036854775].
+
+Presto and Presto C++ both support the same range but Presto queries succeed because Presto silently
+truncates. Presto C++ throws an error if the values exceed this range.
+
+
+Geospatial Differences
+----------------------
+There are cosmetic representation changes as well as numerical precision differences.
+Some of these differences result in different output for spatial predicates such
+as ST_Intersects. Differences include:
+
+* Equivalent but different representations for geometries. Polygons may have their rings
+ rotated, EMPTY geometries may be of a different type, MULTI-types and
+ GEOMETRYCOLLECTIONs may have their elements in a different order. In general,
+ WKTs/WKBs may be different.
+* Numerical precision: Differences in numerical techniques may result in different
+ coordinate values, and also different results for predicates (ST_Relates and children,
+ including ST_Contains, ST_Crosses, ST_Disjoint, ST_Equals, ST_Intersects,
+ ST_Overlaps, ST_Relate, ST_Touches, ST_Within).
+* ST_IsSimple, ST_IsValid, simplify_geometry and geometry_invalid_reason may give different results.
+
+
+JSON Functions
+--------------
+``json_extract`` has several topics to consider when rewriting Presto queries to run successfully in Presto C++.
+
+Use of functions in JSON path
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Using functions inside a JSON path is not supported.
+
+To run queries with functions inside a JSON path in Presto C++, rewrite paths to
+use equivalent and often faster UDFs (User-Defined Functions) outside the JSON
+path, improving job portability and efficiency. Aggregates might be necessary.
+
+Generally, functions should be extracted from the JSON path for better portability.
+
+For example, this Presto query:
+
+.. code-block:: sql
+
+ CAST(JSON_EXTRACT(config, '$.table_name_to_properties.keys()'
+ ) AS ARRAY(ARRAY(VARCHAR)))
+
+can be revised to work in both Presto and Presto C++ as the following:
+
+.. code-block:: sql
+
+ map_keys(JSON_EXTRACT( config, '$.table_name_to_properties') )
+
+Use of expressions in JSON path
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Paths containing filter expressions are not supported.
+
+To run such queries in Presto C++, revise the query to do the filtering as a
+part of the SQL expression query, rather than in the JSON path.
+
+For example, consider this Presto query:
+
+.. code-block:: sql
+
+ JSON_EXTRACT(config, '$.store.book[?(@.price > 10)]')
+
+The same query rewritten to run in Presto C++:
+
+.. code-block:: sql
+
+ filter(
+ CAST(json_extract(data, '$.store.book') AS ARRAY),
+ x -> CAST(json_extract_scalar(x.value, '$.price') AS DOUBLE) > 10)
+ )
+
+Erroring on Invalid JSON
+^^^^^^^^^^^^^^^^^^^^^^^^
+Presto can successfully run ``json_extract`` on certain invalid JSON, but Presto C++
+always fails. Extracting data from invalid JSON is indeterminate and relying on
+that behavior can have unintended consequences.
+
+Because Presto C++ takes the safe approach to always throw an error on invalid
+JSON, wrap calls in a try to ensure the query succeeds and validate that the
+results correspond to your expectations.
+
+Canonicalization
+^^^^^^^^^^^^^^^^
+Presto ``json_extract`` can return `JSON that is not canonicalized `_.
+``json_extract`` has been rewritten in Presto C++ to always return canonical JSON.
+
+
+Regex Functions
+---------------
+
+Unsupported Cases
+^^^^^^^^^^^^^^^^^
+Presto C++ uses `RE2 `_, a widely adopted modern regular
+expression parsing library.
+
+Presto uses `JONI `_, a deprecated port of Oniguruma (ONIG).
+
+While both frameworks support almost all regular expression syntaxes, RE2 differs from
+JONI and PCRE in certain cases. The following are not supported in Presto C++ but are supported in Presto:
+
+* before text matching (?=re)
+* before text not matching (?!re)
+* after text matching (?<=re)
+* after text not matching (?<!re)
+
+Presto queries using these constructs, such as in `this example `_,
+must be rewritten to run in Presto C++. See `Syntax `_
+for a full list of unsupported regular expressions in RE2 and
+`Caveats `_ for an explanation of
+why RE2 skips certain syntax in Perl.
+
+Regex Compilation Limit in Velox
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Because Regex compilation is CPU intensive, unbounded compilation can cause problems.
+The number of regular expressions that can be dynamically compiled for a query is limited
+to 250 to keep the overall shared cluster environment healthy.
+
+If this limit is reached, rewrite the query to use fewer compiled regular expressions.
+
+In this example the regex can change based on the ``test_name`` column value, which could exceed the 250 limit:
+
+.. code-block:: sql
+
+ code_location_path LIKE '%' || test_name || '%'
+
+Revise the query as follows to avoid this limit:
+
+.. code-block:: sql
+
+ strpos(code_location, test_name) > 0
+
+
+Time and Time with Time Zone
+----------------------------
+
+IANA Named Timezones Support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Support for IANA named time zones - for example, `Europe/London`, `UTC`, `America/New_York`,
+`Asia/Kolkata` - in ``TIME`` and ``TIME WITH TIME ZONE`` was removed from Presto C++
+to align with the SQL standard. Only fixed-offset time zones such as `+02:00` are
+now supported for these types.
+
+Named time zones may still work when the Presto coordinator handles the query.
+
+To run queries involving ``TIME`` and ``TIME WITH TIME ZONE``, migrate to fixed-offset
+time zones as soon as possible.
+
+These queries will fail in Presto C++, but may still work in Presto:
+
+.. code-block:: sql
+
+ cast('14:00:01 UTC' as TIME WITH TIME ZONE)
+ cast('14:00:01 Europe/Paris' as TIME WITH TIME ZONE)
+ cast('14:00:01 America/New_York' as TIME WITH TIME ZONE)
+ cast('14:00:01 Asia/Kolkata' as TIME WITH TIME ZONE)
+
+These queries using fixed offsets will run successfully in Presto C++:
+
+.. code-block:: sql
+
+ cast('14:00:01 +00:00' as TIME WITH TIME ZONE)
+ cast('14:00:01 +05:30' as TIME WITH TIME ZONE)
+
+Casting from TIMESTAMP to TIME
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+In Presto, the result of CAST(TIMESTAMP AS TIME) or CAST(TIMESTAMP AS TIME WITH TIME ZONE)
+would change based on the session property ``legacy_timestamp`` (true by default) when
+applied to the user's time zone. In Presto C++ for ``TIME`` and ``TIME WITH TIME ZONE``,
+the behavior is equivalent to the property being `false`.
+
+Note: ``TIMESTAMP`` behavior in Presto and Presto C++ is unchanged.
+
+For examples, consider the following queries and their responses when run in Presto:
+
+.. code-block:: sql
+
+ -- Default behavior with legacy_timestamp=true:
+ -- Session Timezone - America/Los_Angeles
+
+ -- DST Active Dates
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000' as TIME);
+ -- Returns: 09:15:00.000
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000' as TIME WITH TIME ZONE);
+ -- Returns: 09:15:00.000 America/Los_Angeles
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000 America/Los_Angeles' as TIME);
+ -- Returns: 09:15:00.000
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000 America/Los_Angeles' as TIME WITH TIME ZONE);
+    -- Returns: 09:15:00.000 America/Los_Angeles
+
+ -- DST Inactive Dates
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000' as TIME WITH TIME ZONE);
+ -- Returns: 10:15:00.000 America/Los_Angeles
+    select cast(TIMESTAMP '2023-12-05 10:15:00.000 America/Los_Angeles' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000 America/Los_Angeles' as TIME WITH TIME ZONE);
+    -- Returns: 10:15:00.000 America/Los_Angeles
+
+Consider the following queries and their responses when run in Presto C++ (Velox):
+
+.. code-block:: sql
+
+ -- New Expected behavior similar to what currently exists if legacy_timestamp=false:
+ -- Session Timezone - America/Los_Angeles
+
+
+ -- DST Active Dates
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000' as TIME WITH TIME ZONE);
+ -- Returns: 10:15:00.000 -07:00
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000 America/Los_Angeles' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-08-05 10:15:00.000 America/Los_Angeles' as TIME WITH TIME ZONE);
+ -- Returns: 10:15:00.000 -07:00
+
+ -- DST Inactive Dates
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000' as TIME WITH TIME ZONE);
+ -- Returns: 10:15:00.000 -08:00
+    select cast(TIMESTAMP '2023-12-05 10:15:00.000 America/Los_Angeles' as TIME);
+ -- Returns: 10:15:00.000
+ select cast(TIMESTAMP '2023-12-05 10:15:00.000 America/Los_Angeles' as TIME WITH TIME ZONE);
+ -- Returns: 10:15:00.000 -08:00
+
+Note: ``TIMESTAMP`` supports named time zones, unlike ``TIME`` and ``TIME WITH TIME ZONE``.
+
+DST Implications
+^^^^^^^^^^^^^^^^
+Because IANA zones are not supported for ``TIME``, Presto C++ does not manage DST transitions.
+All time interpretation is strictly in the provided offset, not local civil time.
+
+For example, ``14:00:00 +02:00`` always means 14:00 at a +02:00 fixed offset, regardless
+of DST changes that might apply under an IANA zone.
+
+Recommendations
+^^^^^^^^^^^^^^^
+* Use fixed-offset time zones like +02:00 with ``TIME`` and ``TIME WITH TIME ZONE``.
+* Do not use IANA time zone names for ``TIME`` and ``TIME WITH TIME ZONE``.
+* Confirm that your Presto C++ usage does not depend on legacy timestamp behavior. If your workload
+ depends on legacy ``TIME`` behavior, including support of IANA timezones, handle this outside
+ Presto or reach out so that we can discuss alternative solutions.
+* Test: Try your most critical workflows with these settings.
+
+
+URL Functions
+-------------
+
+Presto and Presto C++ implement different URL function specifications which can lead to
+some URL function mismatches. Presto C++ implements `RFC-3986 <https://datatracker.ietf.org/doc/html/rfc3986>`_ whereas Presto
+implements `RFC-2396 <https://datatracker.ietf.org/doc/html/rfc2396>`_. This can lead to subtle differences as presented in
+`this issue `_.
+
+Window Functions
+----------------
+
+Aggregate window functions do not support ``IGNORE NULLS``, returning the following error message:
+
+``!ignoreNulls Aggregate window functions do not support IGNORE NULLS.``
+
+For Presto C++, remove the ``IGNORE NULLS`` clause. This clause is only defined for value functions
+and does not apply to aggregate window functions. In Presto the results obtained with and without
+the clause are similar; Presto C++ rejects this clause with the error above, whereas Presto just warns.
\ No newline at end of file
diff --git a/presto-docs/src/main/sphinx/presto_cpp/metrics.rst b/presto-docs/src/main/sphinx/presto_cpp/metrics.rst
new file mode 100644
index 0000000000000..81f85b19c9260
--- /dev/null
+++ b/presto-docs/src/main/sphinx/presto_cpp/metrics.rst
@@ -0,0 +1,674 @@
+==========================
+Presto C++ Runtime Metrics
+==========================
+
+.. contents::
+ :local:
+ :backlinks: none
+ :depth: 1
+
+Overview
+========
+
+Presto C++ workers expose various runtime metrics that can be collected and monitored when
+``runtime-metrics-collection-enabled`` is set to true. These metrics are available through the
+``GET /v1/info/metrics`` endpoint in Prometheus data format.
+
+For information on enabling metrics collection, see :doc:`features`.
+
+Executor Metrics
+================
+
+These metrics track the performance and queue sizes of various executors in the Presto C++ worker.
+
+``presto_cpp.driver_cpu_executor_queue_size``
+---------------------------------------------
+
+* **Type:** gauge
+* **Description:** Number of tasks currently queued in the driver CPU executor waiting to be processed.
+
+``presto_cpp.driver_cpu_executor_latency_ms``
+---------------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Latency distribution of tasks in the driver CPU executor, measuring the time from task submission to execution start.
+
+``presto_cpp.spiller_executor_queue_size``
+------------------------------------------
+
+* **Type:** gauge
+* **Description:** Number of spilling tasks currently queued in the spiller executor.
+
+``presto_cpp.spiller_executor_latency_ms``
+------------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Latency distribution of spilling tasks in the spiller executor.
+
+``presto_cpp.http_executor_latency_ms``
+---------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Latency distribution of HTTP request processing tasks in the HTTP executor.
+
+HTTP Metrics
+============
+
+These metrics track HTTP requests and responses in the Presto C++ worker.
+
+``presto_cpp.num_http_request``
+-------------------------------
+
+* **Type:** counter
+* **Description:** Total number of HTTP requests received by the worker since startup.
+
+``presto_cpp.num_http_request_error``
+-------------------------------------
+
+* **Type:** counter
+* **Description:** Total number of HTTP request errors encountered by the worker since startup.
+
+``presto_cpp.http_request_latency_ms``
+--------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Latency distribution of HTTP request processing, from receipt to response.
+
+``presto_cpp.http_request_size_bytes``
+--------------------------------------
+
+* **Type:** histogram
+* **Unit:** bytes
+* **Description:** Size distribution of HTTP request payloads.
+
+HTTP Client Metrics
+===================
+
+These metrics track HTTP client connection behavior for outbound requests.
+
+``presto_cpp.http.client.num_connections_created``
+--------------------------------------------------
+
+* **Type:** counter
+* **Description:** Total number of HTTP client connections created by the worker.
+
+``presto_cpp.http.client.connection_first_use``
+-----------------------------------------------
+
+* **Type:** counter
+* **Description:** Number of HTTP requests that are the first request on a new connection (sequence number == 0).
+
+``presto_cpp.http.client.connection_reuse``
+-------------------------------------------
+
+* **Type:** counter
+* **Description:** Number of HTTP requests sent on reused connections (sequence number > 0).
+
+``presto_cpp.http.client.transaction_create_delay_ms``
+------------------------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Delay in creating HTTP client transactions.
+
+Exchange Metrics
+================
+
+These metrics track data exchange operations between workers.
+
+``presto_cpp.exchange_source_peak_queued_bytes``
+------------------------------------------------
+
+* **Type:** gauge
+* **Unit:** bytes
+* **Description:** Peak number of bytes queued in PrestoExchangeSource waiting to be consumed.
+
+``presto_cpp.exchange.request.duration``
+----------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Duration distribution of exchange data fetch requests.
+
+``presto_cpp.exchange.request.num_tries``
+-----------------------------------------
+
+* **Type:** histogram
+* **Description:** Number of retry attempts for exchange data fetch requests.
+
+``presto_cpp.exchange.request.page_size``
+-----------------------------------------
+* **Type:** histogram
+* **Unit:** bytes
+* **Description:** Size distribution of data pages fetched through exchange requests.
+
+``presto_cpp.exchange.get_data_size.duration``
+----------------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Duration distribution of operations to get the size of exchange data.
+
+``presto_cpp.exchange.get_data_size.num_tries``
+-----------------------------------------------
+
+* **Type:** histogram
+* **Description:** Number of retry attempts for getting exchange data size.
+
+Query Context and Memory Metrics
+================================
+
+These metrics track query execution contexts and memory usage.
+
+``presto_cpp.num_query_contexts``
+---------------------------------
+
+* **Type:** gauge
+* **Description:** Current number of active query contexts in the worker.
+
+``presto_cpp.memory_manager_total_bytes``
+-----------------------------------------
+
+* **Type:** gauge
+* **Unit:** bytes
+* **Description:** Total bytes currently used by the memory manager across all queries' memory pools.
+
+Task Metrics
+============
+
+These metrics track task lifecycle and execution states.
+
+Task Counts
+-----------
+
+``presto_cpp.num_tasks``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks created on this worker since startup.
+
+``presto_cpp.num_tasks_bytes_processed``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Unit:** bytes
+* **Description:** Total bytes processed by all tasks on this worker.
+
+``presto_cpp.num_tasks_running``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Current number of tasks in running state.
+
+``presto_cpp.num_tasks_finished``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks that completed successfully.
+
+``presto_cpp.num_tasks_cancelled``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks that were cancelled.
+
+``presto_cpp.num_tasks_aborted``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks that were aborted.
+
+``presto_cpp.num_tasks_failed``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks that failed with an error.
+
+``presto_cpp.num_tasks_planned``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of tasks that have been created but not yet started, including queued tasks.
+
+``presto_cpp.num_tasks_queued``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of tasks currently waiting in the task queue.
+
+Task Health Metrics
+-------------------
+
+``presto_cpp.num_zombie_velox_tasks``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of zombie Velox tasks (tasks that are no longer active but not cleaned up).
+
+``presto_cpp.num_zombie_presto_tasks``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of zombie Presto tasks (tasks that are no longer active but not cleaned up).
+
+``presto_cpp.num_tasks_with_stuck_operator``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of tasks that have at least one stuck operator.
+
+``presto_cpp.num_cancelled_tasks_by_stuck_driver``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks cancelled due to stuck drivers.
+
+``presto_cpp.num_tasks_deadlock``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of tasks that encountered deadlock conditions.
+
+``presto_cpp.num_tasks_manager_lock_timeout``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Number of times the task manager lock acquisition timed out.
+
+Driver Metrics
+==============
+
+These metrics track the state and execution of drivers within tasks.
+
+Driver States
+-------------
+
+``presto_cpp.num_queued_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers currently queued and waiting to execute.
+
+``presto_cpp.num_on_thread_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers currently executing on threads.
+
+``presto_cpp.num_suspended_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers that are suspended.
+
+Driver Blocking Reasons
+-----------------------
+
+``presto_cpp.num_blocked_wait_for_consumer_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for downstream consumers to consume data.
+
+``presto_cpp.num_blocked_wait_for_split_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for new splits to be assigned.
+
+``presto_cpp.num_blocked_wait_for_producer_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for upstream producers to provide data.
+
+``presto_cpp.num_blocked_wait_for_join_build_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for join build side to complete.
+
+``presto_cpp.num_blocked_wait_for_join_probe_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked during join probe operations.
+
+``presto_cpp.num_blocked_wait_for_merge_join_right_side_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for merge join right side data.
+
+``presto_cpp.num_blocked_wait_for_memory_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for memory to become available.
+
+``presto_cpp.num_blocked_wait_for_connector_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers blocked waiting for connector operations to complete.
+
+``presto_cpp.num_blocked_yield_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers that have yielded execution.
+
+``presto_cpp.num_stuck_drivers``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of drivers that appear to be stuck and not making progress.
+
+Worker Overload Metrics
+=======================
+
+These metrics indicate when the worker is overloaded and may reject new work.
+
+``presto_cpp.overloaded_mem``
+-----------------------------
+
+* **Type:** gauge
+* **Description:** Exports 100 if the worker is overloaded in terms of memory usage, 0 otherwise.
+
+``presto_cpp.overloaded_cpu``
+-----------------------------
+
+* **Type:** gauge
+* **Description:** Exports 100 if the worker is overloaded in terms of CPU usage, 0 otherwise.
+
+``presto_cpp.overloaded``
+-------------------------
+
+* **Type:** gauge
+* **Description:** Exports 100 if the worker is overloaded in terms of either memory or CPU, 0 otherwise.
+
+``presto_cpp.task_planned_time_ms``
+-----------------------------------
+
+* **Type:** gauge
+* **Unit:** milliseconds
+* **Description:** Average time tasks spend in the planned state (queued) before starting execution.
+
+``presto_cpp.overloaded_duration_sec``
+--------------------------------------
+
+* **Type:** gauge
+* **Unit:** seconds
+* **Description:** Duration in seconds that the worker has been continuously overloaded, or 0 if not currently overloaded.
+
+Output Buffer Metrics
+=====================
+
+These metrics track the partitioned output buffers used for shuffling data.
+
+``presto_cpp.num_partitioned_output_buffer``
+--------------------------------------------
+
+* **Type:** gauge
+* **Description:** Total number of output buffers currently managed by all OutputBufferManagers.
+
+``presto_cpp.partitioned_output_buffer_get_data_latency_ms``
+------------------------------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Description:** Latency distribution of getData() calls on OutputBufferManager instances.
+
+Worker Runtime Metrics
+======================
+
+``presto_cpp.worker_runtime_uptime_secs``
+-----------------------------------------
+
+* **Type:** counter
+* **Unit:** seconds
+* **Description:** Worker runtime uptime in seconds after the worker process started. This metric tracks how long the worker has been running.
+
+Operating System Metrics
+========================
+
+These metrics provide insight into OS-level resource usage by the worker process.
+
+CPU Time Metrics
+----------------
+
+``presto_cpp.os_user_cpu_time_micros``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Unit:** microseconds
+* **Description:** User CPU time consumed by the presto_server process since the process started.
+
+``presto_cpp.os_system_cpu_time_micros``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Unit:** microseconds
+* **Description:** System CPU time consumed by the presto_server process since the process started.
+
+Page Fault Metrics
+------------------
+
+``presto_cpp.os_num_soft_page_faults``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of soft page faults (page faults that can be resolved without disk I/O) encountered by the presto_server process since startup.
+
+``presto_cpp.os_num_hard_page_faults``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of hard page faults (page faults requiring disk I/O) encountered by the presto_server process since startup.
+
+Context Switch Metrics
+----------------------
+
+``presto_cpp.os_num_voluntary_context_switches``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of voluntary context switches in the presto_server process (when the process yields the CPU voluntarily).
+
+``presto_cpp.os_num_forced_context_switches``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Total number of involuntary context switches in the presto_server process (when the process is preempted by the OS).
+
+Hive Connector Metrics
+======================
+
+These metrics track the performance of the Hive connector's file handle cache. The metrics include
+a placeholder ``{}`` in their name which is replaced with the connector name at runtime.
+
+File Handle Cache Metrics
+-------------------------
+
+``presto_cpp.{connector}.hive_file_handle_cache_num_elements``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Description:** Number of elements currently in the Hive file handle cache.
+
+``presto_cpp.{connector}.hive_file_handle_cache_pinned_size``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Unit:** bytes
+* **Description:** Total size of pinned (in-use) entries in the Hive file handle cache.
+
+``presto_cpp.{connector}.hive_file_handle_cache_cur_size``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** gauge
+* **Unit:** bytes
+* **Description:** Current total size of the Hive file handle cache.
+
+``presto_cpp.{connector}.hive_file_handle_cache_num_accumulative_hits``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Cumulative number of cache hits in the Hive file handle cache since startup.
+
+``presto_cpp.{connector}.hive_file_handle_cache_num_accumulative_lookups``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Cumulative number of cache lookups in the Hive file handle cache since startup.
+
+``presto_cpp.{connector}.hive_file_handle_cache_num_hits``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Number of cache hits in the Hive file handle cache (recent window).
+
+``presto_cpp.{connector}.hive_file_handle_cache_num_lookups``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** counter
+* **Description:** Number of cache lookups in the Hive file handle cache (recent window).
+
+Thread Pool Metrics
+===================
+
+These metrics track the state of various thread pools. The metrics include a placeholder ``{}``
+in their name which is replaced with the thread pool name at runtime.
+
+``presto_cpp.{pool}.num_threads``
+---------------------------------
+
+* **Type:** gauge
+* **Description:** Current number of threads in the thread pool.
+
+``presto_cpp.{pool}.num_active_threads``
+----------------------------------------
+
+* **Type:** gauge
+* **Description:** Number of threads currently executing tasks in the thread pool.
+
+``presto_cpp.{pool}.num_pending_tasks``
+---------------------------------------
+
+* **Type:** gauge
+* **Description:** Number of tasks waiting to be executed in the thread pool.
+
+``presto_cpp.{pool}.num_total_tasks``
+-------------------------------------
+
+* **Type:** counter
+* **Description:** Total number of tasks that have been submitted to the thread pool since startup.
+
+``presto_cpp.{pool}.max_idle_time_ns``
+--------------------------------------
+
+* **Type:** gauge
+* **Unit:** nanoseconds
+* **Description:** Maximum idle time for threads in the pool before they are terminated.
+
+EventBase Violation Metrics
+===========================
+
+These metrics track violations of the EventBase (event loop) threading model.
+
+``presto_cpp.exchange_io_evb_violation_count``
+----------------------------------------------
+
+* **Type:** counter
+* **Description:** Number of times the exchange I/O EventBase threading model was violated (operations performed on wrong thread).
+
+``presto_cpp.http_server_io_evb_violation_count``
+-------------------------------------------------
+
+* **Type:** counter
+* **Description:** Number of times the HTTP server I/O EventBase threading model was violated.
+
+Memory Pushback Metrics
+=======================
+
+These metrics track the memory pushback mechanism that helps prevent out-of-memory conditions.
+
+``presto_cpp.memory_pushback_count``
+------------------------------------
+
+* **Type:** counter
+* **Description:** Number of times the memory pushback mechanism has been triggered.
+
+``presto_cpp.memory_pushback_latency_ms``
+-----------------------------------------
+
+* **Type:** histogram
+* **Unit:** milliseconds
+* **Range:** 0-100,000 ms (0-100 seconds)
+* **Percentiles:** P50, P90, P99, P100
+* **Description:** Latency distribution of memory pushback operations, measuring how long each pushback attempt takes.
+
+``presto_cpp.memory_pushback_reduction_bytes``
+----------------------------------------------
+
+* **Type:** histogram
+* **Unit:** bytes
+* **Range:** 0-15 GB (150 buckets)
+* **Percentiles:** P50, P90, P99, P100
+* **Description:** Distribution of actual memory usage reduction achieved by each memory pushback attempt. This metric helps gauge the effectiveness of the memory pushback mechanism.
+
+``presto_cpp.memory_pushback_expected_reduction_bytes``
+-------------------------------------------------------
+
+* **Type:** histogram
+* **Unit:** bytes
+* **Range:** 0-15 GB (150 buckets)
+* **Percentiles:** P50, P90, P99, P100
+* **Description:** Distribution of expected memory usage reduction for each memory pushback attempt. The expected reduction may differ from actual reduction as other threads might allocate memory during the pushback operation.
+
+Additional Runtime Metrics
+==========================
+
+For additional runtime metrics related to specific subsystems:
+
+* **S3 FileSystem Metrics:** When Presto C++ workers interact with S3, additional runtime metrics are collected. See the `Velox S3 FileSystem documentation `_.
+
+* **Velox Metrics:** Metrics from the underlying Velox execution engine are also available. These are prefixed with ``velox.`` instead of ``presto_cpp.``. See the `Velox metrics documentation <https://facebookincubator.github.io/velox/monitoring/metrics.html>`_.
+
+Accessing Metrics
+=================
+
+To access these metrics:
+
+1. Enable metrics collection by setting ``runtime-metrics-collection-enabled=true`` in your worker configuration.
+
+2. Query the metrics endpoint:
+
+ .. code-block:: bash
+
+ curl http://worker-host:7777/v1/info/metrics
+
+3. The response will be in Prometheus text format, suitable for scraping by Prometheus or other monitoring systems.
+
+Example Output
+--------------
+
+.. code-block:: text
+
+ # TYPE presto_cpp_worker_runtime_uptime_secs counter
+ presto_cpp_worker_runtime_uptime_secs{cluster="production",worker="worker-01"} 3600
+ # TYPE presto_cpp_num_tasks_running gauge
+ presto_cpp_num_tasks_running{cluster="production",worker="worker-01"} 42
+ # TYPE presto_cpp_memory_manager_total_bytes gauge
+ presto_cpp_memory_manager_total_bytes{cluster="production",worker="worker-01"} 8589934592
+
+See Also
+========
+
+* :doc:`features` - For information on enabling metrics collection
+* :doc:`properties` - For worker configuration properties
+* `Velox Metrics Documentation <https://facebookincubator.github.io/velox/monitoring/metrics.html>`_ - For metrics from the Velox execution engine
diff --git a/presto-docs/src/main/sphinx/presto_cpp/properties-session.rst b/presto-docs/src/main/sphinx/presto_cpp/properties-session.rst
index cac0763034dba..fd2357cf3f19e 100644
--- a/presto-docs/src/main/sphinx/presto_cpp/properties-session.rst
+++ b/presto-docs/src/main/sphinx/presto_cpp/properties-session.rst
@@ -300,6 +300,15 @@ The maximum bytes to buffer per PartitionedOutput operator to avoid creating tin
For PartitionedOutputNode::Kind::kPartitioned, PartitionedOutput operator would buffer up to that number of
bytes / number of destinations for each destination before producing a SerializedPage. Default is 32MB.
+``native_partitioned_output_eager_flush``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+Native Execution only. If true, the PartitionedOutput operator will flush rows eagerly, without waiting
+until buffers reach a certain size. Default is false.
+
``native_max_local_exchange_partition_count``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -472,6 +481,17 @@ In streaming aggregation, wait until there are enough output rows
to produce a batch of the size specified by this property. If set to ``0``, then
``Operator::outputBatchRows`` is used as the minimum number of output batch rows.
+``native_merge_join_output_batch_start_size``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``integer``
+* **Default value:** ``0``
+
+Native Execution only. Initial output batch size in rows for MergeJoin operator.
+When non-zero, the batch size starts at this value and is dynamically adjusted
+based on the average row size of previous output batches. When zero (default),
+dynamic adjustment is disabled and the batch size is fixed at ``preferred_output_batch_rows``.
+
``native_request_data_sizes_max_wait_sec``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -568,3 +588,28 @@ with StringView type during global aggregation.
Native Execution only. Ratio of unused (evicted) bytes to total bytes that triggers
compaction. The value is in the range of [0, 1). Currently only applies to
approx_most_frequent aggregate with StringView type during global aggregation.
+
+``native_aggregation_memory_compaction_reclaim_enabled``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+Native Execution only. If true, enables lightweight memory compaction before
+spilling during memory reclaim in aggregation. When enabled, the aggregation
+operator will try to compact aggregate function state (for example, free dead strings)
+before resorting to spilling.
+
+``optimizer.optimize_top_n_rank``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* **Type:** ``boolean``
+* **Default value:** ``false``
+
+If this is true, then filter and limit queries for ``n`` rows of
+``rank()`` and ``dense_rank()`` window function values are executed
+with a special TopNRowNumber operator instead of the
+WindowFunction operator.
+
+The TopNRowNumber operator is more efficient than window as
+it has a streaming behavior and does not need to buffer all input rows.
diff --git a/presto-docs/src/main/sphinx/router/deployment.rst b/presto-docs/src/main/sphinx/router/deployment.rst
index 04f4956db8e52..a1426471f49bf 100644
--- a/presto-docs/src/main/sphinx/router/deployment.rst
+++ b/presto-docs/src/main/sphinx/router/deployment.rst
@@ -2,11 +2,6 @@
Deploying Presto Router
=======================
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Installing Router
-----------------
diff --git a/presto-docs/src/main/sphinx/router/scheduler.rst b/presto-docs/src/main/sphinx/router/scheduler.rst
index bb9292e9fa927..693ec5c6d7044 100644
--- a/presto-docs/src/main/sphinx/router/scheduler.rst
+++ b/presto-docs/src/main/sphinx/router/scheduler.rst
@@ -2,36 +2,31 @@
Router Schedulers
=================
-.. contents::
- :local:
- :backlinks: none
- :depth: 1
-
Presto router provides multiple scheduling algorithms for load balancing across
multiple clusters.
* ``RANDOM_CHOICE``
-Randomly selecting a cluster from a list of candidates.
+ Randomly selecting a cluster from a list of candidates.
* ``ROUND_ROBIN``
-Selecting clusters from a list of candidates in turn. Note that as the algorithm
-keeps the state of the selected index, it can only be used when the candidates
-are always consistent.
+ Selecting clusters from a list of candidates in turn. Because the algorithm
+ keeps the state of the selected index, it can only be used when the candidates
+ are always consistent.
* ``USER_HASH``
-Selecting a clusters by hashing the username. This ensures queries from the same
-user will always be routed to the same cluster.
+ Selecting a cluster by hashing the username. This ensures queries from the same
+ user are always routed to the same cluster.
* ``WEIGHTED_RANDOM_CHOICE``
-Randomly selecting a cluster from a list of candidates with pre-defined weights.
-Clusters with higher weights have higher opportunity to be selected.
+ Randomly selecting a cluster from a list of candidates with pre-defined weights.
+ Clusters with higher weights have a greater chance of being selected.
* ``WEIGHTED_ROUND_ROBIN``
-Selecting clusters from a list of candidates with pre-defined weights in turn.
-Note that similar to the `ROUND_ROBIN` approach, this algorithm keeps the state
-of the selected index so candidates and weights should be consistent.
+ Selecting clusters from a list of candidates with pre-defined weights in turn.
+ Similar to the `ROUND_ROBIN` approach, this algorithm keeps the state
+ of the selected index so candidates and weights should be consistent.
diff --git a/presto-docs/src/main/sphinx/sql/alter-table.rst b/presto-docs/src/main/sphinx/sql/alter-table.rst
index 93778714a24bb..e128a7c3694da 100644
--- a/presto-docs/src/main/sphinx/sql/alter-table.rst
+++ b/presto-docs/src/main/sphinx/sql/alter-table.rst
@@ -17,6 +17,15 @@ Synopsis
ALTER TABLE [ IF EXISTS ] name SET PROPERTIES (property_name=value, [, ...])
ALTER TABLE [ IF EXISTS ] name DROP BRANCH [ IF EXISTS ] branch_name
ALTER TABLE [ IF EXISTS ] name DROP TAG [ IF EXISTS ] tag_name
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] BRANCH [ IF NOT EXISTS ] branch_name
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] BRANCH [ IF NOT EXISTS ] branch_name FOR SYSTEM_VERSION AS OF version
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] BRANCH [ IF NOT EXISTS ] branch_name FOR SYSTEM_TIME AS OF timestamp
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] BRANCH [ IF NOT EXISTS ] branch_name FOR SYSTEM_VERSION AS OF version RETAIN retention_period
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] BRANCH [ IF NOT EXISTS ] branch_name FOR SYSTEM_VERSION AS OF version RETAIN retention_period WITH SNAPSHOT RETENTION min_snapshots SNAPSHOTS min_retention_period
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] TAG [ IF NOT EXISTS ] tag_name
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] TAG [ IF NOT EXISTS ] tag_name FOR SYSTEM_VERSION AS OF version
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] TAG [ IF NOT EXISTS ] tag_name FOR SYSTEM_TIME AS OF timestamp
+ ALTER TABLE [ IF EXISTS ] name CREATE [ OR REPLACE ] TAG [ IF NOT EXISTS ] tag_name FOR SYSTEM_VERSION AS OF version RETAIN retention_period
Description
-----------
@@ -29,6 +38,18 @@ The optional ``IF EXISTS`` (when used before the column name) clause causes the
The optional ``IF NOT EXISTS`` clause causes the error to be suppressed if the column already exists.
+For ``CREATE BRANCH`` statements:
+
+* The optional ``OR REPLACE`` clause causes the branch to be replaced if it already exists.
+* The optional ``IF NOT EXISTS`` clause causes the error to be suppressed if the branch already exists.
+* ``OR REPLACE`` and ``IF NOT EXISTS`` cannot be specified together.
+
+For ``CREATE TAG`` statements:
+
+* The optional ``OR REPLACE`` clause causes the tag to be replaced if it already exists.
+* The optional ``IF NOT EXISTS`` clause causes the error to be suppressed if the tag already exists.
+* ``OR REPLACE`` and ``IF NOT EXISTS`` cannot be specified together.
+
Examples
--------
@@ -104,6 +125,66 @@ Drop tag ``tag1`` from the ``users`` table::
ALTER TABLE users DROP TAG 'tag1';
+Create branch ``branch1`` from the ``users`` table::
+
+ ALTER TABLE users CREATE BRANCH 'branch1';
+
+Create branch ``branch1`` from the ``users`` table only if it doesn't already exist::
+
+ ALTER TABLE users CREATE BRANCH IF NOT EXISTS 'branch1';
+
+Create or replace branch ``branch1`` from the ``users`` table::
+
+ ALTER TABLE users CREATE OR REPLACE BRANCH 'branch1';
+
+Create branch ``branch1`` from the ``users`` table for system version as of version 5::
+
+ ALTER TABLE users CREATE BRANCH 'branch1' FOR SYSTEM_VERSION AS OF 5;
+
+Create branch ``branch1`` from the ``users`` table for system version as of version 5, only if it doesn't already exist::
+
+ ALTER TABLE users CREATE BRANCH IF NOT EXISTS 'branch1' FOR SYSTEM_VERSION AS OF 5;
+
+Create or replace branch ``branch1`` from the ``users`` table for system time as of timestamp '2026-01-02 17:30:35.247 Asia/Kolkata'::
+
+ ALTER TABLE users CREATE OR REPLACE BRANCH 'branch1' FOR SYSTEM_TIME AS OF TIMESTAMP '2026-01-02 17:30:35.247 Asia/Kolkata';
+
+Create branch ``branch1`` from the ``users`` table for system version as of version 5 with retention period of 30 days::
+
+ ALTER TABLE users CREATE BRANCH 'branch1' FOR SYSTEM_VERSION AS OF 5 RETAIN INTERVAL 30 DAY;
+
+Create branch ``branch1`` from the ``users`` table for system version as of version 5, with a snapshot retention of at least 3 snapshots and a minimum retention period of 7 days::
+
+ ALTER TABLE users CREATE BRANCH 'branch1' FOR SYSTEM_VERSION AS OF 5 RETAIN INTERVAL 7 DAY WITH SNAPSHOT RETENTION 3 SNAPSHOTS INTERVAL 7 DAYS;
+
+Create tag ``tag1`` from the ``users`` table::
+
+ ALTER TABLE users CREATE TAG 'tag1';
+
+Create tag ``tag1`` from the ``users`` table only if it doesn't already exist::
+
+ ALTER TABLE users CREATE TAG IF NOT EXISTS 'tag1';
+
+Create or replace tag ``tag1`` from the ``users`` table::
+
+ ALTER TABLE users CREATE OR REPLACE TAG 'tag1';
+
+Create tag ``tag1`` from the ``users`` table for system version as of version 5::
+
+ ALTER TABLE users CREATE TAG 'tag1' FOR SYSTEM_VERSION AS OF 5;
+
+Create tag ``tag1`` from the ``users`` table for system version as of version 5, only if it doesn't already exist::
+
+ ALTER TABLE users CREATE TAG IF NOT EXISTS 'tag1' FOR SYSTEM_VERSION AS OF 5;
+
+Create or replace tag ``tag1`` from the ``users`` table for system time as of timestamp '2026-01-02 17:30:35.247 Asia/Kolkata'::
+
+ ALTER TABLE users CREATE OR REPLACE TAG 'tag1' FOR SYSTEM_TIME AS OF TIMESTAMP '2026-01-02 17:30:35.247 Asia/Kolkata';
+
+Create tag ``tag1`` from the ``users`` table for system version as of version 5 with retention period of 30 days::
+
+ ALTER TABLE users CREATE TAG 'tag1' FOR SYSTEM_VERSION AS OF 5 RETAIN INTERVAL 30 DAY;
+
See Also
--------
diff --git a/presto-docs/src/main/sphinx/sql/analyze.rst b/presto-docs/src/main/sphinx/sql/analyze.rst
index 5d91a928b7fdb..bdcb793bc849c 100644
--- a/presto-docs/src/main/sphinx/sql/analyze.rst
+++ b/presto-docs/src/main/sphinx/sql/analyze.rst
@@ -21,7 +21,8 @@ connector-specific properties. To list all available properties, run the followi
SELECT * FROM system.metadata.analyze_properties
Currently, this statement is only supported by the
-:ref:`Hive connector `.
+:ref:`Hive connector ` and
+:ref:`Iceberg connector `.
Examples
--------
@@ -34,6 +35,10 @@ Analyze table ``stores`` in catalog ``hive`` and schema ``default``::
ANALYZE hive.default.stores;
+Analyze table ``stores`` in catalog ``iceberg`` and schema ``default``::
+
+ ANALYZE iceberg.default.stores;
+
Analyze partitions ``'1992-01-01', '1992-01-02'`` from a Hive partitioned table ``sales``::
ANALYZE hive.default.sales WITH (partitions = ARRAY[ARRAY['1992-01-01'], ARRAY['1992-01-02']]);
diff --git a/presto-docs/src/main/sphinx/sql/explain-analyze.rst b/presto-docs/src/main/sphinx/sql/explain-analyze.rst
index 9111aa0b02355..0cb0c2c5e6104 100644
--- a/presto-docs/src/main/sphinx/sql/explain-analyze.rst
+++ b/presto-docs/src/main/sphinx/sql/explain-analyze.rst
@@ -36,43 +36,91 @@ relevant plan nodes). Such statistics are useful when one wants to detect data a
.. code-block:: none
- presto:sf1> EXPLAIN ANALYZE SELECT count(*), clerk FROM orders WHERE orderdate > date '1995-01-01' GROUP BY clerk;
+ presto:tiny> EXPLAIN ANALYZE SELECT
+ -> s.acctbal,
+ -> s.name,
+ -> n.name,
+ -> p.partkey,
+ -> p.mfgr,
+ -> s.address,
+ -> s.phone,
+ -> s.comment
+ -> FROM
+ -> part p,
+ -> supplier s,
+ -> partsupp ps,
+ -> nation n,
+ -> region r
+ -> WHERE
+ -> p.partkey = ps.partkey
+ -> AND s.suppkey = ps.suppkey
+ -> AND p.size = 15
+ -> AND p.type like '%BRASS'
+ -> AND s.nationkey = n.nationkey
+ -> AND n.regionkey = r.regionkey
+ -> AND r.name = 'EUROPE'
+ -> AND ps.supplycost = (
+ -> SELECT
+ -> min(ps.supplycost)
+ -> FROM
+ -> partsupp ps,
+ -> supplier s,
+ -> nation n,
+ -> region r
+ -> WHERE
+ -> p.partkey = ps.partkey
+ -> AND s.suppkey = ps.suppkey
+ -> AND s.nationkey = n.nationkey
+ -> AND n.regionkey = r.regionkey
+ -> AND r.name = 'EUROPE'
+ -> )
+ -> ORDER BY
+ -> s.acctbal desc,
+ -> n.name,
+ -> s.name,
+ -> p.partkey
+ -> LIMIT 100;
+
+
Query Plan
-----------------------------------------------------------------------------------------------
- Fragment 1 [HASH]
- Cost: CPU 88.57ms, Input: 4000 rows (148.44kB), Output: 1000 rows (28.32kB)
- Output layout: [count, clerk]
- Output partitioning: SINGLE []
- - Project[] => [count:bigint, clerk:varchar(15)]
- Cost: 26.24%, Input: 1000 rows (37.11kB), Output: 1000 rows (28.32kB), Filtered: 0.00%
- Input avg.: 62.50 lines, Input std.dev.: 14.77%
- - Aggregate(FINAL)[clerk][$hashvalue] => [clerk:varchar(15), $hashvalue:bigint, count:bigint]
- Cost: 16.83%, Output: 1000 rows (37.11kB)
- Input avg.: 250.00 lines, Input std.dev.: 14.77%
- count := "count"("count_8")
- - LocalExchange[HASH][$hashvalue] ("clerk") => clerk:varchar(15), count_8:bigint, $hashvalue:bigint
- Cost: 47.28%, Output: 4000 rows (148.44kB)
- Input avg.: 4000.00 lines, Input std.dev.: 0.00%
- - RemoteSource[2] => [clerk:varchar(15), count_8:bigint, $hashvalue_9:bigint]
- Cost: 9.65%, Output: 4000 rows (148.44kB)
- Input avg.: 4000.00 lines, Input std.dev.: 0.00%
-
- Fragment 2 [tpch:orders:1500000]
- Cost: CPU 14.00s, Input: 818058 rows (22.62MB), Output: 4000 rows (148.44kB)
- Output layout: [clerk, count_8, $hashvalue_10]
- Output partitioning: HASH [clerk][$hashvalue_10]
- - Aggregate(PARTIAL)[clerk][$hashvalue_10] => [clerk:varchar(15), $hashvalue_10:bigint, count_8:bigint]
- Cost: 4.47%, Output: 4000 rows (148.44kB)
- Input avg.: 204514.50 lines, Input std.dev.: 0.05%
- Collisions avg.: 5701.28 (17569.93% est.), Collisions std.dev.: 1.12%
- count_8 := "count"(*)
- - ScanFilterProject[table = tpch:tpch:orders:sf1.0, originalConstraint = ("orderdate" > "$literal$date"(BIGINT '9131')), filterPredicate = ("orderdate" > "$literal$date"(BIGINT '9131'))] => [cler
- Cost: 95.53%, Input: 1500000 rows (0B), Output: 818058 rows (22.62MB), Filtered: 45.46%
- Input avg.: 375000.00 lines, Input std.dev.: 0.00%
- $hashvalue_10 := "combine_hash"(BIGINT '0', COALESCE("$operator$hash_code"("clerk"), 0))
- orderdate := tpch:orderdate
- clerk := tpch:clerk
+ ...
+ Fragment 4 [SOURCE]
+ CPU: 31.55ms, Scheduled: 38.34ms, Input: 8,020 rows (260B); per task: avg.: 8,020.00 std.dev.: 0.00, Output: 1,196 rows (21.02kB), 1 tasks
+ Output layout: [partkey_15, min_73]
+ Output partitioning: HASH [partkey_15]
+ Output encoding: COLUMNAR
+ Stage Execution Strategy: UNGROUPED_EXECUTION
+ - Aggregate(PARTIAL)[partkey_15][PlanNodeId 3023] => [partkey_15:bigint, min_73:double]
+ CPU: 3.00ms (1.74%), Scheduled: 4.00ms (0.54%), Output: 1,196 rows (21.02kB)
+ Input total: 1,600 rows (40.63kB), avg.: 400.00 rows, std.dev.: 0.00%
+ Collisions avg.: 4.50 (160.41% est.), Collisions std.dev.: 86.78%
+ min_73 := "presto.default.min"((supplycost_18)) (1:365)
+ - InnerJoin[PlanNodeId 2455][("suppkey_16" = "suppkey_21")] => [partkey_15:bigint, supplycost_18:double]
+ Estimates: {source: CostBasedSourceInfo, rows: 1,600 (28.13kB), cpu: 684,460.00, memory: 225.00, network: 234.00}
+ CPU: 11.00ms (6.40%), Scheduled: 13.00ms (1.77%), Output: 1,600 rows (40.63kB)
+ Left (probe) Input total: 8,000 rows (210.94kB), avg.: 2,000.00 rows, std.dev.: 0.00%
+ Right (build) Input total: 20 rows (260B), avg.: 1.25 rows, std.dev.: 60.00%
+ Collisions avg.: 0.40 (30.84% est.), Collisions std.dev.: 183.71%
+ Distribution: REPLICATED
+ - ScanFilter[PlanNodeId 9,2699][table = TableHandle {connectorId='tpch', connectorHandle='partsupp:sf0.01', layout='Optional[partsupp:sf0.01]'}, grouped = false, filterPredicate = (not(IS_NULL(partkey_15))) AND (not(IS_NULL(suppkey_16)))] => [partkey_15:bigint, suppkey_16:bigint, supplycost_18:double]
+ Estimates: {source: CostBasedSourceInfo, rows: 8,000 (210.94kB), cpu: 216,000.00, memory: 0.00, network: 0.00}/{source: CostBasedSourceInfo, rows: 8,000 (210.94kB), cpu: 432,000.00, memory: 0.00, network: 0.00}
+ CPU: 14.00ms (8.14%), Scheduled: 16.00ms (2.17%), Output: 8,000 rows (210.94kB)
+ Input total: 8,000 rows (0B), avg.: 2,000.00 rows, std.dev.: 0.00%
+ partkey_15 := tpch:partkey (1:389)
+ supplycost_18 := tpch:supplycost (1:389)
+ suppkey_16 := tpch:suppkey (1:389)
+ Input: 8,000 rows (0B), Filtered: 0.00%
+ - LocalExchange[PlanNodeId 2949][HASH] (suppkey_21) => [suppkey_21:bigint]
+ Estimates: {source: CostBasedSourceInfo, rows: 20 (180B), cpu: 7,480.00, memory: 54.00, network: 234.00}
+ CPU: 0.00ns (0.00%), Scheduled: 0.00ns (0.00%), Output: 20 rows (260B)
+ Input total: 20 rows (260B), avg.: 1.25 rows, std.dev.: 225.39%
+ - RemoteSource[5] => [suppkey_21:bigint]
+ CPU: 0.00ns (0.00%), Scheduled: 0.00ns (0.00%), Output: 20 rows (260B)
+ Input total: 20 rows (260B), avg.: 1.25 rows, std.dev.: 225.39%
+ ...
+
When the ``VERBOSE`` option is used, some operators may report additional information.
For example, the window function operator will output the following:
diff --git a/presto-docs/src/main/sphinx/sql/explain.rst b/presto-docs/src/main/sphinx/sql/explain.rst
index 1b541b1d82e9a..dc5cf1339fd4e 100644
--- a/presto-docs/src/main/sphinx/sql/explain.rst
+++ b/presto-docs/src/main/sphinx/sql/explain.rst
@@ -20,9 +20,9 @@ Description
-----------
Show the logical or distributed execution plan of a statement, or validate the statement.
-Use ``TYPE DISTRIBUTED`` option to display fragmented plan. Each
-`plan fragment `_
-is executed by a single or multiple Presto nodes. Fragment type specifies how the fragment
+Use ``TYPE DISTRIBUTED`` option to display a fragmented plan. Each
+`plan fragment `_
+is executed by a single or multiple Presto nodes. Fragment type specifies how the fragment
is executed by Presto nodes and how the data is distributed between fragments:
``SINGLE``
@@ -152,6 +152,48 @@ IO:
}
}
+DDL Statements
+^^^^^^^^^^^^^^
+
+``EXPLAIN`` can also be used with DDL statements such as ``CREATE TABLE`` and ``DROP TABLE``.
+For these statements, the output shows a summary of the operation rather than an execution plan.
+
+CREATE TABLE:
+
+.. code-block:: none
+
+ presto:tiny> EXPLAIN CREATE TABLE new_table (id BIGINT, name VARCHAR);
+ Query Plan
+ --------------------------
+ CREATE TABLE new_table
+
+CREATE TABLE IF NOT EXISTS:
+
+.. code-block:: none
+
+ presto:tiny> EXPLAIN CREATE TABLE IF NOT EXISTS new_table (id BIGINT, name VARCHAR);
+ Query Plan
+ --------------------------------------
+ CREATE TABLE IF NOT EXISTS new_table
+
+DROP TABLE:
+
+.. code-block:: none
+
+ presto:tiny> EXPLAIN DROP TABLE test_table;
+ Query Plan
+ --------------------------------------------------------------
+ DROP TABLE test_table
+
+DROP TABLE IF EXISTS:
+
+.. code-block:: none
+
+ presto:tiny> EXPLAIN DROP TABLE IF EXISTS test_table;
+ Query Plan
+ --------------------------------------------------------------
+ DROP TABLE IF EXISTS test_table
+
See Also
--------
diff --git a/presto-docs/src/main/sphinx/sql/merge.rst b/presto-docs/src/main/sphinx/sql/merge.rst
index b4b738dda86ab..74a0b6473a39b 100644
--- a/presto-docs/src/main/sphinx/sql/merge.rst
+++ b/presto-docs/src/main/sphinx/sql/merge.rst
@@ -26,7 +26,7 @@ In the ``MATCHED`` case, the ``UPDATE`` column value expressions can depend on a
In the ``NOT MATCHED`` case, the ``INSERT`` expressions can depend on any field of the source.
The ``MERGE`` command requires each target row to match at most one source row. An exception is raised when a single target table row matches more than one source row.
-If a source row is not matched by the ``WHEN`` clause and there is no ``WHEN NOT MATCHED`` clause, the source row is ignored.
+If a source row is not matched by the ``WHEN MATCHED`` clause and there is no ``WHEN NOT MATCHED`` clause, the source row is ignored.
The ``MERGE`` statement is commonly used to integrate data from two tables with different contents but similar structures.
For example, the source table could be part of a production transactional system, while the target table might be located in a data warehouse for analytics.
diff --git a/presto-docs/src/main/sphinx/sql/select.rst b/presto-docs/src/main/sphinx/sql/select.rst
index 549030f9ec48b..3dc5e48603b08 100644
--- a/presto-docs/src/main/sphinx/sql/select.rst
+++ b/presto-docs/src/main/sphinx/sql/select.rst
@@ -229,6 +229,11 @@ is equivalent to::
(destination_state),
());
+.. note::
+
+ ``CUBE`` supports at most 30 columns. This is because CUBE generates 2^n
+ grouping sets, and 2^30 (approximately 1 billion) is the practical limit.
+
.. code-block:: none
origin_state | destination_state | _col0
diff --git a/presto-druid/pom.xml b/presto-druid/pom.xml
index aec5e2d885830..a24aa6d0b591a 100644
--- a/presto-druid/pom.xml
+++ b/presto-druid/pom.xml
@@ -32,14 +32,23 @@
- org.hibernate
+ org.hibernate.validatorhibernate-validator8.0.3.Finalorg.glassfishjakarta.el
- 4.0.1
+ 5.0.0-M1
+
+
+ at.yawk.lz4
+ lz4-java
+
+
+ org.mozilla
+ rhino
+ 1.8.1
diff --git a/presto-druid/src/main/java/com/facebook/presto/druid/segment/PrestoQueryableIndex.java b/presto-druid/src/main/java/com/facebook/presto/druid/segment/PrestoQueryableIndex.java
new file mode 100644
index 0000000000000..24d4fa716b6fe
--- /dev/null
+++ b/presto-druid/src/main/java/com/facebook/presto/druid/segment/PrestoQueryableIndex.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.facebook.presto.druid.segment;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Maps;
+import org.apache.druid.collections.bitmap.BitmapFactory;
+import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
+import org.apache.druid.query.OrderBy;
+import org.apache.druid.segment.DimensionHandler;
+import org.apache.druid.segment.Metadata;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.column.BaseColumnHolder;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.data.Indexed;
+import org.joda.time.Interval;
+
+import javax.annotation.Nullable;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+public class PrestoQueryableIndex
+ implements QueryableIndex
+{
+ private final Interval dataInterval;
+ private final List columnNames;
+ private final Indexed availableDimensions;
+ private final BitmapFactory bitmapFactory;
+ private final Map> columns;
+ private final SmooshedFileMapper fileMapper;
+ @Nullable
+ private final Metadata metadata;
+ private final Supplier