From 05336cfcd2e9ec9492b0f468e09743b3282d8f13 Mon Sep 17 00:00:00 2001 From: Ezra Varady <76978395+ezra-varady@users.noreply.github.com> Date: Tue, 24 Oct 2023 23:06:16 -1000 Subject: [PATCH] Add ASAN and UBSAN ci/cd checks (based on timescale's ci/cd) (#212) * initial work on getting asan support * touch ups * docker stuff working * fixup test so it doesnt fail under asan (failure was due to spurious variation in cost estimate) * update gitignore trap docker kill **** BUGFIX ***** cache the value instead of a pointer to it, this fixes a potential use after free detected by asan * fix action, new asan issues * fix paths for github too many lines * switching to PR, should help iron out the alst few bugs at least * mark more things sudo * chmod not chown * add step to cache build if it worked so that its not contingent on tests passing **** flush cache on retriever area reset * add support for ubsan in docker * add some notes about using the sanitizer container, update workflow to run ubsan against releases, cleanup attributions * log postgres output separately * pull down the correct ref * rebase onto main, lower ef_search minimum value --- .../workflows/sanitizer-build-and-test.yaml | 248 ++++++++++++++++++ .gitignore | 1 + CMakeLists.txt | 23 +- CONTRIBUTING.md | 4 + scripts/sanitizers/Dockerfile | 99 +++++++ scripts/sanitizers/bundle_coredump.sh | 20 ++ .../postgres-asan-instrumentation.patch | 53 ++++ scripts/sanitizers/run_sanitizers.sh | 61 +++++ scripts/sanitizers/suppressions/README.md | 7 + .../sanitizers/suppressions/suppr_asan.txt | 2 + .../sanitizers/suppressions/suppr_leak.txt | 33 +++ scripts/sanitizers/suppressions/suppr_ub.txt | 29 ++ src/hnsw/external_index.c | 14 +- src/hnsw/options.c | 2 +- src/hnsw/retriever.c | 2 + test/expected/hnsw_dist_func.out | 24 +- test/expected/hnsw_ef_search.out | 5 +- test/sql/hnsw_dist_func.sql | 8 +- 18 files changed, 604 insertions(+), 31 deletions(-) create mode 100644 .github/workflows/sanitizer-build-and-test.yaml create mode 100644 scripts/sanitizers/Dockerfile create mode 100755 scripts/sanitizers/bundle_coredump.sh create mode 100644 scripts/sanitizers/postgres-asan-instrumentation.patch create mode 100755 scripts/sanitizers/run_sanitizers.sh create mode 100644 scripts/sanitizers/suppressions/README.md create mode 100644 scripts/sanitizers/suppressions/suppr_asan.txt create mode 100644 scripts/sanitizers/suppressions/suppr_leak.txt create mode 100644 scripts/sanitizers/suppressions/suppr_ub.txt diff --git a/.github/workflows/sanitizer-build-and-test.yaml b/.github/workflows/sanitizer-build-and-test.yaml new file mode 100644 index 000000000..ae4785d67 --- /dev/null +++ b/.github/workflows/sanitizer-build-and-test.yaml @@ -0,0 +1,248 @@ +# Run tests with sanitizers enabled +# derived from from https://github.com/timescale/timescaledb/blob/main/.github/workflows/sanitizer-build-and-test.yaml +name: Sanitizer test +on: + push: + branches: + - main + - trigger/sanitizer + pull_request: + branches: + - main + paths: .github/workflows/sanitizer-build-and-test.yaml + release: + types: [created, edited] + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: "Run the build against llvm sanitizers" + required: false + default: false + +env: + name: "Sanitizer" + PG_SRC_DIR: "pgbuild" + PG_INSTALL_DIR: ${{ github.workspace }}/pgsql + extra_packages: "clang-15 llvm-15 llvm-15-dev llvm-15-tools" + llvm_config: "llvm-config-15" + CLANG: "clang-15" + CC: "clang-15" + CXX: "clang-15" + + CFLAGS: "-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions" + CXXFLAGS: "-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions" + LDFLAGS: "-fsanitize=address" + + ASAN_OPTIONS: suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_asan.txt + detect_odr_violation=0 log_path=${{ github.workspace }}/sanitizer/ + log_exe_name=true print_suppressions=false exitcode=27 + detect_leaks=0 abort_on_error=1 + + LSAN_OPTIONS: suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_leak.txt + print_suppressions=0 log_path=${{ github.workspace }}/sanitizer/ + log_exe_name=true print_suppressions=false exitcode=27 + + UBSAN_OPTIONS: suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_ub.txt + print_stacktrace=1 halt_on_error=1 log_path=${{ github.workspace }}/sanitizer/ + log_exe_name=true print_suppressions=false exitcode=27 + +jobs: + sanitizer: + # Change the JOB_NAME variable below when changing the name. + # Don't use the env variable here because the env context is not accessible. + name: PG${{ matrix.pg }} Sanitizer ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ["ubuntu-22.04"] + pg: ["11.21", "12.16", "13.2", "14.9", "15.4", "16.0"] + steps: + - name: Enable UBSan if this is a release + if: ${{ github.event_name == 'release' }} + run: | + echo "CFLAGS=\"\-g -fsanitize=address,undefined -fno-omit-frame-pointer -O0 -fno-inline-functions"" >> $GITHUB_ENV + echo "CXXFLAGS=\"\-g -fsanitize=address,undefined -fno-omit-frame-pointer -O0 -fno-inline-functions"" >> $GITHUB_ENV + echo "LDFLAGS=\"-fsanitize=address,undefined\"" >> $GITHUB_ENV + + - name: Install Linux Dependencies + run: | + sudo apt-get update + sudo apt-get install -y wget \ + curl \ + systemd-coredump \ + build-essential \ + gdb \ + make \ + cmake \ + pkg-config \ + flex \ + bison \ + libicu-dev \ + libssl-dev \ + clang-15 \ + llvm-15 \ + llvm-15-dev \ + llvm-15-tools \ + libstdc++-12-dev \ + libstdc++6 + + - name: Checkout lantern + uses: actions/checkout@v3 + with: + fetch-depth: 0 + submodules: "recursive" + + # We are going to rebuild Postgres daily, so that it doesn't suddenly break + # ages after the original problem. + - name: Get date for build caching + id: get-date + run: | + echo "date=$(date +"%m-%y")" >> $GITHUB_OUTPUT + + # Create a directory for sanitizer logs. This directory is referenced by + # ASAN_OPTIONS, LSAN_OPTIONS, and UBSAN_OPTIONS + - name: Create sanitizer log directory + run: | + mkdir ${{ github.workspace }}/sanitizer + + # we cache the build directory instead of the install directory here + # because extension installation will write files to install directory + # leading to a tainted cache + - name: Cache PostgreSQL ${{ matrix.pg }} + id: cache-postgresql + uses: actions/cache@v3 + with: + path: ~/${{ env.PG_SRC_DIR }} + key: "${{ matrix.os }}-${{ env.name }}-postgresql-${{ matrix.pg }}-${{ env.CC }}\ + -${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}" + + - name: Build PostgreSQL ${{ matrix.pg }} if not in cache + id: build-postgresql + if: steps.cache-postgresql.outputs.cache-hit != 'true' + run: | + wget -q -O postgresql.tar.bz2 \ + https://ftp.postgresql.org/pub/source/v${{ matrix.pg }}/postgresql-${{ matrix.pg }}.tar.bz2 + mkdir -p ~/$PG_SRC_DIR + tar --extract --file postgresql.tar.bz2 --directory ~/$PG_SRC_DIR --strip-components 1 + # Add instrumentation to the Postgres memory contexts. For more details, see + # https://www.postgresql.org/message-id/CAM-w4HNH7%2BU9jZevpVK7Wr49tkfpWSR6wav0RLYrq0HWuP5cxw%40mail.gmail.com + patch -F5 -p1 -d ~/$PG_SRC_DIR < scripts/sanitizers/postgres-asan-instrumentation.patch + cd ~/$PG_SRC_DIR + ./configure --prefix=$PG_INSTALL_DIR --enable-debug --enable-cassert \ + --with-openssl --without-readline --without-zlib --without-libxml + make -j$(nproc) + make -j$(nproc) -C contrib/pageinspect + make -j$(nproc) -C src/test/isolation + echo "exit_code=$?" >> $GITHUB_OUTPUT + + - name: save cache preemptively if postgres built + uses: actions/cache/save@v3 + if: steps.build-postgresql.outputs.exit_code == 0 + with: + path: ~/${{ env.PG_SRC_DIR }} + key: "${{ matrix.os }}-${{ env.name }}-postgresql-${{ matrix.pg }}-${{ env.CC }}\ + -${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}" + + - name: Upload config.log + if: always() && steps.cache-postgresql.outputs.cache-hit != 'true' + uses: actions/upload-artifact@v3 + with: + name: config.log for PostgreSQL ${{ matrix.os }} ${{ matrix.name }} ${{ matrix.pg }} + path: ~/${{ env.PG_SRC_DIR }}/config.log + + - name: Install PostgreSQL ${{ matrix.pg }} + run: | + make -C ~/$PG_SRC_DIR install + echo $PG_INSTALL_DIR/bin >> $GITHUB_PATH + sudo chmod -R 755 $PG_INSTALL_DIR/bin + $PG_INSTALL_DIR/bin/pg_config --version + + - name: Build Lantern + run: | + mkdir lantern_build + cd lantern_build + CXXFLAG="" cmake .. + make install + + - name: Start Postgres + run: | + mkdir -p $PG_INSTALL_DIR/data + #sudo chown -R postgres:postgres $PG_INSTALL_DIR/data + #sudo chown -R postgres:postgres ${{ github.workspace }}/sanitizer + #getent group postgres || sudo groupadd postgres + #id -u postgres &>/dev/null || sudo useradd -g postgres postgres + initdb -A trust -D $PG_INSTALL_DIR/data + postgres -D $PG_INSTALL_DIR/data >/tmp/postgres.log 2>&1 & + + - name: make test + run: | + cd lantern_build + make test + + - name: Show regression diffs + if: always() + id: collectlogs + run: | + find /tmp/lantern -name regression.diffs -exec cat {} + > regression.log + cp /tmp/postgres.log . + if [[ "${{ runner.os }}" == "Linux" ]] ; then + # wait in case there are in-progress coredumps + sleep 10 + if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>$GITHUB_OUTPUT; fi + # print OOM killer information + sudo journalctl --system -q --facility=kern --grep "Killed process" || true + fi + if [[ -s regression.log ]]; then echo "regression_diff=true" >>$GITHUB_OUTPUT; fi + #grep -e 'FAILED' -e 'failed (ignored)' installcheck.log || true + cat regression.log + + - name: Save regression diffs + if: always() && steps.collectlogs.outputs.regression_diff == 'true' + uses: actions/upload-artifact@v3 + with: + name: Regression diff ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} + path: | + regression.log + + - name: Save postgres log + if: always() && steps.collectlogs.outputs.regression_diff == 'true' + uses: actions/upload-artifact@v3 + with: + name: Postgres log ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} + path: | + postgres.log + + - name: Stack trace + if: always() && steps.collectlogs.outputs.coredumps == 'true' + run: | + sudo coredumpctl gdb <<<" + set verbose on + set trace-commands on + show debug-file-directory + printf "'"'"query = '%s'\n\n"'"'", debug_query_string + frame function ExceptionalCondition + printf "'"'"condition = '%s'\n"'"'", conditionName + up 1 + l + info args + info locals + bt full + " 2>&1 | tee stacktrace.log + ./scripts/sanitizers/bundle_coredump.sh + grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||: + + - name: Coredumps + if: always() && steps.collectlogs.outputs.coredumps == 'true' + uses: actions/upload-artifact@v3 + with: + name: Coredumps ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} + path: coredumps + + - name: sanitizer logs + if: always() + uses: actions/upload-artifact@v3 + with: + name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }} + path: ${{ github.workspace }}/sanitizer diff --git a/.gitignore b/.gitignore index 788f26863..2f2708997 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,7 @@ test/tmp_output/ .DS_Store build data +sanitizer .vscode/ .devcontainer/ .cache diff --git a/CMakeLists.txt b/CMakeLists.txt index 4553ad203..becb084b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -119,15 +119,24 @@ set_target_properties( # needed to make sure cmake does not add libstdc++ to the linker command when an # external cpp library is added more at` # https://cmake-developers.cmake.narkive.com/JnbrDyGT/setting-linker-language-still-adds-lstdc - if(NOT APPLE) - # apples does not understand -static-libstdc++ used in usearch to bundle libstdc++ with the - # created archive. - # so, on apple we dynamically link to the c++ runtime - # todo:: find a way to statically link the c++ runtime on mac - set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") - set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "") + # clang handles static libstdc++ differently than gcc + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + find_library(STATIC_LIBSTDCPP NAMES libstdc++.a PATHS ${CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES}) + + if(STATIC_LIBSTDCPP) + set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES};${STATIC_LIBSTDCPP}") + endif() + else() + # apples does not understand -static-libstdc++ used in usearch to bundle libstdc++ with the + # created archive. + # so, on apple we dynamically link to the c++ runtime + # todo:: find a way to statically link the c++ runtime on mac + set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") + set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "") + endif() endif() + set_target_properties(lantern PROPERTIES LINKER_LANGUAGE C) target_include_directories(lantern PRIVATE "./third_party/usearch/c") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dc8f6be78..c80843703 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -39,6 +39,10 @@ Below is a short recording demonstrating the use of `livedebug.py`: [![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt) +## Running sanitizers + +To ensure that code is safe, pull requests are tested using google's [AddressSanitizer](https://github.com/google/sanitizers/wiki/AddressSanitizer). Additionally [UBSan](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html) is run against releases. A [docker container](scripts/sanitizers/Dockerfile) is provided for testing changes locally. it can be invoked by running the script `scripts/sanitizers/run_sanitizers.sh`. **Please note that this script must be run in the root directory of the lantern repository**. By default it will build `postgres 15.4` and run tests against it instrumented only with AddressSanitizer. If you would like to run UBSan you can pass the `-u` flag. If you wish to test against a specific version you can use the `-v` flag specifying a specific version, e.g. `scripts/sanitizers/run_sanitizers.sh -u -v11.21` + ## Adding/modifying LanternDB's SQL interface When modifying the SQL interface, you add relevant SQL logic under `sql/`. In addition, you add an update script under `sql/updates`, in a file named `[CURRENT_VERSION]--latest.sql`. You should create this file if it does not exist. diff --git a/scripts/sanitizers/Dockerfile b/scripts/sanitizers/Dockerfile new file mode 100644 index 000000000..5f3b0976a --- /dev/null +++ b/scripts/sanitizers/Dockerfile @@ -0,0 +1,99 @@ +FROM debian:bookworm + +ARG VERSION=15.4 +ARG PGVECTOR_VERSION=0.5.0 +ARG UBSAN= + +WORKDIR /lantern +# This requires the docker command be run in the lantern base director +COPY scripts scripts + +WORKDIR pg_build + +RUN apt-get update && \ + apt-mark hold locales && \ + apt-get install -y \ + wget \ + curl \ + build-essential \ + make \ + cmake \ + pkg-config \ + flex \ + bison \ + libicu-dev \ + libssl-dev \ + clang-15 \ + llvm-15 \ + llvm-15-dev \ + llvm-15-tools \ + libstdc++6 + +RUN wget -q -O postgresql.tar.bz2 \ + https://ftp.postgresql.org/pub/source/v${VERSION}/postgresql-${VERSION}.tar.bz2 && \ + tar --extract --file postgresql.tar.bz2 --directory . --strip-components 1 + +RUN patch -F5 -p1 -d . < /lantern/scripts/sanitizers/postgres-asan-instrumentation.patch + +RUN groupadd -r postgres --gid=999 && \ + useradd -r -g postgres --uid=999 postgres + +ENV LLVM_CONFIG "llvm-config-15" +ENV CC "clang-15" +ENV CXX "clang-15" +ENV CFLAGS "-g -fsanitize=address${UBSAN} -fno-omit-frame-pointer -Og -fno-inline-functions" +ENV CXXFLAGS "-g -fsanitize=address${UBSAN} -fno-omit-frame-pointer -Og -fno-inline-functions" +ENV LDFLAGS "-fsanitize=address${UBSAN}" + +RUN ./configure --prefix=/usr/local/pgsql --enable-debug --enable-cassert \ + --with-openssl --without-readline --without-zlib --without-libxml && \ + make -j$(nproc) && \ + make -j$(nproc) -C src/test/isolation && \ + make install + +ENV PATH="/usr/local/pgsql/bin:${PATH}" +ENV LD_LIBRARY_PATH=:/usr/local/pgsql/lib +ENV PGDATA=/var/lib/postgresql/data +RUN mkdir -p ${PGDATA} && \ + chown -R postgres:postgres ${PGDATA} && \ + chmod 777 ${PGDATA} + +WORKDIR /lantern + +RUN mkdir /lantern/sanitizer && \ + chown -R postgres:postgres /lantern && \ + chmod 777 /lantern/sanitizer + +ENV ASAN_OPTIONS suppressions=/lantern/scripts/sanitizers/suppressions/suppr_asan.txt \ + detect_odr_violation=0 log_path=/lantern/sanitizer/ \ + log_exe_name=true print_suppressions=false exitcode=27 \ + detect_leaks=0 abort_on_error=1 + +ENV LSAN_OPTIONS suppressions=/lantern/scripts/sanitizers/suppressions/suppr_leak.txt \ + print_suppressions=0 log_path=/lantern/sanitizer/ \ + log_exe_name=true print_suppressions=false exitcode=27 + +ENV UBSAN_OPTIONS suppressions=/lantern/scripts/sanitizers/suppressions/suppr_ub.txt \ + print_stacktrace=1 halt_on_error=1 log_path=/lantern/sanitizer/ \ + log_exe_name=true print_suppressions=false exitcode=27 + +RUN wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \ + tar xf pgvector.tar.gz && \ + cd pgvector-${PGVECTOR_VERSION} && \ + make && make install + +COPY . . + +RUN rm -rf build && \ + mkdir build && \ + cd build && \ + CXXFLAGS="-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions" cmake .. && \ + make install + +USER postgres +RUN initdb -D ${PGDATA} && \ + echo "local all all trust" > ${PGDATA}/pg_hba.conf && \ + echo "host all all 127.0.0.1/32 trust" >> ${PGDATA}/pg_hba.conf && \ + echo "host all all ::1/128 trust" >> ${PGDATA}/pg_hba.conf + +CMD ["postgres", "-D", "/var/lib/postgresql/data"] diff --git a/scripts/sanitizers/bundle_coredump.sh b/scripts/sanitizers/bundle_coredump.sh new file mode 100755 index 000000000..b2a207c12 --- /dev/null +++ b/scripts/sanitizers/bundle_coredump.sh @@ -0,0 +1,20 @@ +# This file was copied from timescaledb +#!/bin/bash + +TARGET=coredumps +COREDUMP_DIR=/var/lib/systemd/coredump + +set -e + +mkdir -p "$TARGET" + +# get information from gdb +info=$(echo "info sharedlibrary" | coredumpctl gdb) + +executable=$(echo "$info" | grep Executable | sed -e 's!^[^/]\+!!') + +cp "$executable" "$TARGET" +cp ${COREDUMP_DIR}/* "$TARGET" + +# copy libraries extracted from gdb info +echo "$info" | grep '^0x' | sed -e 's!^[^/]\+!!' | xargs -ILIB cp "LIB" "$TARGET" diff --git a/scripts/sanitizers/postgres-asan-instrumentation.patch b/scripts/sanitizers/postgres-asan-instrumentation.patch new file mode 100644 index 000000000..bc8f4e184 --- /dev/null +++ b/scripts/sanitizers/postgres-asan-instrumentation.patch @@ -0,0 +1,53 @@ +diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c +index 2c50575b37..11b6c688c7 100644 +--- a/src/backend/tcop/postgres.c ++++ b/src/backend/tcop/postgres.c +@@ -3492,6 +4476,12 @@ check_stack_depth(void) + bool + stack_is_too_deep(void) + { ++ /* ++ * Pointer arithmetics to determine stack depth doesn't work under ++ * AddressSanitizer. ++ */ ++ return false; ++ + char stack_top_loc; + long stack_depth; + +diff --git a/src/include/utils/memdebug.h b/src/include/utils/memdebug.h +index e88b4c6e8e..4ccbbf0146 100644 +--- a/src/include/utils/memdebug.h ++++ b/src/include/utils/memdebug.h +@@ -19,6 +19,31 @@ + + #ifdef USE_VALGRIND + #include ++ ++#elif __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) ++ ++#include ++ ++#define VALGRIND_MAKE_MEM_DEFINED(addr, size) \ ++ ASAN_UNPOISON_MEMORY_REGION(addr, size) ++ ++#define VALGRIND_MAKE_MEM_NOACCESS(addr, size) \ ++ ASAN_POISON_MEMORY_REGION(addr, size) ++ ++#define VALGRIND_MAKE_MEM_UNDEFINED(addr, size) \ ++ ASAN_UNPOISON_MEMORY_REGION(addr, size) ++ ++#define VALGRIND_MEMPOOL_ALLOC(context, addr, size) \ ++ ASAN_UNPOISON_MEMORY_REGION(addr, size) ++ ++#define VALGRIND_MEMPOOL_FREE(context, addr) \ ++ ASAN_POISON_MEMORY_REGION(addr, 1 /* Length unknown, poison first byte. */) ++ ++#define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size) do {} while (0) ++#define VALGRIND_CREATE_MEMPOOL(context, redzones, zeroed) do {} while (0) ++#define VALGRIND_DESTROY_MEMPOOL(context) do {} while (0) ++#define VALGRIND_MEMPOOL_CHANGE(context, optr, nptr, size) do {} while (0) ++ + #else + #define VALGRIND_CHECK_MEM_IS_DEFINED(addr, size) do {} while (0) + #define VALGRIND_CREATE_MEMPOOL(context, redzones, zeroed) do {} while (0) diff --git a/scripts/sanitizers/run_sanitizers.sh b/scripts/sanitizers/run_sanitizers.sh new file mode 100755 index 000000000..1f0df5553 --- /dev/null +++ b/scripts/sanitizers/run_sanitizers.sh @@ -0,0 +1,61 @@ +#!/bin/bash +u_flag="" +v_flag="" +if [ ! -f src/hnsw.c ]; then + echo "script must be run in lantern root directory" + exit 1 +fi + +function print_usage { + printf "FLAGS: + '-u' | run the container with ubsan enabled. This may take a long time, expect testing take about 30m + '-v ' | the version of postgres you wish to test against, by default 15.4. Must include minor version\n" +} + +while getopts ':uv:' flag; do + case "${flag}" in + u) u_flag="true" ;; + v) v_flag="$OPTARG" ;; + *) print_usage + exit 1 ;; + esac +done + +function kill_docker { + docker kill lantern-sanitizers +} + +trap kill_docker EXIT + +mkdir -p sanitizer + +CONTAINER="" +ARGS="" +if [[ ! -z $v_flag ]]; then + if [[ $v_flag =~ [0-9]{2}\.[0-9]{1,2} ]]; then + CONTAINER="-$v_flag" + ARGS="--build-arg VERSION=$v_flag" + else + echo "please specify a valid version" + exit 1 + fi +fi +if [[ "$u_flag" == "true" ]]; then + ARGS="$ARGS --build-arg UBSAN=,undefined" + CONTAINER="lantern-san-ub$CONTAINER" +else + CONTAINER="lantern-san$CONTAINER" +fi + +docker build -t $CONTAINER -f scripts/sanitizers/Dockerfile $ARGS . + +docker run --rm -d -v $(pwd)/sanitizer:/lantern/sanitizer --name lantern-sanitizers $CONTAINER + +docker exec -i -u root lantern-sanitizers /bin/bash <blocknos[ offset ]; - cache_set_item(cache, &id, &blockmap_page->blocknos[ offset ]); + size_t cache_value = (size_t)blockno; + cache_set_item(cache, &id, (void *)cache_value); if(!idx_pagemap_prelocked) { UnlockReleaseBuffer(buf); } diff --git a/src/hnsw/options.c b/src/hnsw/options.c index 9e9f61a9a..6e15e24be 100644 --- a/src/hnsw/options.c +++ b/src/hnsw/options.c @@ -226,7 +226,7 @@ void _PG_init(void) "Valid values are in range [1, 400]", &ldb_hnsw_ef_search, USEARCH_SEARCH_EF_INVALID_VALUE, - 1, + USEARCH_SEARCH_EF_INVALID_VALUE, HNSW_MAX_EF, PGC_USERSET, 0, diff --git a/src/hnsw/retriever.c b/src/hnsw/retriever.c index 1b6a5965d..c9fd5bfec 100644 --- a/src/hnsw/retriever.c +++ b/src/hnsw/retriever.c @@ -48,6 +48,8 @@ void ldb_wal_retriever_area_reset(RetrieverCtx *ctx, HnswIndexHeaderPage *header } dlist_init(&ctx->takenbuffers); + fa_cache_init(&ctx->fa_cache); + assert(ctx->header_page_under_wal == header_page_under_wal); ctx->header_page_under_wal = header_page_under_wal; } diff --git a/test/expected/hnsw_dist_func.out b/test/expected/hnsw_dist_func.out index 78acab3a5..3006d4ffd 100644 --- a/test/expected/hnsw_dist_func.out +++ b/test/expected/hnsw_dist_func.out @@ -104,24 +104,24 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) (4 rows) -- Verify that the indexes is being used -EXPLAIN SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; - QUERY PLAN ------------------------------------------------------------------------------------------------- - Index Scan using small_world_l2_v_idx on small_world_l2 (cost=0.00..60.05 rows=1070 width=20) +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; + QUERY PLAN +--------------------------------------------------------- + Index Scan using small_world_l2_v_idx on small_world_l2 Order By: (v <-> '{0,1,0}'::real[]) (2 rows) -EXPLAIN SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Index Scan using small_world_cos_v_idx on small_world_cos (cost=0.00..60.05 rows=1070 width=20) +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; + QUERY PLAN +----------------------------------------------------------- + Index Scan using small_world_cos_v_idx on small_world_cos Order By: (v <-> '{0,1,0}'::real[]) (2 rows) -EXPLAIN SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; - QUERY PLAN --------------------------------------------------------------------------------------------------- - Index Scan using small_world_ham_v_idx on small_world_ham (cost=0.00..60.05 rows=1070 width=20) +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; + QUERY PLAN +----------------------------------------------------------- + Index Scan using small_world_ham_v_idx on small_world_ham Order By: (v <-> '{0,1,0}'::integer[]) (2 rows) diff --git a/test/expected/hnsw_ef_search.out b/test/expected/hnsw_ef_search.out index f5146e59e..38b1a2b1a 100644 --- a/test/expected/hnsw_ef_search.out +++ b/test/expected/hnsw_ef_search.out @@ -23,11 +23,10 @@ INSERT INTO sift_base1k (id, v) VALUES -- Validate error on invalid ef_search values \set ON_ERROR_STOP off SET lantern_hnsw.ef_search = -1; -ERROR: -1 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400) +ERROR: -1 is outside the valid range for parameter "lantern_hnsw.ef_search" (0 .. 400) SET lantern_hnsw.ef_search = 0; -ERROR: 0 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400) SET lantern_hnsw.ef_search = 401; -ERROR: 401 is outside the valid range for parameter "lantern_hnsw.ef_search" (1 .. 400) +ERROR: 401 is outside the valid range for parameter "lantern_hnsw.ef_search" (0 .. 400) \set ON_ERROR_STOP on -- Repeat the same query while varying ef parameter -- NOTE: it is not entirely known if the results of these are deterministic diff --git a/test/sql/hnsw_dist_func.sql b/test/sql/hnsw_dist_func.sql index 4184fb35f..c2767c4a4 100644 --- a/test/sql/hnsw_dist_func.sql +++ b/test/sql/hnsw_dist_func.sql @@ -29,9 +29,9 @@ SELECT ARRAY_AGG(id ORDER BY id), ROUND(cos_dist(v, '{0,1,0}')::numeric, 2) FROM SELECT ARRAY_AGG(id ORDER BY id), ROUND(hamming_dist(v, '{0,1,0}')::numeric, 2) FROM small_world_ham GROUP BY 2 ORDER BY 2; -- Verify that the indexes is being used -EXPLAIN SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; -EXPLAIN SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; -EXPLAIN SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_l2 ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_cos ORDER BY v <-> '{0,1,0}'; +EXPLAIN (COSTS false) SELECT id FROM small_world_ham ORDER BY v <-> '{0,1,0}'; \set ON_ERROR_STOP off @@ -97,4 +97,4 @@ CREATE TABLE extra_small_world_ham ( ); INSERT INTO extra_small_world_ham (v) VALUES ('{0,0}'), ('{1,1}'), ('{2,2}'), ('{3,3}'); CREATE INDEX ON extra_small_world_ham USING hnsw (v dist_hamming_ops) WITH (dim=2); -SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <-> '{0,0}'; \ No newline at end of file +SELECT ROUND(hamming_dist(v, '{0,0}')::numeric, 2) FROM extra_small_world_ham ORDER BY v <-> '{0,0}';