Skip to content

Commit

Permalink
Add ASAN and UBSAN ci/cd checks (based on timescale's ci/cd) (#212)
Browse files Browse the repository at this point in the history
* initial work on getting asan support
* touch ups
* docker stuff working
* fix up test so it doesn't fail under ASan (failure was due to spurious variation in the cost estimate)
* update gitignore trap docker kill
**** BUGFIX ***** cache the value instead of a pointer to it, this fixes a potential use after free detected by asan
* fix action, new asan issues
* fix paths for github
 too many lines
* switching to PR, should help iron out the last few bugs at least
* mark more things sudo
* chmod not chown
* add step to cache build if it worked so that its not contingent on tests passing
**** flush cache on retriever area reset
* add support for ubsan in docker
* add some notes about using the sanitizer container, update workflow to run ubsan against releases, cleanup attributions
* log postgres output separately
* pull down the correct ref
* rebase onto main, lower ef_search minimum value
  • Loading branch information
ezra-varady authored Oct 25, 2023
1 parent cd8186c commit 05336cf
Show file tree
Hide file tree
Showing 18 changed files with 604 additions and 31 deletions.
248 changes: 248 additions & 0 deletions .github/workflows/sanitizer-build-and-test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
# Run tests with sanitizers enabled
# Derived from https://github.com/timescale/timescaledb/blob/main/.github/workflows/sanitizer-build-and-test.yaml
name: Sanitizer test

# Triggers:
#   - pushes to main and to the trigger/sanitizer branch
#   - pull requests against main that touch this workflow file
#   - releases being created or edited
#   - manual dispatch
# NOTE(review): the pull_request path filter limits PR runs to changes of this
# workflow file itself; confirm that this is intended.
on:
  push:
    branches:
      - main
      - trigger/sanitizer
  pull_request:
    branches:
      - main
    paths:
      - .github/workflows/sanitizer-build-and-test.yaml
  release:
    types:
      - created
      - edited
  workflow_dispatch:
    inputs:
      # NOTE(review): no step visible in this workflow reads this input.
      debug_enabled:
        type: boolean
        description: "Run the build against llvm sanitizers"
        required: false
        default: false

# Environment shared by every step of the workflow.
env:
  name: "Sanitizer"
  PG_SRC_DIR: "pgbuild"
  PG_INSTALL_DIR: "${{ github.workspace }}/pgsql"
  extra_packages: "clang-15 llvm-15 llvm-15-dev llvm-15-tools"
  llvm_config: "llvm-config-15"
  CLANG: "clang-15"
  CC: "clang-15"
  CXX: "clang-15"

  # AddressSanitizer only by default. The "Enable UBSan if this is a release"
  # step overrides these three variables for release events.
  CFLAGS: "-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions"
  CXXFLAGS: "-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions"
  LDFLAGS: "-fsanitize=address"

  # The option strings below are folded scalars: the line breaks become single
  # spaces, so each variable holds one space-separated list of options.
  # All three sanitizers write their reports under <workspace>/sanitizer/.
  ASAN_OPTIONS: >-
    suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_asan.txt
    detect_odr_violation=0 log_path=${{ github.workspace }}/sanitizer/
    log_exe_name=true print_suppressions=false exitcode=27
    detect_leaks=0 abort_on_error=1

  # print_suppressions was previously listed twice (=0 and =false); one
  # occurrence is enough.
  LSAN_OPTIONS: >-
    suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_leak.txt
    log_path=${{ github.workspace }}/sanitizer/
    log_exe_name=true print_suppressions=false exitcode=27

  UBSAN_OPTIONS: >-
    suppressions=${{ github.workspace }}/scripts/sanitizers/suppressions/suppr_ub.txt
    print_stacktrace=1 halt_on_error=1 log_path=${{ github.workspace }}/sanitizer/
    log_exe_name=true print_suppressions=false exitcode=27

jobs:
  sanitizer:
    # The display name is spelled out with matrix values rather than the
    # workflow-level env variable, because the env context is not accessible
    # here.
    # NOTE(review): the earlier comment asked to keep a JOB_NAME variable in
    # sync with this name; no such variable is visible in this file.
    name: PG${{ matrix.pg }} Sanitizer ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # A failure for one PostgreSQL version does not cancel the other runs.
      fail-fast: false
      matrix:
        os:
          - "ubuntu-22.04"
        pg:
          - "11.21"
          - "12.16"
          - "13.2"
          - "14.9"
          - "15.4"
          - "16.0"
    steps:
# For release events, rebuild everything with UndefinedBehaviorSanitizer in
# addition to AddressSanitizer by overriding the workflow-level flags.
- name: Enable UBSan if this is a release
  if: ${{ github.event_name == 'release' }}
  # Everything after "NAME=" in GITHUB_ENV becomes the value verbatim, so the
  # values must not be wrapped in quote characters or carry stray backslashes
  # (the previous quoting wrote a literal `"\-g ...` into CFLAGS/CXXFLAGS and
  # literal quotes into LDFLAGS).
  run: |
    {
      echo "CFLAGS=-g -fsanitize=address,undefined -fno-omit-frame-pointer -O0 -fno-inline-functions"
      echo "CXXFLAGS=-g -fsanitize=address,undefined -fno-omit-frame-pointer -O0 -fno-inline-functions"
      echo "LDFLAGS=-fsanitize=address,undefined"
    } >> "$GITHUB_ENV"
# Installs the clang/LLVM 15 toolchain, the PostgreSQL build prerequisites and
# the crash-analysis tools (gdb, systemd-coredump) used by later steps.
- name: Install Linux Dependencies
  run: |
    sudo apt-get update
    sudo apt-get install -y wget \
      curl \
      systemd-coredump \
      build-essential \
      gdb \
      make \
      cmake \
      pkg-config \
      flex \
      bison \
      libicu-dev \
      libssl-dev \
      clang-15 \
      llvm-15 \
      llvm-15-dev \
      llvm-15-tools \
      libstdc++-12-dev \
      libstdc++6
# Check out the repository with full history and all submodules.
- name: Checkout lantern
  uses: actions/checkout@v3
  with:
    submodules: "recursive"
    fetch-depth: 0

# The month-year stamp produced here is part of the PostgreSQL cache key, so
# the cached build is dropped and Postgres is rebuilt whenever the stamp
# changes; that way it doesn't suddenly break ages after the original problem.
# NOTE(review): this comment used to say the rebuild is daily, but
# `date +"%m-%y"` only changes once a month; confirm which cadence is intended.
- name: Get date for build caching
id: get-date
run: |
echo "date=$(date +"%m-%y")" >> $GITHUB_OUTPUT
# Create the directory for sanitizer logs. It is referenced by the log_path
# setting in ASAN_OPTIONS, LSAN_OPTIONS, and UBSAN_OPTIONS.
- name: Create sanitizer log directory
  run: |
    # -p keeps the step from failing if the directory already exists; the
    # quotes protect a workspace path that contains spaces.
    mkdir -p "${{ github.workspace }}/sanitizer"
# The build directory is cached rather than the install directory: installing
# the extension writes files into the install directory, which would taint the
# cache.
- name: Cache PostgreSQL ${{ matrix.pg }}
  id: cache-postgresql
  uses: actions/cache@v3
  with:
    path: ~/${{ env.PG_SRC_DIR }}
    # Same key string as before, written on one line instead of using an
    # escaped line break inside the double-quoted scalar.
    key: "${{ matrix.os }}-${{ env.name }}-postgresql-${{ matrix.pg }}-${{ env.CC }}-${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}"

# Runs only on a cache miss: download the PostgreSQL sources, apply the
# sanitizer instrumentation patch from this repository, then configure and
# compile the server, contrib/pageinspect and the isolation test tooling.
- name: Build PostgreSQL ${{ matrix.pg }} if not in cache
  id: build-postgresql
  if: steps.cache-postgresql.outputs.cache-hit != 'true'
  run: |
    wget -q -O postgresql.tar.bz2 \
      https://ftp.postgresql.org/pub/source/v${{ matrix.pg }}/postgresql-${{ matrix.pg }}.tar.bz2
    mkdir -p ~/$PG_SRC_DIR
    tar --extract --file postgresql.tar.bz2 --directory ~/$PG_SRC_DIR --strip-components 1
    # Add instrumentation to the Postgres memory contexts. For more details, see
    # https://www.postgresql.org/message-id/CAM-w4HNH7%2BU9jZevpVK7Wr49tkfpWSR6wav0RLYrq0HWuP5cxw%40mail.gmail.com
    patch -F5 -p1 -d ~/$PG_SRC_DIR < scripts/sanitizers/postgres-asan-instrumentation.patch
    cd ~/$PG_SRC_DIR
    ./configure --prefix=$PG_INSTALL_DIR --enable-debug --enable-cassert \
      --with-openssl --without-readline --without-zlib --without-libxml
    make -j$(nproc)
    make -j$(nproc) -C contrib/pageinspect
    make -j$(nproc) -C src/test/isolation
    echo "exit_code=$?" >> $GITHUB_OUTPUT
# Save the freshly built tree right away so that caching it is not contingent
# on the later test steps passing.
- name: save cache preemptively if postgres built
  uses: actions/cache/save@v3
  # `outcome` is 'skipped' when the build step did not run (cache hit), so
  # nothing is re-saved in that case. The previous check,
  # `outputs.exit_code == 0`, also matched a skipped build because an unset
  # output is coerced to 0 by the loose comparison.
  if: steps.build-postgresql.outcome == 'success'
  with:
    path: ~/${{ env.PG_SRC_DIR }}
    key: "${{ matrix.os }}-${{ env.name }}-postgresql-${{ matrix.pg }}-${{ env.CC }}-${{ steps.get-date.outputs.date }}-${{ hashFiles('.github/**') }}"

# Keep configure's log as an artifact whenever PostgreSQL was not restored
# from the cache, even if the build itself failed.
- name: Upload config.log
  if: always() && steps.cache-postgresql.outputs.cache-hit != 'true'
  uses: actions/upload-artifact@v3
  with:
    # The matrix defines only `os` and `pg`; the build label lives in the
    # workflow-level `env.name`, which the other artifact names also use.
    name: config.log for PostgreSQL ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
    path: ~/${{ env.PG_SRC_DIR }}/config.log

# Install from the (possibly cached) build tree, put the binaries on PATH for
# the following steps, and print the installed version.
- name: Install PostgreSQL ${{ matrix.pg }}
  run: |
    make -C ~/$PG_SRC_DIR install
    echo $PG_INSTALL_DIR/bin >> $GITHUB_PATH
    sudo chmod -R 755 $PG_INSTALL_DIR/bin
    $PG_INSTALL_DIR/bin/pg_config --version
# Configure the extension in a separate build directory and install it.
# NOTE(review): the variable passed to cmake is spelled CXXFLAG (singular),
# not CXXFLAGS; confirm whether clearing CXXFLAGS was intended.
- name: Build Lantern
  run: |
    mkdir lantern_build
    cd lantern_build
    CXXFLAG="" cmake ..
    make install
# Initialise a data directory with trust authentication and start the server
# in the background; its output goes to /tmp/postgres.log.
- name: Start Postgres
  run: |
    mkdir -p $PG_INSTALL_DIR/data
    #sudo chown -R postgres:postgres $PG_INSTALL_DIR/data
    #sudo chown -R postgres:postgres ${{ github.workspace }}/sanitizer
    #getent group postgres || sudo groupadd postgres
    #id -u postgres &>/dev/null || sudo useradd -g postgres postgres
    initdb -A trust -D $PG_INSTALL_DIR/data
    postgres -D $PG_INSTALL_DIR/data >/tmp/postgres.log 2>&1 &
# Run the test target from the lantern build directory.
- name: make test
  run: |
    cd lantern_build
    make test
# Gather diagnostics after the tests, whatever their result:
#   - concatenate every regression.diffs under /tmp/lantern into regression.log
#   - copy the server log into the workspace
#   - set the `coredumps` and `regression_diff` outputs read by later steps
- name: Show regression diffs
  if: always()
  id: collectlogs
  run: |
    # This step also runs after earlier failures, when the test output tree or
    # the server log may not exist yet. Tolerate that, so the coredump and
    # regression checks below still run instead of the step aborting here.
    find /tmp/lantern -name regression.diffs -exec cat {} + > regression.log || true
    cp /tmp/postgres.log . || true
    if [[ "${{ runner.os }}" == "Linux" ]] ; then
      # wait in case there are in-progress coredumps
      sleep 10
      if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>"$GITHUB_OUTPUT"; fi
      # print OOM killer information
      sudo journalctl --system -q --facility=kern --grep "Killed process" || true
    fi
    if [[ -s regression.log ]]; then echo "regression_diff=true" >>"$GITHUB_OUTPUT"; fi
    #grep -e 'FAILED' -e 'failed (ignored)' installcheck.log || true
    cat regression.log
# Upload regression.log when the log collection step found regression diffs.
- name: Save regression diffs
  uses: actions/upload-artifact@v3
  if: always() && steps.collectlogs.outputs.regression_diff == 'true'
  with:
    name: Regression diff ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
    path: |
      regression.log
# Upload the copied server log.
# NOTE(review): the condition is keyed on regression_diff, so the server log is
# only uploaded when regression diffs exist; confirm that this is intended.
- name: Save postgres log
  uses: actions/upload-artifact@v3
  if: always() && steps.collectlogs.outputs.regression_diff == 'true'
  with:
    name: Postgres log ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
    path: |
      postgres.log
# Runs only when the log collection step reported core dumps. It feeds a
# scripted gdb session to `coredumpctl gdb` and tees the output into
# stacktrace.log, then runs the repository's bundle_coredump.sh script, and
# finally copies the postgres.log context around any "was terminated by signal"
# message into postgres-failure.log (the trailing `||:` keeps a grep miss from
# failing the step).
- name: Stack trace
if: always() && steps.collectlogs.outputs.coredumps == 'true'
run: |
sudo coredumpctl gdb <<<"
set verbose on
set trace-commands on
show debug-file-directory
printf "'"'"query = '%s'\n\n"'"'", debug_query_string
frame function ExceptionalCondition
printf "'"'"condition = '%s'\n"'"'", conditionName
up 1
l
info args
info locals
bt full
" 2>&1 | tee stacktrace.log
./scripts/sanitizers/bundle_coredump.sh
grep -C40 "was terminated by signal" postgres.log > postgres-failure.log ||:
# Upload the `coredumps` path when core dumps were detected.
- name: Coredumps
  uses: actions/upload-artifact@v3
  if: always() && steps.collectlogs.outputs.coredumps == 'true'
  with:
    name: Coredumps ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
    path: coredumps

# Always upload the sanitizer log directory, i.e. the directory that the
# log_path setting of the *SAN_OPTIONS variables points at.
- name: sanitizer logs
  uses: actions/upload-artifact@v3
  if: always()
  with:
    name: sanitizer logs ${{ matrix.os }} ${{ env.name }} ${{ matrix.pg }}
    path: ${{ github.workspace }}/sanitizer
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ test/tmp_output/
.DS_Store
build
data
sanitizer
.vscode/
.devcontainer/
.cache
23 changes: 16 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,15 +119,24 @@ set_target_properties(
# needed to make sure cmake does not add libstdc++ to the linker command when an
# external cpp library is added more at`
# https://cmake-developers.cmake.narkive.com/JnbrDyGT/setting-linker-language-still-adds-lstdc

if(NOT APPLE)
# apples does not understand -static-libstdc++ used in usearch to bundle libstdc++ with the
# created archive.
# so, on apple we dynamically link to the c++ runtime
# todo:: find a way to statically link the c++ runtime on mac
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "")
# clang handles static libstdc++ differently than gcc
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
find_library(STATIC_LIBSTDCPP NAMES libstdc++.a PATHS ${CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES})

if(STATIC_LIBSTDCPP)
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "${CMAKE_CXX_IMPLICIT_LINK_LIBRARIES};${STATIC_LIBSTDCPP}")
endif()
else()
# Apple does not understand -static-libstdc++, which usearch uses to bundle libstdc++ with the
# created archive.
# So, on Apple we dynamically link to the C++ runtime.
# TODO: find a way to statically link the C++ runtime on Mac
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "")
endif()
endif()

set_target_properties(lantern PROPERTIES LINKER_LANGUAGE C)

target_include_directories(lantern PRIVATE "./third_party/usearch/c")
Expand Down
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ Below is a short recording demonstrating the use of `livedebug.py`:

[![asciicast](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt.svg)](https://asciinema.org/a/jTsbWdOcTvUl4iAJlAw3Cszbt)

## Running sanitizers

To ensure that code is safe, pull requests are tested using Google's [AddressSanitizer](https://github.com/google/sanitizers/wiki/AddressSanitizer). Additionally, [UBSan](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html) is run against releases. A [docker container](scripts/sanitizers/Dockerfile) is provided for testing changes locally. It can be invoked by running the script `scripts/sanitizers/run_sanitizers.sh`. **Please note that this script must be run in the root directory of the lantern repository**. By default it will build `postgres 15.4` and run tests against it instrumented only with AddressSanitizer. If you would like to run UBSan as well, you can pass the `-u` flag. If you wish to test against a specific version, you can use the `-v` flag, e.g. `scripts/sanitizers/run_sanitizers.sh -u -v11.21`

## Adding/modifying LanternDB's SQL interface

When modifying the SQL interface, you add relevant SQL logic under `sql/`. In addition, you add an update script under `sql/updates`, in a file named `[CURRENT_VERSION]--latest.sql`. You should create this file if it does not exist.
Expand Down
99 changes: 99 additions & 0 deletions scripts/sanitizers/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# Image for building PostgreSQL and lantern with sanitizer instrumentation.
FROM debian:bookworm

# Versions of the PostgreSQL and pgvector sources downloaded further down
ARG VERSION=15.4
ARG PGVECTOR_VERSION=0.5.0
# Appended directly after "-fsanitize=address" in the compiler/linker flags; empty by default
ARG UBSAN=

WORKDIR /lantern
# This requires the docker command be run in the lantern base directory
COPY scripts scripts

# Relative to the previous WORKDIR, i.e. /lantern/pg_build
WORKDIR pg_build

# Install the clang/LLVM 15 toolchain and the PostgreSQL build prerequisites.
RUN apt-get update && \
    apt-mark hold locales && \
    apt-get install -y \
        wget \
        curl \
        build-essential \
        make \
        cmake \
        pkg-config \
        flex \
        bison \
        libicu-dev \
        libssl-dev \
        clang-15 \
        llvm-15 \
        llvm-15-dev \
        llvm-15-tools \
        libstdc++6

# Download the requested PostgreSQL release and unpack it into the working directory.
RUN wget -q -O postgresql.tar.bz2 \
        https://ftp.postgresql.org/pub/source/v${VERSION}/postgresql-${VERSION}.tar.bz2 && \
    tar --extract --file postgresql.tar.bz2 --directory . --strip-components 1

# Apply the instrumentation patch shipped in scripts/sanitizers.
RUN patch -F5 -p1 -d . < /lantern/scripts/sanitizers/postgres-asan-instrumentation.patch

# System user and group "postgres" with a fixed uid/gid of 999.
RUN groupadd -r postgres --gid=999 && \
    useradd -r -g postgres --uid=999 postgres

# Toolchain and sanitizer flags for everything built from here on.
# Written in the key=value form; the space-separated `ENV key value` form is
# the legacy syntax. ${UBSAN} expands to the UBSAN build argument (empty unless
# one is passed at build time).
ENV LLVM_CONFIG="llvm-config-15"
ENV CC="clang-15"
ENV CXX="clang-15"
ENV CFLAGS="-g -fsanitize=address${UBSAN} -fno-omit-frame-pointer -Og -fno-inline-functions"
ENV CXXFLAGS="-g -fsanitize=address${UBSAN} -fno-omit-frame-pointer -Og -fno-inline-functions"
ENV LDFLAGS="-fsanitize=address${UBSAN}"

# Configure, compile and install PostgreSQL (plus the isolation test tooling)
# under /usr/local/pgsql.
RUN ./configure --prefix=/usr/local/pgsql --enable-debug --enable-cassert \
        --with-openssl --without-readline --without-zlib --without-libxml && \
    make -j$(nproc) && \
    make -j$(nproc) -C src/test/isolation && \
    make install

# Make the freshly installed PostgreSQL binaries and libraries discoverable.
ENV PATH="/usr/local/pgsql/bin:${PATH}"
# No leading ':' here: an empty entry in LD_LIBRARY_PATH makes the dynamic
# loader search the current working directory as well.
ENV LD_LIBRARY_PATH=/usr/local/pgsql/lib
ENV PGDATA=/var/lib/postgresql/data
# Data directory owned by the postgres user.
RUN mkdir -p ${PGDATA} && \
    chown -R postgres:postgres ${PGDATA} && \
    chmod 777 ${PGDATA}

WORKDIR /lantern

# Log directory that the log_path setting of the sanitizer options points at.
RUN mkdir /lantern/sanitizer && \
    chown -R postgres:postgres /lantern && \
    chmod 777 /lantern/sanitizer

# Runtime options for the sanitizers, written in the key="value" form instead
# of the legacy `ENV key value` syntax. Each value is a whitespace-separated
# list of options continued across lines.
ENV ASAN_OPTIONS="suppressions=/lantern/scripts/sanitizers/suppressions/suppr_asan.txt \
    detect_odr_violation=0 log_path=/lantern/sanitizer/ \
    log_exe_name=true print_suppressions=false exitcode=27 \
    detect_leaks=0 abort_on_error=1"

# print_suppressions was previously listed twice (=0 and =false); one
# occurrence is enough.
ENV LSAN_OPTIONS="suppressions=/lantern/scripts/sanitizers/suppressions/suppr_leak.txt \
    log_path=/lantern/sanitizer/ \
    log_exe_name=true print_suppressions=false exitcode=27"

ENV UBSAN_OPTIONS="suppressions=/lantern/scripts/sanitizers/suppressions/suppr_ub.txt \
    print_stacktrace=1 halt_on_error=1 log_path=/lantern/sanitizer/ \
    log_exe_name=true print_suppressions=false exitcode=27"

# Download the pinned pgvector release, then build and install it.
RUN wget -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \
    tar xf pgvector.tar.gz && \
    cd pgvector-${PGVECTOR_VERSION} && \
    make && make install

# Copy the whole build context into /lantern and build the extension in a
# fresh build directory.
COPY . .

# NOTE(review): the CXXFLAGS override below does not include ${UBSAN}, unlike
# the image-wide CFLAGS/CXXFLAGS; confirm that this is intended.
RUN rm -rf build && \
    mkdir build && \
    cd build && \
    CXXFLAGS="-g -fsanitize=address -fno-omit-frame-pointer -Og -fno-inline-functions" cmake .. && \
    make install

# From here on everything runs as the postgres user.
USER postgres
# Initialise the cluster, then replace pg_hba.conf with trust entries for the
# local socket and both loopback addresses.
RUN initdb -D ${PGDATA} && \
    echo "local all all trust" > ${PGDATA}/pg_hba.conf && \
    echo "host all all 127.0.0.1/32 trust" >> ${PGDATA}/pg_hba.conf && \
    echo "host all all ::1/128 trust" >> ${PGDATA}/pg_hba.conf

# Start the server on the data directory by default.
CMD ["postgres", "-D", "/var/lib/postgresql/data"]
Loading

0 comments on commit 05336cf

Please sign in to comment.