This repository was archived by the owner on May 10, 2024. It is now read-only.
84 changes: 83 additions & 1 deletion CMakeLists.txt
@@ -74,6 +74,9 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PARQUET_USE_SSE
"Build with SSE4 optimizations"
OFF)
option(PARQUET_BUILD_BENCHMARKS
"Build the libparquet benchmark suite"
OFF)
option(PARQUET_BUILD_TESTS
"Build the libparquet test suite"
ON)
@@ -102,6 +105,60 @@ else()
set(BUILD_OUTPUT_ROOT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/${BUILD_SUBDIR_NAME}")
endif()

############################################################
# Benchmarking
############################################################
# Add a new micro benchmark, with or without an executable that should be built.
# If benchmarks are enabled, they will be run alongside the unit tests with ctest.
# Use 'make runbenchmark' and 'make unittest' to build/run only the benchmarks or
# unit tests, respectively.
#
# REL_BENCHMARK_NAME is the name of the benchmark app. It may be a single component
# (e.g. monotime-benchmark) or contain additional components (e.g.
# net/net_util-benchmark). Either way, the last component must be a globally
# unique name.

# The benchmark will be registered with ctest as a test carrying the label
# 'benchmark'.
#
# Arguments after the test name will be passed to set_tests_properties().
function(ADD_PARQUET_BENCHMARK REL_BENCHMARK_NAME)
if(NOT PARQUET_BUILD_BENCHMARKS)
return()
endif()
get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)

if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
# This benchmark has a corresponding .cc file, set it up as an executable.
set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
add_executable(${BENCHMARK_NAME} "${REL_BENCHMARK_NAME}.cc")
target_link_libraries(${BENCHMARK_NAME} ${PARQUET_BENCHMARK_LINK_LIBS})
add_dependencies(runbenchmark ${BENCHMARK_NAME})
set(NO_COLOR "--color_print=false")
else()
# No executable, just invoke the benchmark (probably a script) directly.
set(BENCHMARK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME})
set(NO_COLOR "")
endif()

add_test(${BENCHMARK_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR})
set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark")
if(ARGN)
set_tests_properties(${BENCHMARK_NAME} PROPERTIES ${ARGN})
endif()
endfunction()
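
# For example, a subdirectory CMakeLists.txt could register a benchmark built
# from a corresponding .cc file like so (hypothetical benchmark name):
#
#   ADD_PARQUET_BENCHMARK(encoding-benchmark)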

# A wrapper for add_dependencies() that is a no-op when benchmarks are
# disabled (PARQUET_BUILD_BENCHMARKS=OFF).
function(ADD_PARQUET_BENCHMARK_DEPENDENCIES REL_BENCHMARK_NAME)
if(NOT PARQUET_BUILD_BENCHMARKS)
return()
endif()
get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)

add_dependencies(${BENCHMARK_NAME} ${ARGN})
endfunction()
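
# Example pairing of the two helpers (hypothetical target names); the extra
# dependency is only wired up when PARQUET_BUILD_BENCHMARKS is ON:
#
#   ADD_PARQUET_BENCHMARK(encoding-benchmark)
#   ADD_PARQUET_BENCHMARK_DEPENDENCIES(encoding-benchmark parquet_thrift)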

############################################################
# Testing
############################################################
@@ -113,6 +170,9 @@ endif()
# net/net_util-test). Either way, the last component must be a globally
# unique name.
#
# The unit test is added with a label of "unittest" to support filtering with
# ctest.
#
# Arguments after the test name will be passed to set_tests_properties().
function(ADD_PARQUET_TEST REL_TEST_NAME)
if(NOT PARQUET_BUILD_TESTS)
@@ -124,6 +184,7 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
# This test has a corresponding .cc file, set it up as an executable.
set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}")
add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc")
add_dependencies(unittest ${TEST_NAME})

if(APPLE)
# On OS X / Thrift >= 0.9.2, tr1/tuple.h is not in libc++
@@ -149,8 +210,9 @@ function(ADD_PARQUET_TEST REL_TEST_NAME)
valgrind --tool=memcheck --leak-check=full --error-exitcode=1 ${TEST_PATH})
else()
add_test(${TEST_NAME}
${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH})
${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH})
endif()
set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest")
if(ARGN)
set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN})
endif()
@@ -213,11 +275,26 @@ add_library(zlibstatic STATIC IMPORTED)
set_target_properties(zlibstatic PROPERTIES IMPORTED_LOCATION ${ZLIB_STATIC_LIB})

## GTest
add_custom_target(unittest ctest -L unittest)
find_package(GTest REQUIRED)
include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
add_library(gtest STATIC IMPORTED)
set_target_properties(gtest PROPERTIES IMPORTED_LOCATION ${GTEST_STATIC_LIB})

## Google Benchmark
if ("$ENV{GBENCHMARK_HOME}" STREQUAL "")
set(GBENCHMARK_HOME ${THIRDPARTY_DIR}/installed)
endif()

if(PARQUET_BUILD_BENCHMARKS)
add_custom_target(runbenchmark ctest -L benchmark)
find_package(GBenchmark REQUIRED)
include_directories(SYSTEM ${GBENCHMARK_INCLUDE_DIR})
message(${GBENCHMARK_STATIC_LIB})
add_library(gbenchmark STATIC IMPORTED)
set_target_properties(gbenchmark PROPERTIES IMPORTED_LOCATION ${GBENCHMARK_STATIC_LIB})
endif()
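
# Sketch of pointing the build at an external Google Benchmark install at
# configure time (hypothetical path); otherwise GBENCHMARK_HOME defaults to
# thirdparty/installed as set above:
#
#   GBENCHMARK_HOME=/opt/gbenchmark cmake -DPARQUET_BUILD_BENCHMARKS=ON ..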

# Thrift requires these definitions for some types that we use
add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H -DHAVE_NETDB_H)
add_definitions(-fPIC)
@@ -331,6 +408,11 @@ set(PARQUET_MIN_TEST_LIBS
parquet)
set(PARQUET_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS})

#############################################################
# Benchmark linking

set(PARQUET_BENCHMARK_LINK_LIBS parquet parquet_benchmark_main)

#############################################################
# Code coverage

22 changes: 18 additions & 4 deletions README.md
@@ -24,6 +24,7 @@
- zlib
- thrift 0.7+ [install instructions](https://thrift.apache.org/docs/install/)
- googletest 1.7.0 (cannot be installed with package managers)
- Google Benchmark (only required if building benchmarks)

You can install these dependencies using a package manager or using the
`thirdparty/` scripts in this repository. On Homebrew, you can run:
@@ -87,7 +88,7 @@ This library uses Google's `googletest` unit test framework. After building
with `make`, you can run the test suite by running

```
ctest
make unittest
```

The test suite relies on an environment variable `PARQUET_TEST_DATA` pointing
@@ -107,6 +108,19 @@ you can use valgrind with ctest to look for memory leaks:
valgrind --tool=memcheck --leak-check=yes ctest
```

## Building/Running benchmarks

Follow the directions for the simple build, except run cmake
with the `PARQUET_BUILD_BENCHMARKS` option turned on:

cmake -DPARQUET_BUILD_BENCHMARKS=ON ..

Then, instead of `make unittest`, run either `make; ctest` to run both unit tests
and benchmarks, or `make runbenchmark` to run only the benchmarks.
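
Both targets are thin wrappers around ctest labels, so the following commands
(a sketch, relying on the `unittest` and `benchmark` labels attached by
`ADD_PARQUET_TEST` and `ADD_PARQUET_BENCHMARK`) select what runs:

```
make runbenchmark    # builds the benchmarks, then runs: ctest -L benchmark
ctest -L benchmark   # run only tests labeled 'benchmark'
ctest -L unittest    # run only the unit tests
```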

Benchmark logs will be placed in the build directory under `build/benchmark-logs`.


## Out-of-source builds

parquet-cpp supports out of source builds. For example:
@@ -116,7 +130,7 @@ mkdir test-build
cd test-build
cmake ..
make
ctest
ctest -L unittest
```

By using out-of-source builds you can preserve your current build state in case
@@ -172,7 +186,7 @@ mkdir coverage-build
cd coverage-build
cmake -DPARQUET_GENERATE_COVERAGE=1
make -j$PARALLEL
ctest
ctest -L unittest
```

The `gcov` artifacts are not located in a place that works well with either
Expand Down Expand Up @@ -205,4 +219,4 @@ coveralls -t $PARQUET_CPP_COVERAGE_TOKEN --gcov-options '\-l' -r $PARQUET_ROOT -


Note that `gcov` throws off artifacts from the STL, so I excluded my toolchain
root stored in `$NATIVE_TOOLCHAIN` to avoid a cluttered coverage report.
root stored in `$NATIVE_TOOLCHAIN` to avoid a cluttered coverage report.
136 changes: 104 additions & 32 deletions build-support/run-test.sh
@@ -20,15 +20,23 @@
# Script which wraps running a test and redirects its output to a
# test log directory.
#
# If PARQUET_COMPRESS_TEST_OUTPUT is non-empty, then the logs will be
# gzip-compressed while they are written.
# Arguments:
# $1 - base path for logs/artifacts
# $2 - run type (e.g. 'test' or 'benchmark')
# $3 - path to the executable
# $ARGN - remaining arguments are passed through to the executable
#
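# For example, CMake ends up invoking this script along these lines
# (hypothetical paths):
#
#   build-support/run-test.sh /path/to/build benchmark \
#       /path/to/build/release/encoding-benchmark --color_print=false
#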

OUTPUT_ROOT=$1
shift
ROOT=$(cd $(dirname $BASH_SOURCE)/..; pwd)

TEST_LOGDIR=$ROOT/build/test-logs
TEST_LOGDIR=$OUTPUT_ROOT/build/$1-logs
mkdir -p $TEST_LOGDIR

TEST_DEBUGDIR=$ROOT/build/test-debug
RUN_TYPE=$1
shift
TEST_DEBUGDIR=$OUTPUT_ROOT/build/$RUN_TYPE-debug
mkdir -p $TEST_DEBUGDIR

TEST_DIRNAME=$(cd $(dirname $1); pwd)
@@ -37,11 +45,8 @@ shift
TEST_EXECUTABLE="$TEST_DIRNAME/$TEST_FILENAME"
TEST_NAME=$(echo $TEST_FILENAME | perl -pe 's/\..+?$//') # Remove path and extension (if any).

TEST_EXECUTION_ATTEMPTS=1


# We run each test in its own subdir to avoid core file related races.
TEST_WORKDIR=$ROOT/build/test-work/$TEST_NAME
TEST_WORKDIR=$OUTPUT_ROOT/build/test-work/$TEST_NAME
mkdir -p $TEST_WORKDIR
pushd $TEST_WORKDIR >/dev/null || exit 1
rm -f *
@@ -51,40 +56,57 @@ set -o pipefail
LOGFILE=$TEST_LOGDIR/$TEST_NAME.txt
XMLFILE=$TEST_LOGDIR/$TEST_NAME.xml

# Remove both the compressed and uncompressed output, so the developer
# doesn't accidentally get confused and read output from a prior test
# run.
TEST_EXECUTION_ATTEMPTS=1

# Remove old output so the developer doesn't accidentally get confused
# and read output from a prior test run.
rm -f $LOGFILE $LOGFILE.gz

if [ -n "$PARQUET_COMPRESS_TEST_OUTPUT" ] && [ "$PARQUET_COMPRESS_TEST_OUTPUT" -ne 0 ] ; then
pipe_cmd=gzip
LOGFILE=${LOGFILE}.gz
else
pipe_cmd=cat
fi
pipe_cmd=cat

# Allow for collecting core dumps.
PARQUET_TEST_ULIMIT_CORE=${PARQUET_TEST_ULIMIT_CORE:-0}
ulimit -c $PARQUET_TEST_ULIMIT_CORE
ARROW_TEST_ULIMIT_CORE=${ARROW_TEST_ULIMIT_CORE:-0}
ulimit -c $ARROW_TEST_ULIMIT_CORE
Review comment (Member): Buglet from Arrow porting


# Run the actual test.
for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
# If the test fails, the test output may or may not be left behind,
# depending on whether the test cleaned up or exited immediately. Either
# way we need to clean it up. We do this by comparing the data directory
# contents before and after the test runs, and deleting anything new.
#
# The comm program requires that its two inputs be sorted.
TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)

function setup_sanitizers() {
# Sets environment variables that configure how the sanitizers behave when the tests run.

# Configure TSAN (ignored if this isn't a TSAN build).
#
# Deadlock detection (new in clang 3.5) is disabled because:
# 1. The clang 3.5 deadlock detector crashes in some unit tests. It
# needs compiler-rt commits c4c3dfd, 9a8efe3, and possibly others.
# 2. Many unit tests report lock-order-inversion warnings; they should be
# fixed before reenabling the detector.
TSAN_OPTIONS="$TSAN_OPTIONS detect_deadlocks=0"
TSAN_OPTIONS="$TSAN_OPTIONS suppressions=$ROOT/build-support/tsan-suppressions.txt"
TSAN_OPTIONS="$TSAN_OPTIONS history_size=7"
export TSAN_OPTIONS

# Enable leak detection even under LLVM 3.4, where it was disabled by default.
# This flag only takes effect when running an ASAN build.
ASAN_OPTIONS="$ASAN_OPTIONS detect_leaks=1"
export ASAN_OPTIONS

# Set up suppressions for LeakSanitizer
LSAN_OPTIONS="$LSAN_OPTIONS suppressions=$ROOT/build-support/lsan-suppressions.txt"
export LSAN_OPTIONS

# Suppressions require symbolization. We'll default to using the symbolizer in
# thirdparty.
if [ -z "$ASAN_SYMBOLIZER_PATH" ]; then
export ASAN_SYMBOLIZER_PATH=$(find $NATIVE_TOOLCHAIN/llvm-3.7.0/bin -name llvm-symbolizer)
fi
}
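
# A pre-set ASAN_SYMBOLIZER_PATH is honored above; a sketch of overriding the
# symbolizer for a single run (hypothetical paths):
#
#   ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer \
#       build-support/run-test.sh /path/to/build test /path/to/build/release/reader-test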

function run_test() {
# Run gtest-style tests, with sanitizers if they are set up appropriately.

# gtest won't overwrite old junit test files, resulting in a build failure
# even when retries are successful.
rm -f $XMLFILE

echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
"(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
$TEST_EXECUTABLE "$@" 2>&1 \
| $ROOT/build-support/stacktrace_addr2line.pl $TEST_EXECUTABLE \
| $pipe_cmd > $LOGFILE
@@ -104,6 +126,46 @@
STATUS=1
rm -f $XMLFILE
fi
}

function post_process_tests() {
# If we have a LeakSanitizer report, and XML reporting is configured, add a new test
# case result to the XML file for the leak report. Otherwise Jenkins won't show
# us which tests had LSAN errors.
if zgrep --silent "ERROR: LeakSanitizer: detected memory leaks" $LOGFILE ; then
echo Test had memory leaks. Editing XML
perl -p -i -e '
if (m#</testsuite>#) {
print "<testcase name=\"LeakSanitizer\" status=\"run\" classname=\"LSAN\">\n";
print " <failure message=\"LeakSanitizer failed\" type=\"\">\n";
print " See txt log file for details\n";
print " </failure>\n";
print "</testcase>\n";
}' $XMLFILE
fi
}

function run_other() {
# Generic run function for test-like executables that aren't actually gtest binaries.
$TEST_EXECUTABLE "$@" 2>&1 | $pipe_cmd > $LOGFILE
STATUS=$?
}

if [ $RUN_TYPE = "test" ]; then
setup_sanitizers
fi

# Run the actual test.
for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
# If the test fails, the test output may or may not be left behind,
# depending on whether the test cleaned up or exited immediately. Either
# way we need to clean it up. We do this by comparing the data directory
# contents before and after the test runs, and deleting anything new.
#
# The comm program requires that its two inputs be sorted.
TEST_TMPDIR_BEFORE=$(find $TEST_TMPDIR -maxdepth 1 -type d | sort)
fi

if [ $ATTEMPT_NUMBER -lt $TEST_EXECUTION_ATTEMPTS ]; then
# Now delete any new test output.
@@ -123,7 +185,13 @@
fi
done
fi

echo "Running $TEST_NAME, redirecting output into $LOGFILE" \
"(attempt ${ATTEMPT_NUMBER}/$TEST_EXECUTION_ATTEMPTS)"
if [ $RUN_TYPE = "test" ]; then
run_test "$@"
else
run_other "$@"
fi
if [ "$STATUS" -eq "0" ]; then
break
elif [ "$ATTEMPT_NUMBER" -lt "$TEST_EXECUTION_ATTEMPTS" ]; then
Expand All @@ -132,6 +200,10 @@ for ATTEMPT_NUMBER in $(seq 1 $TEST_EXECUTION_ATTEMPTS) ; do
fi
done

if [ $RUN_TYPE = "test" ]; then
post_process_tests
fi

# Capture and compress core file and binary.
COREFILES=$(ls | grep ^core)
if [ -n "$COREFILES" ]; then