Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,23 @@ matrix:
- compiler: gcc
os: linux
before_script:
- export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API"
- export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API"
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
- compiler: gcc
os: linux
before_script:
- export PARQUET_CXXFLAGS="-Werror"
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
- compiler: clang
os: linux
before_script:
- export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API"
- export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API"
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
- compiler: clang
os: osx
osx_image: xcode6.4
addons:
before_script:
- export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API"
- export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API"
- source $TRAVIS_BUILD_DIR/ci/before_script_travis.sh
before_install:
- mkdir $TRAVIS_BUILD_DIR/parquet-build
Expand All @@ -68,7 +67,7 @@ matrix:
env: PARQUET_BUILD_GROUP=toolchain
before_script:
script:
- export PARQUET_CXXFLAGS="-Werror -DARROW_NO_DEPRECATED_API"
- export PARQUET_CXXFLAGS="-DARROW_NO_DEPRECATED_API"
- $TRAVIS_BUILD_DIR/ci/travis_script_static.sh
- compiler: gcc
os: linux
Expand Down
84 changes: 13 additions & 71 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PARQUET_BUILD_BENCHMARKS
"Build the libparquet benchmark suite"
OFF)

set(PARQUET_BUILD_WARNING_LEVEL "PRODUCTION" CACHE STRING
"Levels of compiler warnings for development: PRODUCTION/CHECKIN/EVERYTHING")

option(PARQUET_BOOST_USE_SHARED
"Rely on boost shared libraries where relevant"
ON)
Expand Down Expand Up @@ -375,6 +379,10 @@ enable_testing()
# Dependencies
############################################################

# Determine compiler version
include(CompilerInfo)
include(SetupCxxFlags)

include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/src
Expand All @@ -388,6 +396,11 @@ else()
endif()
include(ThirdpartyToolchain)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_COMMON_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PARQUET_CXXFLAGS}")

message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")

# Thrift requires these definitions for some types that we use
add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H)
if (MSVC)
Expand All @@ -396,77 +409,6 @@ else()
add_definitions(-DHAVE_NETINET_IN_H -fPIC)
endif()

#############################################################
# Compiler flags and release types

# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .')
# For all builds:
# For CMAKE_BUILD_TYPE=Debug
# -ggdb: Enable gdb debugging
# For CMAKE_BUILD_TYPE=FastDebug
# Same as DEBUG, except with -O1
# For CMAKE_BUILD_TYPE=Release
# -O3: Enable all compiler optimizations
# Debug symbols are stripped for reduced binary size. Add
# -DPARQUET_CXXFLAGS="-g" to include them
if (MSVC)
set(CXX_FLAGS_DEBUG "${CXX_FLAGS_DEBUG} /bigobj") # TODO set /bigobj only for specific lib
else()
set(CXX_FLAGS_DEBUG "-ggdb -O0")
set(CXX_FLAGS_FASTDEBUG "-ggdb -O1")
set(CXX_FLAGS_RELEASE "-O3")
endif()

string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE)

if ("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_DEBUG}")

elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "FASTDEBUG")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_FASTDEBUG}")
elseif ("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX_FLAGS_RELEASE}")
else()
message(FATAL_ERROR "Unknown build type: ${CMAKE_BUILD_TYPE}")
endif ()

message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")

set(CMAKE_CXX_FLAGS "${PARQUET_CXXFLAGS} ${CMAKE_CXX_FLAGS}")
if (MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W3")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing -Wall")
endif()

if (PARQUET_USE_SSE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
add_definitions(-DPARQUET_USE_SSE)
endif()

if (APPLE)
# Use libc++ to avoid linker errors on some platforms
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
endif()

# Determine compiler version
include(CompilerInfo)

if ("${COMPILER_FAMILY}" STREQUAL "clang")
# Using Clang with ccache causes a bunch of spurious warnings that are
# purportedly fixed in the next version of ccache. See the following for details:
#
# http://petereisentraut.blogspot.com/2011/05/ccache-and-clang.html
# http://petereisentraut.blogspot.com/2011/09/ccache-and-clang-part-2.html
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Qunused-arguments")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CLANG_OPTIONS}")
endif()

if ("${COMPILER_FAMILY}" STREQUAL "msvc")
# MSVC version of -Wno-deprecated
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996")
endif()

############################################################
# "make lint" target
############################################################
Expand Down
58 changes: 31 additions & 27 deletions benchmarks/decode_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,32 +42,33 @@ class DeltaBitPackEncoder {

uint8_t* Encode(int* encoded_len) {
uint8_t* result = new uint8_t[10 * 1024 * 1024];
int num_mini_blocks = arrow::BitUtil::Ceil(num_values() - 1, mini_block_size_);
int num_mini_blocks = static_cast<int>(arrow::BitUtil::Ceil(num_values() - 1,
mini_block_size_));
uint8_t* mini_block_widths = NULL;

arrow::BitWriter writer(result, 10 * 1024 * 1024);

// Write the size of each block. We only use 1 block currently.
writer.PutVlqInt(num_mini_blocks * mini_block_size_);
writer.PutVlqInt(static_cast<uint32_t>(num_mini_blocks * mini_block_size_));

// Write the number of mini blocks.
writer.PutVlqInt(num_mini_blocks);
writer.PutVlqInt(static_cast<uint32_t>(num_mini_blocks));

// Write the number of values.
writer.PutVlqInt(num_values() - 1);

// Write the first value.
writer.PutZigZagVlqInt(values_[0]);
writer.PutZigZagVlqInt(static_cast<uint32_t>(values_[0]));

// Compute the values as deltas and the min delta.
int64_t min_delta = std::numeric_limits<int64_t>::max();
for (int i = values_.size() - 1; i > 0; --i) {
for (size_t i = values_.size() - 1; i > 0; --i) {
values_[i] -= values_[i - 1];
min_delta = std::min(min_delta, values_[i]);
}

// Write out the min delta.
writer.PutZigZagVlqInt(min_delta);
writer.PutZigZagVlqInt(static_cast<int32_t>(min_delta));

// We need to save num_mini_blocks bytes to store the bit widths of the mini
// blocks.
Expand All @@ -86,7 +87,7 @@ class DeltaBitPackEncoder {
// The bit width for this block is the number of bits needed to store
// (max_delta - min_delta).
int bit_width = arrow::BitUtil::NumRequiredBits(max_delta - min_delta);
mini_block_widths[i] = bit_width;
mini_block_widths[i] = static_cast<uint8_t>(bit_width);

// Encode this mini block using min_delta and bit_width
for (int j = 0; j < n; ++j) {
Expand All @@ -105,7 +106,7 @@ class DeltaBitPackEncoder {
return result;
}

int num_values() const { return values_.size(); }
int num_values() const { return static_cast<int>(values_.size()); }

private:
int mini_block_size_;
Expand All @@ -121,11 +122,11 @@ class DeltaLengthByteArrayEncoder {
plain_encoded_len_(0) {}

void Add(const std::string& s) {
Add(reinterpret_cast<const uint8_t*>(s.data()), s.size());
Add(reinterpret_cast<const uint8_t*>(s.data()), static_cast<int>(s.size()));
}

void Add(const uint8_t* ptr, int len) {
plain_encoded_len_ += len + sizeof(int);
plain_encoded_len_ += static_cast<int>(len + sizeof(int));
len_encoder_.Add(len);
memcpy(buffer_ + offset_, ptr, len);
offset_ += len;
Expand All @@ -136,7 +137,7 @@ class DeltaLengthByteArrayEncoder {
memmove(buffer_ + *encoded_len + sizeof(int), buffer_, offset_);
memcpy(buffer_, encoded_len, sizeof(int));
memcpy(buffer_ + sizeof(int), encoded_lengths, *encoded_len);
*encoded_len += offset_ + sizeof(int);
*encoded_len += static_cast<int>(offset_ + sizeof(int));
return buffer_;
}

Expand All @@ -155,8 +156,8 @@ class DeltaByteArrayEncoder {
DeltaByteArrayEncoder() : plain_encoded_len_(0) {}

void Add(const std::string& s) {
plain_encoded_len_ += s.size() + sizeof(int);
int min_len = std::min(s.size(), last_value_.size());
plain_encoded_len_ += static_cast<int>(s.size() + sizeof(int));
int min_len = static_cast<int>(std::min(s.size(), last_value_.size()));
int prefix_len = 0;
for (int i = 0; i < min_len; ++i) {
if (s[i] == last_value_[i]) {
Expand All @@ -167,7 +168,7 @@ class DeltaByteArrayEncoder {
}
prefix_len_encoder_.Add(prefix_len);
suffix_encoder_.Add(reinterpret_cast<const uint8_t*>(s.data()) + prefix_len,
s.size() - prefix_len);
static_cast<int>(s.size() - prefix_len));
last_value_ = s;
}

Expand All @@ -181,7 +182,7 @@ class DeltaByteArrayEncoder {
memcpy(buffer, &prefix_buffer_len, sizeof(int));
memcpy(buffer + sizeof(int), prefix_buffer, prefix_buffer_len);
memcpy(buffer + sizeof(int) + prefix_buffer_len, suffix_buffer, suffix_buffer_len);
*encoded_len = sizeof(int) + prefix_buffer_len + suffix_buffer_len;
*encoded_len = static_cast<int>(sizeof(int) + prefix_buffer_len + suffix_buffer_len);
return buffer;
}

Expand All @@ -198,7 +199,7 @@ class DeltaByteArrayEncoder {
uint64_t TestPlainIntEncoding(const uint8_t* data, int num_values, int batch_size) {
uint64_t result = 0;
parquet::PlainDecoder<parquet::Int64Type> decoder(nullptr);
decoder.SetData(num_values, data, num_values * sizeof(int64_t));
decoder.SetData(num_values, data, static_cast<int>(num_values * sizeof(int64_t)));
std::vector<int64_t> values(batch_size);
for (int i = 0; i < num_values;) {
int n = decoder.Decode(values.data(), batch_size);
Expand Down Expand Up @@ -227,14 +228,15 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
encoder.Add(values[i]);
}

int raw_len = encoder.num_values() * sizeof(int);
int raw_len = static_cast<int>(encoder.num_values() * sizeof(int));
int len;
uint8_t* buffer = encoder.Encode(&len);

if (benchmark_iters == -1) {
printf("%s\n", name);
printf(" Raw len: %d\n", raw_len);
printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast<float>(raw_len));
printf(" Encoded len: %d (%0.2f%%)\n", len,
static_cast<float>(len) * 100.0f / static_cast<float>(raw_len));
decoder.SetData(encoder.num_values(), buffer, len);
for (int i = 0; i < encoder.num_values(); ++i) {
int64_t x = 0;
Expand All @@ -249,7 +251,8 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
} else {
printf("%s\n", name);
printf(" Raw len: %d\n", raw_len);
printf(" Encoded len: %d (%0.2f%%)\n", len, len * 100 / static_cast<float>(raw_len));
printf(" Encoded len: %d (%0.2f%%)\n", len,
static_cast<float>(len) * 100.0f / static_cast<float>(raw_len));

uint64_t result = 0;
std::vector<int64_t> buf(benchmark_batch_size);
Expand All @@ -266,9 +269,9 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
}
}
uint64_t elapsed = sw.Stop();
double num_ints = values.size() * benchmark_iters * 1000.;
double num_ints = static_cast<double>(values.size() * benchmark_iters) * 1000.;
printf("%s rate (batch size = %2d): %0.3fM per second.\n", name, benchmark_batch_size,
num_ints / elapsed);
num_ints / static_cast<double>(elapsed));
return result;
}
}
Expand All @@ -280,15 +283,15 @@ uint64_t TestBinaryPackedEncoding(const char* name, const std::vector<int64_t>&
} \
elapsed = sw.Stop(); \
printf("%s rate (batch size = %2d): %0.3fM per second.\n", NAME, BATCH_SIZE, \
mult / elapsed);
mult / static_cast<double>(elapsed));

void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& data,
int num_iters, int batch_size) {
const uint8_t* raw_data = reinterpret_cast<const uint8_t*>(&data[0]);
int uncompressed_len = data.size() * sizeof(int64_t);
int uncompressed_len = static_cast<int>(data.size() * sizeof(int64_t));
uint8_t* decompressed_data = new uint8_t[uncompressed_len];

int max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data);
int64_t max_compressed_size = codec->MaxCompressedLen(uncompressed_len, raw_data);
uint8_t* compressed_data = new uint8_t[max_compressed_size];
int64_t compressed_len;
DCHECK(codec
Expand All @@ -299,18 +302,19 @@ void TestPlainIntCompressed(::arrow::Codec* codec, const std::vector<int64_t>& d
printf("\n%s:\n Uncompressed len: %d\n Compressed len: %d\n", codec->name(),
uncompressed_len, static_cast<int>(compressed_len));

double mult = num_iters * data.size() * 1000.;
double mult = static_cast<double>(num_iters * data.size()) * 1000.;
parquet::StopWatch sw;
sw.Start();
uint64_t r = 0;
for (int i = 0; i < num_iters; ++i) {
ABORT_NOT_OK(codec->Decompress(compressed_len, compressed_data, uncompressed_len,
decompressed_data));
r += TestPlainIntEncoding(decompressed_data, data.size(), batch_size);
r += TestPlainIntEncoding(decompressed_data, static_cast<int>(data.size()),
batch_size);
}
int64_t elapsed = sw.Stop();
printf("Compressed(%s) plain int rate (batch size = %2d): %0.3fM per second.\n",
codec->name(), batch_size, mult / elapsed);
codec->name(), batch_size, mult / static_cast<double>(elapsed));

delete[] compressed_data;
delete[] decompressed_data;
Expand Down
2 changes: 2 additions & 0 deletions ci/before_script_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@ if [ $TRAVIS_OS_NAME == "linux" ]; then
cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \
-DPARQUET_TEST_MEMCHECK=ON \
-DPARQUET_BUILD_BENCHMARKS=ON \
-DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \
-DPARQUET_GENERATE_COVERAGE=1 \
$TRAVIS_BUILD_DIR
else
cmake -DPARQUET_CXXFLAGS="$PARQUET_CXXFLAGS" \
-DPARQUET_BUILD_WARNING_LEVEL=CHECKIN \
$TRAVIS_BUILD_DIR
fi
2 changes: 1 addition & 1 deletion cmake_modules/CompilerInfo.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ elseif("${COMPILER_VERSION_FULL}" MATCHES ".*based on LLVM.*")
# clang on Mac OS X, XCode 7+.
elseif("${COMPILER_VERSION_FULL}" MATCHES ".*clang-.*")
set(COMPILER_FAMILY "clang")

set(COMPILER_VERSION "4.0")
# gcc
elseif("${COMPILER_VERSION_FULL_LOWER}" MATCHES ".*gcc[ -]version.*")
set(COMPILER_FAMILY "gcc")
Expand Down
Loading