Skip to content

Commit

Permalink
Merge pull request #1545 from finos/arrow-5-0-0
Browse files Browse the repository at this point in the history
Upgrade Apache Arrow to 5.0.0
  • Loading branch information
texodus authored Oct 13, 2021
2 parents 1d40006 + 6882a84 commit 4265e79
Show file tree
Hide file tree
Showing 9 changed files with 783 additions and 555 deletions.
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ parameters:
architecture: 'x64'
displayName: 'Use Python $(python.version)'

- bash: python -m pip install --upgrade pip wheel setuptools "jupyterlab>=3.0.14" numpy "pyarrow>=2" "black==20.8b1" flake8-black
- bash: python -m pip install --upgrade pip wheel setuptools "jupyterlab>=3.0.14" numpy "pyarrow>=5" "black==20.8b1" flake8-black
displayName: 'Install Python base dependencies'
condition: and(succeeded(), ne(variables['python.version'], '2.7'))

Expand Down Expand Up @@ -462,7 +462,7 @@ jobs:
architecture: 'x64'
displayName: 'Use Python $(python.version)'

- bash: python -m pip install --upgrade pip wheel setuptools jupyterlab numpy "pyarrow>=2" "black==20.8b1" flake8-black
- bash: python -m pip install --upgrade pip wheel setuptools jupyterlab numpy "pyarrow>=5" "black==20.8b1" flake8-black
displayName: 'Install Python base dependencies'
condition: and(succeeded(), ne(variables['python.version'], '2.7'))

Expand Down
2 changes: 1 addition & 1 deletion cmake/arrow.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ project(arrow-download NONE)
include(ExternalProject)
ExternalProject_Add(apachearrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-1.0.1
GIT_TAG apache-arrow-5.0.0
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/arrow-build"
CONFIGURE_COMMAND ""
Expand Down
13 changes: 9 additions & 4 deletions cmake/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/buffer.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/chunked_array.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/compare.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/datum.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/device.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/extension_type.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/memory_pool.cc
Expand All @@ -45,6 +46,7 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/column_decoder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/options.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/parser.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/filesystem.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/localfs.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/filesystem/mockfs.cc
Expand All @@ -57,6 +59,7 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/json/parser.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/json/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/buffered.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/caching.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/compressed.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/interfaces.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/memory.cc
Expand All @@ -66,17 +69,20 @@ set(ARROW_SRCS
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bit_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_builders.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/bitmap_ops.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cancel.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/compression.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/cpu_info.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/decimal.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/future.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/formatting.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/delimiting.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/int_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/io_util.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/iterator.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/iterator.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/logging.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/key_value_metadata.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/memory.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/mutex.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/string.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/string_builder.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/task_group.cc
Expand Down Expand Up @@ -104,13 +110,12 @@ set(ARROW_SRCS
if (PSP_PYTHON_BUILD)
set(ARROW_SRCS
${ARROW_SRCS}
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/csv/reader.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/datum.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/datum.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/io/file.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/coo_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csf_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/tensor/csx_converter.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/formatting.cc
# ${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/formatting.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/util/time.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/bignum-dtoa.cc
${CMAKE_BINARY_DIR}/arrow-src/cpp/src/arrow/vendored/double-conversion/fast-dtoa.cc
Expand Down
4 changes: 2 additions & 2 deletions cmake/arrow/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
// specific language governing permissions and limitations
// under the License.

#define ARROW_VERSION_MAJOR 1
#define ARROW_VERSION_MAJOR 5
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 1
#define ARROW_VERSION_PATCH 0
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH

/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
Expand Down
55 changes: 49 additions & 6 deletions cpp/perspective/src/cpp/arrow_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,49 @@
#include <arrow/csv/reader.h>
#endif


/// Convert a time point into an integer timestamp expressed in `unit`.
///
/// The value returned is the time point's duration since its clock's epoch,
/// cast (via std::chrono::duration_cast) to seconds, milliseconds,
/// microseconds or nanoseconds according to `unit`.
///
/// @param tp   Time point to convert; only `tp.time_since_epoch()` is used.
/// @param unit Resolution of the returned count.
/// @return The tick count at the requested resolution. For a `unit` value
///         outside the four handled enumerators the function asserts and,
///         when assertions are compiled out, returns 0.
template <class TimePoint>
static inline arrow::TimestampType::c_type ConvertTimePoint(TimePoint tp, arrow::TimeUnit::type unit) {
    const auto since_epoch = tp.time_since_epoch();

    if (unit == arrow::TimeUnit::SECOND) {
        return std::chrono::duration_cast<std::chrono::seconds>(since_epoch).count();
    }
    if (unit == arrow::TimeUnit::MILLI) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(since_epoch).count();
    }
    if (unit == arrow::TimeUnit::MICRO) {
        return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
    }
    if (unit == arrow::TimeUnit::NANO) {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
    }

    // Every enumerator is handled above; this tail only exists so that all
    // control paths return a value.
    assert(0);
    return 0;
}


/// Parse a date laid out as "YYYY-MM-DD" from the first ten characters of `s`.
///
/// The function reads s[0] through s[9] without any length check, so the
/// caller must guarantee that at least ten characters are readable.
///
/// @param s   Pointer to the text to parse.
/// @param out Receives the parsed year/month/day. It is written only when
///            both separators are '-' and all three numeric fields parse.
/// @return false if either separator is not '-' or any field fails
///         arrow::internal::ParseUnsigned; otherwise the result of
///         `out->ok()` on the assembled date.
static inline bool ParseYYYY_MM_DD(const char* s,
                                   arrow_vendored::date::year_month_day* out) {
    // Check the two separators first; s[7] is only inspected when s[4] matched.
    const bool separators_ok = (s[4] == '-') && (s[7] == '-');
    if (ARROW_PREDICT_FALSE(!separators_ok)) {
        return false;
    }

    uint16_t parsed_year = 0;
    uint8_t parsed_month = 0;
    uint8_t parsed_day = 0;

    // Fields are parsed in year, month, day order and parsing stops at the
    // first failure.
    const bool fields_ok = arrow::internal::ParseUnsigned(s + 0, 4, &parsed_year)
        && arrow::internal::ParseUnsigned(s + 5, 2, &parsed_month)
        && arrow::internal::ParseUnsigned(s + 8, 2, &parsed_day);
    if (ARROW_PREDICT_FALSE(!fields_ok)) {
        return false;
    }

    *out = arrow_vendored::date::year_month_day{
        arrow_vendored::date::year{parsed_year},
        arrow_vendored::date::month{parsed_month},
        arrow_vendored::date::day{parsed_day}};
    return out->ok();
}

namespace perspective {
namespace apachearrow {

Expand Down Expand Up @@ -101,7 +144,7 @@ namespace apachearrow {
// "YYYY-MM-DD[ T]hh:mm:ss.sss"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(
!ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
Expand All @@ -116,15 +159,15 @@ namespace apachearrow {
return false;
}

*out = arrow::internal::detail::ConvertTimePoint(
*out = ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + seconds + millis,
unit);
return true;
} else if (length == 25) {
// "2008-09-15[ T]15:53:00+05:00"
arrow_vendored::date::year_month_day ymd;
if (ARROW_PREDICT_FALSE(
!arrow::internal::detail::ParseYYYY_MM_DD(
!ParseYYYY_MM_DD(
s, &ymd))) {
return false;
}
Expand All @@ -139,7 +182,7 @@ namespace apachearrow {
return false;
}

*out = arrow::internal::detail::ConvertTimePoint(
*out = ConvertTimePoint(
arrow_vendored::date::sys_days(ymd) + tz + seconds,
unit);
return true;
Expand Down Expand Up @@ -193,7 +236,7 @@ namespace apachearrow {
csvToTable(std::string& csv, bool is_update,
std::unordered_map<std::string, std::shared_ptr<arrow::DataType>>&
schema) {
arrow::MemoryPool* pool = arrow::default_memory_pool();
arrow::io::IOContext io_context = arrow::io::default_io_context();
auto input = std::make_shared<arrow::io::BufferReader>(csv);
auto read_options = arrow::csv::ReadOptions::Defaults();
auto parse_options = arrow::csv::ParseOptions::Defaults();
Expand All @@ -209,7 +252,7 @@ namespace apachearrow {
}

auto maybe_reader = arrow::csv::TableReader::Make(
pool, input, read_options, parse_options, convert_options);
io_context, input, read_options, parse_options, convert_options);

std::shared_ptr<arrow::csv::TableReader> reader = *maybe_reader;

Expand Down
Loading

0 comments on commit 4265e79

Please sign in to comment.