diff --git a/.gitignore b/.gitignore
index dd69b6cec9c..e6dfe19bb98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,4 +27,5 @@ MANIFEST
cpp/.idea/
python/.eggs/
-.vscode
\ No newline at end of file
+.vscode
+.idea/
diff --git a/.travis.yml b/.travis.yml
index cdf787c831b..b93f1c2519b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -42,7 +42,6 @@ cache:
ccache: true
directories:
- $HOME/.conda_packages
- - $HOME/.ccache
matrix:
fast_finish: true
@@ -56,6 +55,9 @@ matrix:
before_script:
- export CC="gcc-4.9"
- export CXX="g++-4.9"
+ - export ARROW_TRAVIS_USE_TOOLCHAIN=1
+ - export ARROW_TRAVIS_VALGRIND=1
+ - export ARROW_TRAVIS_PLASMA=1
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
@@ -66,6 +68,8 @@ matrix:
cache:
addons:
before_script:
+ - export ARROW_TRAVIS_USE_TOOLCHAIN=1
+ - export ARROW_TRAVIS_PLASMA=1
- $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh
script:
- $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 55b02e0f9a1..6cedf32df62 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,24 +1,167 @@
- http://www.apache.org/licenses/LICENSE-2.0
+# Apache Arrow 0.5.0 (23 July 2017)
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. See accompanying LICENSE file.
--->
+## Bug
+
+* ARROW-1074 - from_pandas doesn't convert ndarray to list
+* ARROW-1079 - [Python] Empty "private" directories should be ignored by Parquet interface
+* ARROW-1081 - C++: arrow::test::TestBase::MakePrimitive doesn't fill null_bitmap
+* ARROW-1096 - [C++] Memory mapping file over 4GB fails on Windows
+* ARROW-1097 - Reading tensor needs file to be opened in writeable mode
+* ARROW-1098 - Document Error?
+* ARROW-1101 - UnionListWriter is not implementing all methods on interface ScalarWriter
+* ARROW-1103 - [Python] Utilize pandas metadata from common `_metadata` Parquet file if it exists
+* ARROW-1107 - [JAVA] NullableMapVector getField() should return nullable type
+* ARROW-1108 - Check if ArrowBuf is empty buffer in getActualConsumedMemory() and getPossibleConsumedMemory()
+* ARROW-1109 - [JAVA] transferOwnership fails when readerIndex is not 0
+* ARROW-1110 - [JAVA] make union vector naming consistent
+* ARROW-1111 - [JAVA] Make aligning buffers optional, and allow -1 for unknown null count
+* ARROW-1112 - [JAVA] Set lastSet for VarLength and List vectors when loading
+* ARROW-1113 - [C++] gflags EP build gets triggered (as a no-op) on subsequent calls to make or ninja build
+* ARROW-1115 - [C++] Use absolute path for ccache
+* ARROW-1117 - [Docs] Minor issues in GLib README
+* ARROW-1124 - [Python] pyarrow needs to depend on numpy>=1.10 (not 1.9)
+* ARROW-1125 - Python: `Table.from_pandas` doesn't work anymore on partial schemas
+* ARROW-1128 - [Docs] command to build a wheel is not properly rendered
+* ARROW-1129 - [C++] Fix Linux toolchain build regression from ARROW-742
+* ARROW-1131 - Python: Parquet unit tests are always skipped
+* ARROW-1132 - [Python] Unable to write pandas DataFrame w/MultiIndex containing duplicate values to parquet
+* ARROW-1136 - [C++/Python] Segfault on empty stream
+* ARROW-1138 - Travis: Use OpenJDK7 instead of OracleJDK7
+* ARROW-1139 - [C++] dlmalloc doesn't allow arrow to be built with clang 4 or gcc 7.1.1
+* ARROW-1141 - on import get libjemalloc.so.2: cannot allocate memory in static TLS block
+* ARROW-1143 - C++: Fix comparison of NullArray
+* ARROW-1144 - [C++] Remove unused variable
+* ARROW-1150 - [C++] AdaptiveIntBuilder compiler warning on MSVC
+* ARROW-1152 - [Cython] `read_tensor` should work with a readable file
+* ARROW-1155 - segmentation fault when run pa.Int16Value()
+* ARROW-1157 - C++/Python: Decimal templates are not correctly exported on OSX
+* ARROW-1159 - [C++] Static data members cannot be accessed from inline functions in Arrow headers by thirdparty users
+* ARROW-1162 - Transfer Between Empty Lists Should Not Invoke Callback
+* ARROW-1166 - Errors in Struct type's example and missing reference in Layout.md
+* ARROW-1167 - [Python] Create chunked BinaryArray in `Table.from_pandas` when a column's data exceeds 2GB
+* ARROW-1168 - [Python] pandas metadata may contain "mixed" data types
+* ARROW-1169 - C++: jemalloc externalproject doesn't build with CMake's ninja generator
+* ARROW-1170 - C++: `ARROW_JEMALLOC=OFF` breaks linking on unittest
+* ARROW-1174 - [GLib] Investigate root cause of ListArray glib test failure
+* ARROW-1177 - [C++] Detect int32 overflow in ListBuilder::Append
+* ARROW-1179 - C++: Add missing virtual destructors
+* ARROW-1180 - [GLib] `garrow_tensor_get_dimension_name()` returns invalid address
+* ARROW-1181 - [Python] Parquet test fail if not enabled
+* ARROW-1182 - C++: Specify `BUILD_BYPRODUCTS` for zlib and zstd
+* ARROW-1186 - [C++] Enable option to build arrow with minimal dependencies needed to build Parquet library
+* ARROW-1188 - Segfault when trying to serialize a DataFrame with Null-only Categorical Column
+* ARROW-1190 - VectorLoader corrupts vectors with duplicate names
+* ARROW-1191 - [JAVA] Implement getField() method for the complex readers
+* ARROW-1194 - Getting record batch size with `pa.get_record_batch_size` returns a size that is too small for pandas DataFrame.
+* ARROW-1197 - [GLib] `record_batch.hpp` Inclusion is missing
+* ARROW-1200 - [C++] DictionaryBuilder should use signed integers for indices
+* ARROW-1201 - [Python] Incomplete Python types cause a core dump when repr-ing
+* ARROW-1203 - [C++] Disallow BinaryBuilder to append byte strings larger than the maximum value of `int32_t`
+* ARROW-1205 - C++: Reference to type objects in ArrayLoader may cause segmentation faults.
+* ARROW-1206 - [C++] Enable MSVC builds to work with some compression library support disabled
+* ARROW-1208 - [C++] Toolchain build with ZSTD library from conda-forge failure
+* ARROW-1215 - [Python] Class methods in API reference
+* ARROW-1216 - Numpy arrays cannot be created from Arrow Buffers on Python 2
+* ARROW-1218 - Arrow doesn't compile if all compression libraries are deactivated
+* ARROW-1222 - [Python] pyarrow.array returns NullArray for array of unsupported Python objects
+* ARROW-1223 - [GLib] Fix function name that returns wrapped object
+* ARROW-1235 - [C++] macOS linker failure with operator<< and std::ostream
+* ARROW-1236 - Library paths in exported pkg-config file are incorrect
+* ARROW-601 - Some logical types not supported when loading Parquet
+* ARROW-784 - Cleaning up thirdparty toolchain support in Arrow on Windows
+* ARROW-992 - [Python] In place development builds do not have a `__version__`
+
+## Improvement
+
+* ARROW-1041 - [Python] Support `read_pandas` on a directory of Parquet files
+* ARROW-1100 - [Python] Add "mode" property to NativeFile instances
+* ARROW-1102 - Make MessageSerializer.serializeMessage() public
+* ARROW-1120 - [Python] Write support for int96
+* ARROW-1137 - Python: Ensure Pandas roundtrip of all-None column
+* ARROW-1148 - [C++] Raise minimum CMake version to 3.2
+* ARROW-1151 - [C++] Add gcc branch prediction to status check macro
+* ARROW-1160 - C++: Implement DictionaryBuilder
+* ARROW-1165 - [C++] Refactor PythonDecimalToArrowDecimal to not use templates
+* ARROW-1185 - [C++] Clean up arrow::Status implementation, add `warn_unused_result` attribute for clang
+* ARROW-1187 - Serialize a DataFrame with None column
+* ARROW-1193 - [C++] Support pkg-config for `arrow_python.so`
+* ARROW-1196 - [C++] Appveyor separate jobs for Debug/Release builds from sources; Build with conda toolchain; Build with NMake Makefiles Generator
+* ARROW-1199 - [C++] Introduce mutable POD struct for generic array data
+* ARROW-1202 - Remove semicolons from status macros
+* ARROW-1217 - [GLib] Add GInputStream based arrow::io::RandomAccessFile
+* ARROW-1220 - [C++] Standardize usage of `*_HOME` cmake script variables for 3rd party libs
+* ARROW-1221 - [C++] Pin clang-format version
+* ARROW-1229 - [GLib] Follow Reader API change (get -> read)
+* ARROW-742 - Handling exceptions during execution of `std::wstring_convert`
+* ARROW-834 - [Python] Support creating Arrow arrays from Python iterables
+* ARROW-915 - Struct Array reads limited support
+* ARROW-935 - [Java] Build Javadoc in Travis CI
+* ARROW-960 - [Python] Add source build guide for macOS + Homebrew
+* ARROW-962 - [Python] Add schema attribute to FileReader
+* ARROW-966 - [Python] `pyarrow.list_` should also accept Field instance
+* ARROW-978 - [Python] Use sphinx-bootstrap-theme for Sphinx documentation
+
+## New Feature
+
+* ARROW-1048 - Allow user `LD_LIBRARY_PATH` to be used with source release script
+* ARROW-1073 - C++: Adaptive integer builder
+* ARROW-1095 - [Website] Add Arrow icon asset
+* ARROW-111 - [C++] Add static analyzer to tool chain to verify checking of Status returns
+* ARROW-1122 - [Website] Guest blog post on Arrow + ODBC from turbodbc
+* ARROW-1123 - C++: Make jemalloc the default allocator
+* ARROW-1135 - Upgrade Travis CI clang builds to use LLVM 4.0
+* ARROW-1142 - [C++] Move over compression library toolchain from parquet-cpp
+* ARROW-1145 - [GLib] Add `get_values()`
+* ARROW-1154 - [C++] Migrate more computational utility code from parquet-cpp
+* ARROW-1183 - [Python] Implement time type conversions in `to_pandas`
+* ARROW-1198 - Python: Add public C++ API to unwrap PyArrow object
+* ARROW-1212 - [GLib] Add `garrow_binary_array_get_offsets_buffer()`
+* ARROW-1214 - [Python] Add classes / functions to enable stream message components to be handled outside of the stream reader class
+* ARROW-1227 - [GLib] Support GOutputStream
+* ARROW-460 - [C++] Implement JSON round trip for DictionaryArray
+* ARROW-462 - [C++] Implement in-memory conversions between non-nested primitive types and DictionaryArray equivalent
+* ARROW-575 - Python: Auto-detect nested lists and nested numpy arrays in Pandas
+* ARROW-597 - [Python] Add convenience function to yield DataFrame from any object that a StreamReader or FileReader can read from
+* ARROW-599 - [C++] Add LZ4 codec to 3rd-party toolchain
+* ARROW-600 - [C++] Add ZSTD codec to 3rd-party toolchain
+* ARROW-692 - Java<->C++ Integration tests for dictionary-encoded vectors
+* ARROW-693 - [Java] Add JSON support for dictionary vectors
+
+## Task
+
+* ARROW-1052 - Arrow 0.5.0 release
+
+## Test
+
+* ARROW-1228 - [GLib] Test file name should be the same name as target class
+* ARROW-1233 - [C++] Validate cmake script resolving of 3rd party linked libs from correct location in toolchain build
# Apache Arrow 0.4.1 (9 June 2017)
## Bug
-* ARROW-1039 - Python: pyarrow.Filesystem.read_parquet causing error if nthreads>1
+* ARROW-1039 - Python: `pyarrow.Filesystem.read_parquet` causing error if nthreads>1
* ARROW-1050 - [C++] Export arrow::ValidateArray
-* ARROW-1051 - [Python] If pyarrow.parquet fails to import due to a shared library ABI conflict, the test_parquet.py tests silently do not run
+* ARROW-1051 - [Python] If pyarrow.parquet fails to import due to a shared library ABI conflict, the `test_parquet.py` tests silently do not run
* ARROW-1056 - [Python] Parquet+HDFS test failure due to writing pandas index
* ARROW-1057 - Fix cmake warning and msvc debug asserts
* ARROW-1062 - [GLib] Examples use old API
@@ -27,8 +170,8 @@
* ARROW-1075 - [GLib] Build error on macOS
* ARROW-1085 - [java] Follow up on template cleanup. Missing method for IntervalYear
* ARROW-1086 - [Python] pyarrow 0.4.0 on pypi is missing pxd files
-* ARROW-1088 - [Python] test_unicode_filename test fails when unicode filenames aren't supported by system
-* ARROW-1090 - [Python] build_ext usability
+* ARROW-1088 - [Python] `test_unicode_filename` test fails when unicode filenames aren't supported by system
+* ARROW-1090 - [Python] `build_ext` usability
* ARROW-1091 - Decimal scale and precision are flipped
* ARROW-1092 - More Decimal and scale flipped follow-up
* ARROW-1094 - [C++] Incomplete buffer reads in arrow::io::ReadableFile should exactly truncate returned buffer
@@ -63,9 +206,9 @@
* ARROW-1003 - [C++] Hdfs and java dlls fail to load when built for Windows with MSVC
* ARROW-1004 - ArrowInvalid: Invalid: Python object of type float is not None and is not a string, bool, or date object
-* ARROW-1017 - Python: Table.to_pandas leaks memory
+* ARROW-1017 - Python: `Table.to_pandas` leaks memory
* ARROW-1023 - Python: Fix bundling of arrow-cpp for macOS
-* ARROW-1033 - [Python] pytest discovers scripts/test_leak.py
+* ARROW-1033 - [Python] pytest discovers `scripts/test_leak.py`
* ARROW-1046 - [Python] Conform DataFrame metadata to pandas spec
* ARROW-1053 - [Python] Memory leak with RecordBatchFileReader
* ARROW-1054 - [Python] Test suite fails on pandas 0.19.2
@@ -74,16 +217,16 @@
* ARROW-813 - [Python] setup.py sdist must also bundle dependent cmake modules
* ARROW-824 - Date and Time Vectors should reflect timezone-less semantics
* ARROW-856 - CmakeError by Unknown compiler.
-* ARROW-881 - [Python] Reconstruct Pandas DataFrame indexes using custom_metadata
+* ARROW-881 - [Python] Reconstruct Pandas DataFrame indexes using `custom_metadata`
* ARROW-909 - libjemalloc.so.2: cannot open shared object file:
* ARROW-939 - Fix division by zero for zero-dimensional Tensors
* ARROW-940 - [JS] Generate multiple sets of artifacts
* ARROW-944 - Python: Compat broken for pandas==0.18.1
* ARROW-948 - [GLib] Update C++ header file list
* ARROW-952 - Compilation error on macOS with clang-802.0.42
-* ARROW-958 - [Python] Conda build guide still needs ARROW_HOME, PARQUET_HOME
-* ARROW-979 - [Python] Fix setuptools_scm version when release tag is not in the master timeline
-* ARROW-991 - [Python] PyArray_SimpleNew should not be used with NPY_DATETIME
+* ARROW-958 - [Python] Conda build guide still needs `ARROW_HOME`, `PARQUET_HOME`
+* ARROW-979 - [Python] Fix `setuptools_scm` version when release tag is not in the master timeline
+* ARROW-991 - [Python] `PyArray_SimpleNew` should not be used with `NPY_DATETIME`
* ARROW-995 - [Website] 0.3 release announce has a typo in reference
* ARROW-998 - [Doc] File format documents incorrect schema location
@@ -138,9 +281,9 @@
* ARROW-1044 - [GLib] Support Feather
* ARROW-29 - C++: Add re2 as optional 3rd-party toolchain dependency
* ARROW-446 - [Python] Document NativeFile interfaces, HDFS client in Sphinx
-* ARROW-482 - [Java] Provide API access to "custom_metadata" Field attribute in IPC setting
+* ARROW-482 - [Java] Provide API access to `custom_metadata` Field attribute in IPC setting
* ARROW-596 - [Python] Add convenience function to convert pandas.DataFrame to pyarrow.Buffer containing a file or stream representation
-* ARROW-714 - [C++] Add import_pyarrow C API in the style of NumPy for thirdparty C++ users
+* ARROW-714 - [C++] Add `import_pyarrow` C API in the style of NumPy for thirdparty C++ users
* ARROW-819 - [Python] Define public Cython API
* ARROW-872 - [JS] Read streaming format
* ARROW-873 - [JS] Implement fixed width list type
@@ -165,8 +308,8 @@
* ARROW-208 - Add checkstyle policy to java project
* ARROW-347 - Add method to pass CallBack when creating a transfer pair
* ARROW-413 - DATE type is not specified clearly
-* ARROW-431 - [Python] Review GIL release and acquisition in to_pandas conversion
-* ARROW-443 - [Python] Support for converting from strided pandas data in Table.from_pandas
+* ARROW-431 - [Python] Review GIL release and acquisition in `to_pandas` conversion
+* ARROW-443 - [Python] Support for converting from strided pandas data in `Table.from_pandas`
* ARROW-451 - [C++] Override DataType::Equals for other types with additional metadata
* ARROW-454 - pojo.Field doesn't implement hashCode()
* ARROW-526 - [Format] Update IPC.md to account for File format changes and Streaming format
@@ -178,8 +321,8 @@
* ARROW-604 - Python: boxed Field instances are missing the reference to DataType
* ARROW-613 - [JS] Implement random-access file format
* ARROW-617 - Time type is not specified clearly
-* ARROW-619 - Python: Fix typos in setup.py args and LD_LIBRARY_PATH
-* ARROW-623 - segfault with __repr__ of empty Field
+* ARROW-619 - Python: Fix typos in setup.py args and `LD_LIBRARY_PATH`
+* ARROW-623 - segfault with `__repr__` of empty Field
* ARROW-624 - [C++] Restore MakePrimitiveArray function
* ARROW-627 - [C++] Compatibility macros for exported extern template class declarations
* ARROW-628 - [Python] Install nomkl metapackage when building parquet-cpp for faster Travis builds
@@ -201,7 +344,7 @@
* ARROW-686 - [C++] Account for time metadata changes, add time32 and time64 types
* ARROW-689 - [GLib] Install header files and documents to wrong directories
* ARROW-691 - [Java] Encode dictionary Int type in message format
-* ARROW-697 - [Java] Raise appropriate exceptions when encountering large (> INT32_MAX) record batches
+* ARROW-697 - [Java] Raise appropriate exceptions when encountering large (> `INT32_MAX`) record batches
* ARROW-699 - [C++] Arrow dynamic libraries are missed on run of unit tests on Windows
* ARROW-702 - Fix BitVector.copyFromSafe to reAllocate instead of returning false
* ARROW-703 - Fix issue where setValueCount(0) doesn’t work in the case that we’ve shipped vectors across the wire
@@ -211,14 +354,14 @@
* ARROW-715 - Python: Explicit pandas import makes it a hard requirement
* ARROW-716 - error building arrow/python
* ARROW-720 - [java] arrow should not have a dependency on slf4j bridges in compile
-* ARROW-723 - Arrow freezes on write if chunk_size=0
+* ARROW-723 - Arrow freezes on write if `chunk_size=0`
* ARROW-726 - [C++] PyBuffer dtor may segfault if constructor passed an object not exporting buffer protocol
* ARROW-732 - Schema comparison bugs in struct and union types
* ARROW-736 - [Python] Mixed-type object DataFrame columns should not silently coerce to an Arrow type by default
* ARROW-738 - [Python] Fix manylinux1 packaging
* ARROW-739 - Parallel build fails non-deterministically.
* ARROW-740 - FileReader fails for large objects
-* ARROW-747 - [C++] Fix spurious warning caused by passing dl to add_dependencies
+* ARROW-747 - [C++] Fix spurious warning caused by passing dl to `add_dependencies`
* ARROW-749 - [Python] Delete incomplete binary files when writing fails
* ARROW-753 - [Python] Unit tests in arrow/python fail to link on some OS X platforms
* ARROW-756 - [C++] Do not pass -fPIC when compiling with MSVC
@@ -238,13 +381,13 @@
* ARROW-809 - C++: Writing sliced record batch to IPC writes the entire array
* ARROW-812 - Pip install pyarrow on mac failed.
* ARROW-817 - [C++] Fix incorrect code comment from ARROW-722
-* ARROW-821 - [Python] Extra file _table_api.h generated during Python build process
+* ARROW-821 - [Python] Extra file `_table_api.h` generated during Python build process
* ARROW-822 - [Python] StreamWriter fails to open with socket as sink
-* ARROW-826 - Compilation error on Mac with -DARROW_PYTHON=on
+* ARROW-826 - Compilation error on Mac with `-DARROW_PYTHON=on`
* ARROW-829 - Python: Parquet: Dictionary encoding is deactivated if column-wise compression was selected
* ARROW-830 - Python: jemalloc is not anymore publicly exposed
-* ARROW-839 - [C++] Portable alternative to PyDate_to_ms function
-* ARROW-847 - C++: BUILD_BYPRODUCTS not specified anymore for gtest
+* ARROW-839 - [C++] Portable alternative to `PyDate_to_ms` function
+* ARROW-847 - C++: `BUILD_BYPRODUCTS` not specified anymore for gtest
* ARROW-852 - Python: Also set Arrow Library PATHS when detection was done through pkg-config
* ARROW-853 - [Python] It is no longer necessary to modify the RPATH of the Cython extensions on many environments
* ARROW-858 - Remove dependency on boost regex
@@ -262,7 +405,7 @@
* ARROW-914 - [C++/Python] Fix Decimal ToBytes
* ARROW-922 - Allow Flatbuffers and RapidJSON to be used locally on Windows
* ARROW-928 - Update CMAKE script to detect unsupported msvc compilers versions
-* ARROW-933 - [Python] arrow_python bindings have debug print statement
+* ARROW-933 - [Python] `arrow_python` bindings have debug print statement
* ARROW-934 - [GLib] Glib sources missing from result of 02-source.sh
* ARROW-936 - Fix release README
* ARROW-938 - Fix Apache Rat errors from source release build
@@ -275,7 +418,7 @@
* ARROW-566 - Python: Deterministic position of libarrow in manylinux1 wheels
* ARROW-569 - [C++] Set version for .pc
* ARROW-577 - [C++] Refactor StreamWriter and FileWriter to have private implementations
-* ARROW-580 - C++: Also provide jemalloc_X targets if only a static or shared version is found
+* ARROW-580 - C++: Also provide `jemalloc_X` targets if only a static or shared version is found
* ARROW-582 - [Java] Add Date/Time Support to JSON File
* ARROW-589 - C++: Use system provided shared jemalloc if static is unavailable
* ARROW-593 - [C++] Rename ReadableFileInterface to RandomAccessFile
@@ -296,7 +439,7 @@
* ARROW-679 - [Format] Change RecordBatch and Field length members from int to long
* ARROW-681 - [C++] Build Arrow on Windows with dynamically linked boost
* ARROW-684 - Python: More informative message when parquet-cpp but not parquet-arrow is available
-* ARROW-688 - [C++] Use CMAKE_INSTALL_INCLUDEDIR for consistency
+* ARROW-688 - [C++] Use `CMAKE_INSTALL_INCLUDEDIR` for consistency
* ARROW-690 - Only send JIRA updates to issues@arrow.apache.org
* ARROW-700 - Add headroom interface for allocator.
* ARROW-706 - [GLib] Add package install document
@@ -311,13 +454,13 @@
* ARROW-731 - [C++] Add shared library related versions to .pc
* ARROW-741 - [Python] Add Python 3.6 to Travis CI
* ARROW-743 - [C++] Consolidate unit tests for code in array.h
-* ARROW-744 - [GLib] Re-add an assertion to garrow_table_new() test
+* ARROW-744 - [GLib] Re-add an assertion to `garrow_table_new()` test
* ARROW-745 - [C++] Allow use of system cpplint
-* ARROW-746 - [GLib] Add garrow_array_get_data_type()
+* ARROW-746 - [GLib] Add `garrow_array_get_data_type()`
* ARROW-751 - [Python] Rename all Cython extensions to "private" status with leading underscore
* ARROW-752 - [Python] Construct pyarrow.DictionaryArray from boxed pyarrow array objects
-* ARROW-754 - [GLib] Add garrow_array_is_null()
-* ARROW-755 - [GLib] Add garrow_array_get_value_type()
+* ARROW-754 - [GLib] Add `garrow_array_is_null()`
+* ARROW-755 - [GLib] Add `garrow_array_get_value_type()`
* ARROW-758 - [C++] Fix compiler warnings on MSVC x64
* ARROW-761 - [Python] Add function to compute the total size of tensor payloads, including metadata and padding
* ARROW-763 - C++: Use `python-config` to find libpythonX.X.dylib
@@ -329,7 +472,7 @@
* ARROW-779 - [C++/Python] Raise exception if old metadata encountered
* ARROW-782 - [C++] Change struct to class for objects that meet the criteria in the Google style guide
* ARROW-788 - Possible nondeterminism in Tensor serialization code
-* ARROW-795 - [C++] Combine libarrow/libarrow_io/libarrow_ipc
+* ARROW-795 - [C++] Combine `libarrow/libarrow_io/libarrow_ipc`
* ARROW-802 - [GLib] Add read examples
* ARROW-803 - [GLib] Update package repository URL
* ARROW-804 - [GLib] Update build document
@@ -342,7 +485,7 @@
* ARROW-816 - [C++] Use conda packages for RapidJSON, Flatbuffers to speed up builds
* ARROW-818 - [Python] Review public pyarrow. API completeness and update docs
* ARROW-820 - [C++] Build dependencies for Parquet library without arrow support
-* ARROW-825 - [Python] Generalize pyarrow.from_pylist to accept any object implementing the PySequence protocol
+* ARROW-825 - [Python] Generalize `pyarrow.from_pylist` to accept any object implementing the PySequence protocol
* ARROW-827 - [Python] Variety of Parquet improvements to support Dask integration
* ARROW-828 - [CPP] Document new requirement (libboost-regex-dev) in README.md
* ARROW-832 - [C++] Upgrade thirdparty gtest to 1.8.0
@@ -352,7 +495,7 @@
* ARROW-845 - [Python] Sync FindArrow.cmake changes from parquet-cpp
* ARROW-846 - [GLib] Add GArrowTensor, GArrowInt8Tensor and GArrowUInt8Tensor
* ARROW-848 - [Python] Improvements / fixes to conda quickstart guide
-* ARROW-849 - [C++] Add optional $ARROW_BUILD_TOOLCHAIN environment variable option for configuring build environment
+* ARROW-849 - [C++] Add optional `$ARROW_BUILD_TOOLCHAIN` environment variable option for configuring build environment
* ARROW-857 - [Python] Automate publishing Python documentation to arrow-site
* ARROW-860 - [C++] Decide if typed Tensor subclasses are worthwhile
* ARROW-861 - [Python] Move DEVELOPMENT.md to Sphinx docs
@@ -362,8 +505,8 @@
* ARROW-868 - [GLib] Use GBytes to reduce copy
* ARROW-871 - [GLib] Unify DataType files
* ARROW-876 - [GLib] Unify ArrayBuffer files
-* ARROW-877 - [GLib] Add garrow_array_get_null_bitmap()
-* ARROW-878 - [GLib] Add garrow_binary_array_get_buffer()
+* ARROW-877 - [GLib] Add `garrow_array_get_null_bitmap()`
+* ARROW-878 - [GLib] Add `garrow_binary_array_get_buffer()`
* ARROW-892 - [GLib] Fix GArrowTensor document
* ARROW-893 - Add GLib document to Web site
* ARROW-894 - [GLib] Add GArrowPoolBuffer
@@ -389,13 +532,13 @@
* ARROW-341 - [Python] Making libpyarrow available to third parties
* ARROW-452 - [C++/Python] Merge "Feather" file format implementation
* ARROW-459 - [C++] Implement IPC round trip for DictionaryArray, dictionaries shared across record batches
-* ARROW-483 - [C++/Python] Provide access to "custom_metadata" Field attribute in IPC setting
+* ARROW-483 - [C++/Python] Provide access to `custom_metadata` Field attribute in IPC setting
* ARROW-491 - [C++] Add FixedWidthBinary type
* ARROW-493 - [C++] Allow in-memory array over 2^31 -1 elements but require splitting at IPC / RPC boundaries
* ARROW-502 - [C++/Python] Add MemoryPool implementation that logs allocation activity to std::cout
* ARROW-510 - Add integration tests for date and time types
* ARROW-520 - [C++] Add STL-compliant allocator that hooks into an arrow::MemoryPool
-* ARROW-528 - [Python] Support _metadata or _common_metadata files when reading Parquet directories
+* ARROW-528 - [Python] Support `_metadata` or `_common_metadata` files when reading Parquet directories
* ARROW-534 - [C++] Add IPC tests for date/time types
* ARROW-539 - [Python] Support reading Parquet datasets with standard partition directory schemes
* ARROW-550 - [Format] Add a TensorMessage type
@@ -444,7 +587,7 @@
* ARROW-771 - [Python] Add APIs for reading individual Parquet row groups
* ARROW-773 - [C++] Add function to create arrow::Table with column appended to existing table
* ARROW-865 - [Python] Verify Parquet roundtrips for new date/time types
-* ARROW-880 - [GLib] Add garrow_primitive_array_get_buffer()
+* ARROW-880 - [GLib] Add `garrow_primitive_array_get_buffer()`
* ARROW-890 - [GLib] Add GArrowMutableBuffer
* ARROW-926 - Update KEYS to include wesm
@@ -481,7 +624,7 @@
* ARROW-323 - [Python] Opt-in to PyArrow parquet build rather than skipping silently on failure
* ARROW-334 - [Python] OS X rpath issues on some configurations
* ARROW-337 - UnionListWriter.list() is doing more than it should, this can cause data corruption
-* ARROW-339 - Make merge_arrow_pr script work with Python 3
+* ARROW-339 - Make `merge_arrow_pr` script work with Python 3
* ARROW-340 - [C++] Opening a writeable file on disk that already exists does not truncate to zero
* ARROW-342 - Set Python version on release
* ARROW-345 - libhdfs integration doesn't work for Mac
@@ -490,15 +633,15 @@
* ARROW-349 - Six is missing as a requirement in the python setup.py
* ARROW-351 - Time type has no unit
* ARROW-354 - Connot compare an array of empty strings to another
-* ARROW-357 - Default Parquet chunk_size of 64k is too small
+* ARROW-357 - Default Parquet `chunk_size` of 64k is too small
* ARROW-358 - [C++] libhdfs can be in non-standard locations in some Hadoop distributions
-* ARROW-362 - Python: Calling to_pandas on a table read from Parquet leaks memory
+* ARROW-362 - Python: Calling `to_pandas` on a table read from Parquet leaks memory
* ARROW-371 - Python: Table with null timestamp becomes float in pandas
-* ARROW-375 - columns parameter in parquet.read_table() raises KeyError for valid column
+* ARROW-375 - columns parameter in `parquet.read_table()` raises KeyError for valid column
* ARROW-384 - Align Java and C++ RecordBatch data and metadata layout
* ARROW-386 - [Java] Respect case of struct / map field names
* ARROW-387 - [C++] arrow::io::BufferReader does not permit shared memory ownership in zero-copy reads
-* ARROW-390 - C++: CMake fails on json-integration-test with ARROW_BUILD_TESTS=OFF
+* ARROW-390 - C++: CMake fails on json-integration-test with `ARROW_BUILD_TESTS=OFF`
* ARROW-392 - Fix string/binary integration tests
* ARROW-393 - [JAVA] JSON file reader fails to set the buffer size on String data vector
* ARROW-395 - Arrow file format writes record batches in reverse order.
@@ -509,19 +652,19 @@
* ARROW-402 - [Java] "refCnt gone negative" error in integration tests
* ARROW-403 - [JAVA] UnionVector: Creating a transfer pair doesn't transfer the schema to destination vector
* ARROW-404 - [Python] Closing an HdfsClient while there are still open file handles results in a crash
-* ARROW-405 - [C++] Be less stringent about finding include/hdfs.h in HADOOP_HOME
+* ARROW-405 - [C++] Be less stringent about finding include/hdfs.h in `HADOOP_HOME`
* ARROW-406 - [C++] Large HDFS reads must utilize the set file buffer size when making RPCs
* ARROW-408 - [C++/Python] Remove defunct conda recipes
* ARROW-414 - [Java] "Buffer too large to resize to ..." error
* ARROW-420 - Align Date implementation between Java and C++
* ARROW-421 - [Python] Zero-copy buffers read by pyarrow::PyBytesReader must retain a reference to the parent PyBytes to avoid premature garbage collection issues
-* ARROW-422 - C++: IPC should depend on rapidjson_ep if RapidJSON is vendored
+* ARROW-422 - C++: IPC should depend on `rapidjson_ep` if RapidJSON is vendored
* ARROW-429 - git-archive SHA-256 checksums are changing
* ARROW-433 - [Python] Date conversion is locale-dependent
* ARROW-434 - Segfaults and encoding issues in Python Parquet reads
-* ARROW-435 - C++: Spelling mistake in if(RAPIDJSON_VENDORED)
+* ARROW-435 - C++: Spelling mistake in `if(RAPIDJSON_VENDORED)`
* ARROW-437 - [C++] clang compiler warnings from overridden virtual functions
-* ARROW-445 - C++: arrow_ipc is built before arrow/ipc/Message_generated.h was generated
+* ARROW-445 - C++: `arrow_ipc` is built before `arrow/ipc/Message_generated.h` was generated
* ARROW-447 - Python: Align scalar/pylist string encoding with pandas' one.
* ARROW-455 - [C++] BufferOutputStream dtor does not call Close()
* ARROW-469 - C++: Add option so that resize doesn't decrease the capacity
@@ -536,13 +679,13 @@
* ARROW-519 - [C++] Missing vtable in libarrow.dylib on Xcode 6.4
* ARROW-523 - Python: Account for changes in PARQUET-834
* ARROW-533 - [C++] arrow::TimestampArray / TimeArray has a broken constructor
-* ARROW-535 - [Python] Add type mapping for NPY_LONGLONG
+* ARROW-535 - [Python] Add type mapping for `NPY_LONGLONG`
* ARROW-537 - [C++] StringArray/BinaryArray comparisons may be incorrect when values with non-zero length are null
* ARROW-540 - [C++] Fix build in aftermath of ARROW-33
-* ARROW-543 - C++: Lazily computed null_counts counts number of non-null entries
+* ARROW-543 - C++: Lazily computed `null_counts` counts number of non-null entries
* ARROW-544 - [C++] ArrayLoader::LoadBinary fails for length-0 arrays
* ARROW-545 - [Python] Ignore files without .parq or .parquet prefix when reading directory of files
-* ARROW-548 - [Python] Add nthreads option to pyarrow.Filesystem.read_parquet
+* ARROW-548 - [Python] Add nthreads option to `pyarrow.Filesystem.read_parquet`
* ARROW-551 - C++: Construction of Column with nullptr Array segfaults
* ARROW-556 - [Integration] Can not run Integration tests if different cpp build path
* ARROW-561 - Update java & python dependencies to improve downstream packaging experience
@@ -551,7 +694,7 @@
* ARROW-189 - C++: Use ExternalProject to build thirdparty dependencies
* ARROW-191 - Python: Provide infrastructure for manylinux1 wheels
-* ARROW-328 - [C++] Return shared_ptr by value instead of const-ref?
+* ARROW-328 - [C++] Return `shared_ptr` by value instead of const-ref?
* ARROW-330 - [C++] CMake functions to simplify shared / static library configuration
* ARROW-333 - Make writers update their internal schema even when no data is written.
* ARROW-335 - Improve Type apis and toString() by encapsulating flatbuffers better
@@ -562,20 +705,20 @@
* ARROW-356 - Add documentation about reading Parquet
* ARROW-360 - C++: Add method to shrink PoolBuffer using realloc
* ARROW-361 - Python: Support reading a column-selection from Parquet files
-* ARROW-365 - Python: Provide Array.to_pandas()
+* ARROW-365 - Python: Provide `Array.to_pandas()`
* ARROW-366 - [java] implement Dictionary vector
* ARROW-374 - Python: clarify unicode vs. binary in API
-* ARROW-379 - Python: Use setuptools_scm/setuptools_scm_git_archive to provide the version number
+* ARROW-379 - Python: Use `setuptools_scm`/`setuptools_scm_git_archive` to provide the version number
* ARROW-380 - [Java] optimize null count when serializing vectors.
* ARROW-382 - Python: Extend API documentation
* ARROW-396 - Python: Add pyarrow.schema.Schema.equals
-* ARROW-409 - Python: Change pyarrow.Table.dataframe_from_batches API to create Table instead
+* ARROW-409 - Python: Change `pyarrow.Table.dataframe_from_batches` API to create Table instead
* ARROW-411 - [Java] Move Intergration.compare and Intergration.compareSchemas to a public utils class
-* ARROW-423 - C++: Define BUILD_BYPRODUCTS in external project to support non-make CMake generators
+* ARROW-423 - C++: Define `BUILD_BYPRODUCTS` in external project to support non-make CMake generators
* ARROW-425 - Python: Expose a C function to convert arrow::Table to pyarrow.Table
* ARROW-426 - Python: Conversion from pyarrow.Array to a Python list
* ARROW-430 - Python: Better version handling
-* ARROW-432 - [Python] Avoid unnecessary memory copy in to_pandas conversion by using low-level pandas internals APIs
+* ARROW-432 - [Python] Avoid unnecessary memory copy in `to_pandas` conversion by using low-level pandas internals APIs
* ARROW-450 - Python: Fixes for PARQUET-818
* ARROW-457 - Python: Better control over memory pool
* ARROW-458 - Python: Expose jemalloc MemoryPool
@@ -596,7 +739,7 @@
* ARROW-108 - [C++] Add IPC round trip for union types
* ARROW-221 - Add switch for writing Parquet 1.0 compatible logical types
-* ARROW-227 - [C++/Python] Hook arrow_io generic reader / writer interface into arrow_parquet
+* ARROW-227 - [C++/Python] Hook `arrow_io` generic reader / writer interface into `arrow_parquet`
* ARROW-228 - [Python] Create an Arrow-cpp-compatible interface for reading bytes from Python file-like objects
* ARROW-243 - [C++] Add "driver" option to HdfsClient to choose between libhdfs and libhdfs3 at runtime
* ARROW-303 - [C++] Also build static libraries for leaf libraries
@@ -624,7 +767,7 @@
* ARROW-440 - [C++] Support pkg-config
* ARROW-441 - [Python] Expose Arrow's file and memory map classes as NativeFile subclasses
* ARROW-442 - [Python] Add public Python API to inspect Parquet file metadata
-* ARROW-444 - [Python] Avoid unnecessary memory copies from use of PyBytes_* C APIs
+* ARROW-444 - [Python] Avoid unnecessary memory copies from use of `PyBytes_*` C APIs
* ARROW-449 - Python: Conversion from pyarrow.{Table,RecordBatch} to a Python dict
* ARROW-456 - C++: Add jemalloc based MemoryPool
* ARROW-461 - [Python] Implement conversion between arrow::DictionaryArray and pandas.Categorical
@@ -657,9 +800,9 @@
* ARROW-268 - [C++] Flesh out union implementation to have all required methods for IPC
* ARROW-327 - [Python] Remove conda builds from Travis CI processes
* ARROW-353 - Arrow release 0.2
-* ARROW-359 - Need to document ARROW_LIBHDFS_DIR
+* ARROW-359 - Need to document `ARROW_LIBHDFS_DIR`
* ARROW-367 - [java] converter csv/json <=> Arrow file format for Integration tests
-* ARROW-368 - Document use of LD_LIBRARY_PATH when using Python
+* ARROW-368 - Document use of `LD_LIBRARY_PATH` when using Python
* ARROW-372 - Create JSON arrow file format for integration tests
* ARROW-506 - Implement Arrow Echo server for integration testing
* ARROW-527 - clean drill-module.conf file
@@ -687,7 +830,7 @@
* ARROW-210 - [C++] Tidy up the type system a little bit
* ARROW-211 - Several typos/errors in Layout.md examples
* ARROW-217 - Fix Travis w.r.t conda 4.1.0 changes
-* ARROW-219 - [C++] Passed CMAKE_CXX_FLAGS are being dropped, fix compiler warnings
+* ARROW-219 - [C++] Passed `CMAKE_CXX_FLAGS` are being dropped, fix compiler warnings
* ARROW-223 - Do not link against libpython
* ARROW-225 - [C++/Python] master Travis CI build is broken
* ARROW-244 - [C++] Some global APIs of IPC module should be visible to the outside
@@ -699,7 +842,7 @@
* ARROW-266 - [C++] Fix the broken build
* ARROW-274 - Make the MapVector nullable
* ARROW-278 - [Format] Struct type name consistency in implementations and metadata
-* ARROW-283 - [C++] Update arrow_parquet to account for API changes in PARQUET-573
+* ARROW-283 - [C++] Update `arrow_parquet` to account for API changes in PARQUET-573
* ARROW-284 - [C++] Triage builds by disabling Arrow-Parquet module
* ARROW-287 - [java] Make nullable vectors use a BitVecor instead of UInt1Vector for bits
* ARROW-297 - Fix Arrow pom for release
@@ -737,7 +880,7 @@
* ARROW-212 - [C++] Clarify the fact that PrimitiveArray is now abstract class
* ARROW-213 - Exposing static arrow build
* ARROW-218 - Add option to use GitHub API token via environment variable when merging PRs
-* ARROW-234 - [C++] Build with libhdfs support in arrow_io in conda builds
+* ARROW-234 - [C++] Build with libhdfs support in `arrow_io` in conda builds
* ARROW-238 - C++: InternalMemoryPool::Free() should throw an error when there is insufficient allocated memory
* ARROW-245 - [Format] Clarify Arrow's relationship with big endian platforms
* ARROW-252 - Add implementation guidelines to the documentation
@@ -757,7 +900,7 @@
* ARROW-290 - Specialize alloc() in ArrowBuf
* ARROW-292 - [Java] Upgrade Netty to 4.041
* ARROW-299 - Use absolute namespace in macros
-* ARROW-305 - Add compression and use_dictionary options to Parquet interface
+* ARROW-305 - Add compression and `use_dictionary` options to Parquet interface
* ARROW-306 - Add option to pass cmake arguments via environment variable
* ARROW-315 - Finalize timestamp type
* ARROW-319 - Add canonical Arrow Schema json representation
@@ -767,7 +910,7 @@
* ARROW-54 - Python: rename package to "pyarrow"
* ARROW-64 - Add zsh support to C++ build scripts
* ARROW-66 - Maybe some missing steps in installation guide
-* ARROW-68 - Update setup_build_env and third-party script to be more userfriendly
+* ARROW-68 - Update `setup_build_env` and third-party script to be more userfriendly
* ARROW-71 - C++: Add script to run clang-tidy on codebase
* ARROW-73 - Support CMake 2.8
* ARROW-78 - C++: Add constructor for DecimalType
@@ -809,7 +952,7 @@
* ARROW-267 - [C++] C++ implementation of file-like layout for RPC / IPC
* ARROW-28 - C++: Add google/benchmark to the 3rd-party build toolchain
* ARROW-293 - [C++] Implementations of IO interfaces for operating system files
-* ARROW-296 - [C++] Remove arrow_parquet C++ module and related parts of build system
+* ARROW-296 - [C++] Remove `arrow_parquet` C++ module and related parts of build system
* ARROW-3 - Post Initial Arrow Format Spec
* ARROW-30 - Python: pandas/NumPy to/from Arrow conversion routines
* ARROW-301 - [Format] Add some form of user field metadata to IPC schemas
@@ -819,8 +962,8 @@
* ARROW-37 - C++: Represent boolean array data in bit-packed form
* ARROW-4 - Initial Arrow CPP Implementation
* ARROW-42 - Python: Add to Travis CI build
-* ARROW-43 - Python: Add rudimentary console __repr__ for array types
-* ARROW-44 - Python: Implement basic object model for scalar values (i.e. results of arrow_arr[i])
+* ARROW-43 - Python: Add rudimentary console `__repr__` for array types
+* ARROW-44 - Python: Implement basic object model for scalar values (i.e. results of `arrow_arr[i]`)
* ARROW-48 - Python: Add Schema object wrapper
* ARROW-49 - Python: Add Column and Table wrapper interface
* ARROW-53 - Python: Fix RPATH and add source installation instructions
diff --git a/README.md b/README.md
index 27908958785..9dda25de911 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,20 @@
## Apache Arrow
diff --git a/appveyor.yml b/appveyor.yml
index 91e9ee26490..55c58d0bf66 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -60,7 +60,7 @@ environment:
init:
- set MINICONDA=C:\Miniconda35-x64
- set PATH=%MINICONDA%;%MINICONDA%/Scripts;%MINICONDA%/Library/bin;%PATH%
- - if "%GENERATOR%"=="NMake Makefiles" call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x86_amd64
+ - if "%GENERATOR%"=="NMake Makefiles" call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
build_script:
- git config core.symlinks true
diff --git a/c_glib/README.md b/c_glib/README.md
index 622938550d8..fec877e236f 100644
--- a/c_glib/README.md
+++ b/c_glib/README.md
@@ -1,15 +1,20 @@
# Arrow GLib
diff --git a/c_glib/doc/reference/arrow-glib-docs.sgml b/c_glib/doc/reference/arrow-glib-docs.sgml
index 4fa1b7c42de..26fd2f6262b 100644
--- a/c_glib/doc/reference/arrow-glib-docs.sgml
+++ b/c_glib/doc/reference/arrow-glib-docs.sgml
@@ -1,16 +1,21 @@
# Arrow GLib example
diff --git a/c_glib/example/go/Makefile b/c_glib/example/go/Makefile
index d8831122d4d..fa2163ca81b 100644
--- a/c_glib/example/go/Makefile
+++ b/c_glib/example/go/Makefile
@@ -1,14 +1,19 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
PROGRAMS = \
read-batch \
diff --git a/c_glib/example/go/README.md b/c_glib/example/go/README.md
index 2054055e655..76eeed78c71 100644
--- a/c_glib/example/go/README.md
+++ b/c_glib/example/go/README.md
@@ -1,15 +1,20 @@
# Arrow Go example
diff --git a/c_glib/example/lua/README.md b/c_glib/example/lua/README.md
index 6145bc74ddd..e7e3351fef1 100644
--- a/c_glib/example/lua/README.md
+++ b/c_glib/example/lua/README.md
@@ -1,15 +1,20 @@
# Arrow Lua example
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 22108abdd3b..04fe2ab62cb 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -51,9 +51,16 @@ conda create -n arrow -q -y python=%PYTHON% ^
six pytest setuptools numpy pandas cython ^
thrift-cpp
+@rem ARROW-1294 CMake 3.9.0 in conda-forge breaks the build
+set ARROW_CMAKE_VERSION=3.8.0
+
if "%JOB%" == "Toolchain" (
+
conda install -n arrow -q -y -c conda-forge ^
- flatbuffers rapidjson cmake git boost-cpp ^
+ flatbuffers rapidjson ^
+ cmake=%ARROW_CMAKE_VERSION% ^
+ git ^
+ boost-cpp ^
snappy zlib brotli gflags lz4-c zstd
)
@@ -107,6 +114,9 @@ popd
@rem see PARQUET-1018
pushd python
+
+set PYARROW_CXXFLAGS=/WX
python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp bdist_wheel || exit /B
py.test pyarrow -v -s --parquet || exit /B
+
popd
diff --git a/ci/travis_before_script_c_glib.sh b/ci/travis_before_script_c_glib.sh
index 6547ea4e537..bf2d385d79d 100755
--- a/ci/travis_before_script_c_glib.sh
+++ b/ci/travis_before_script_c_glib.sh
@@ -1,17 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
-
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -ex
diff --git a/ci/travis_before_script_cpp.sh b/ci/travis_before_script_cpp.sh
index e250e705f1f..d456d308c53 100755
--- a/ci/travis_before_script_cpp.sh
+++ b/ci/travis_before_script_cpp.sh
@@ -1,36 +1,52 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -ex
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
+
if [ "$1" == "--only-library" ]; then
only_library_mode=yes
else
only_library_mode=no
+ source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
fi
-source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
-
-if [ $only_library_mode == "no" ]; then
- # C++ toolchain
- export CPP_TOOLCHAIN=$TRAVIS_BUILD_DIR/cpp-toolchain
- export RAPIDJSON_HOME=$CPP_TOOLCHAIN
-
+if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
# Set up C++ toolchain from conda-forge packages for faster builds
- source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh
- conda create -y -q -p $CPP_TOOLCHAIN python=2.7 rapidjson
+ conda create -y -q -p $CPP_TOOLCHAIN python=2.7 \
+ jemalloc=4.4.0 \
+ nomkl \
+ boost-cpp \
+ rapidjson \
+ flatbuffers \
+ gflags \
+ lz4-c \
+ snappy \
+ zstd \
+ brotli \
+ zlib \
+ cmake \
+ curl \
+ thrift-cpp \
+ ninja
fi
if [ $TRAVIS_OS_NAME == "osx" ]; then
@@ -45,7 +61,6 @@ pushd $ARROW_CPP_BUILD_DIR
CMAKE_COMMON_FLAGS="\
-DARROW_BUILD_BENCHMARKS=ON \
-DCMAKE_INSTALL_PREFIX=$ARROW_CPP_INSTALL \
--DARROW_PLASMA=ON \
-DARROW_NO_DEPRECATED_API=ON"
CMAKE_LINUX_FLAGS=""
CMAKE_OSX_FLAGS=""
@@ -60,8 +75,20 @@ else
# also in the manylinux1 image.
CMAKE_LINUX_FLAGS="\
$CMAKE_LINUX_FLAGS \
--DARROW_JEMALLOC=ON \
--DARROW_TEST_MEMCHECK=ON"
+-DARROW_JEMALLOC=ON"
+fi
+
+# Use Ninja for faster builds when using toolchain
+if [ $ARROW_TRAVIS_USE_TOOLCHAIN == "1" ]; then
+ CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -GNinja"
+fi
+
+if [ $ARROW_TRAVIS_PLASMA == "1" ]; then
+ CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_PLASMA=ON"
+fi
+
+if [ $ARROW_TRAVIS_VALGRIND == "1" ]; then
+ CMAKE_COMMON_FLAGS="$CMAKE_COMMON_FLAGS -DARROW_TEST_MEMCHECK=ON"
fi
if [ $TRAVIS_OS_NAME == "linux" ]; then
@@ -76,7 +103,7 @@ else
$ARROW_CPP_DIR
fi
-make VERBOSE=1 -j4
-make install
+$TRAVIS_MAKE -j4
+$TRAVIS_MAKE install
popd
diff --git a/ci/travis_before_script_js.sh b/ci/travis_before_script_js.sh
index 304c48137aa..b72accc2193 100755
--- a/ci/travis_before_script_js.sh
+++ b/ci/travis_before_script_js.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -ex
diff --git a/ci/travis_env_common.sh b/ci/travis_env_common.sh
index a2e591014cf..d84753125d5 100755
--- a/ci/travis_env_common.sh
+++ b/ci/travis_env_common.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
export MINICONDA=$HOME/miniconda
export PATH="$MINICONDA/bin:$PATH"
@@ -29,6 +34,19 @@ export ARROW_CPP_INSTALL=$TRAVIS_BUILD_DIR/cpp-install
export ARROW_CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build
export ARROW_C_GLIB_INSTALL=$TRAVIS_BUILD_DIR/c-glib-install
+if [ "$ARROW_TRAVIS_USE_TOOLCHAIN" == "1" ]; then
+ # C++ toolchain
+ export CPP_TOOLCHAIN=$TRAVIS_BUILD_DIR/cpp-toolchain
+ export ARROW_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN
+ export BOOST_ROOT=$CPP_TOOLCHAIN
+
+ export PATH=$CPP_TOOLCHAIN/bin:$PATH
+ export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH
+ export TRAVIS_MAKE=ninja
+else
+ export TRAVIS_MAKE=make
+fi
+
if [ $TRAVIS_OS_NAME == "osx" ]; then
export GOPATH=$TRAVIS_BUILD_DIR/gopath
fi
diff --git a/ci/travis_install_clang_tools.sh b/ci/travis_install_clang_tools.sh
index a4fd0e24619..bad1e73d24a 100644
--- a/ci/travis_install_clang_tools.sh
+++ b/ci/travis_install_clang_tools.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-add-repository -y \
diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh
index 369820b37f5..c2502a3744c 100644
--- a/ci/travis_install_conda.sh
+++ b/ci/travis_install_conda.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
diff --git a/ci/travis_script_c_glib.sh b/ci/travis_script_c_glib.sh
index 4bfa0c0af49..d392abdfbbc 100755
--- a/ci/travis_script_c_glib.sh
+++ b/ci/travis_script_c_glib.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
diff --git a/ci/travis_script_cpp.sh b/ci/travis_script_cpp.sh
index c368a1daedd..4e3e7bbea1c 100755
--- a/ci/travis_script_cpp.sh
+++ b/ci/travis_script_cpp.sh
@@ -1,20 +1,25 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
-: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
+source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
# Check licenses according to Apache policy
git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar.gz
@@ -22,7 +27,7 @@ git archive HEAD --prefix=apache-arrow/ --output=arrow-src.tar.gz
pushd $CPP_BUILD_DIR
-make lint
+$TRAVIS_MAKE lint
# ARROW-209: checks depending on the LLVM toolchain are disabled temporarily
# until we are able to install the full LLVM toolchain in Travis CI again
diff --git a/ci/travis_script_integration.sh b/ci/travis_script_integration.sh
index 6e93ed79a22..be025512f0b 100755
--- a/ci/travis_script_integration.sh
+++ b/ci/travis_script_integration.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
diff --git a/ci/travis_script_java.sh b/ci/travis_script_java.sh
index 259b73ec24e..2f6b685253b 100755
--- a/ci/travis_script_java.sh
+++ b/ci/travis_script_java.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
diff --git a/ci/travis_script_js.sh b/ci/travis_script_js.sh
index 52ac3b9bdf8..cb1e9e19440 100755
--- a/ci/travis_script_js.sh
+++ b/ci/travis_script_js.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh
index 4e6be62bd3e..14e6404d3de 100755
--- a/ci/travis_script_manylinux.sh
+++ b/ci/travis_script_manylinux.sh
@@ -1,16 +1,21 @@
#!/usr/bin/env bash
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -ex
@@ -18,4 +23,4 @@ set -ex
pushd python/manylinux1
git clone ../../ arrow
docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 .
-docker run --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
+docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh
index ac64c548d82..9135aaf38e4 100755
--- a/ci/travis_script_python.sh
+++ b/ci/travis_script_python.sh
@@ -1,41 +1,35 @@
#!/usr/bin/env bash
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License. See accompanying LICENSE file.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
set -e
source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh
export ARROW_HOME=$ARROW_CPP_INSTALL
-
-pushd $ARROW_PYTHON_DIR
export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env
+export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib:$LD_LIBRARY_PATH
+export PYARROW_CXXFLAGS="-Werror"
build_parquet_cpp() {
export PARQUET_ARROW_VERSION=$(git rev-parse HEAD)
- conda create -y -q -p $PARQUET_HOME python=3.6 cmake curl
- source activate $PARQUET_HOME
-
- # In case some package wants to download the MKL
- conda install -y -q nomkl
- conda install -y -q thrift-cpp snappy zlib brotli boost
-
- export BOOST_ROOT=$PARQUET_HOME
- export SNAPPY_HOME=$PARQUET_HOME
- export THRIFT_HOME=$PARQUET_HOME
- export ZLIB_HOME=$PARQUET_HOME
- export BROTLI_HOME=$PARQUET_HOME
+ # $CPP_TOOLCHAIN set up in before_script_cpp
+ export PARQUET_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN
PARQUET_DIR=$TRAVIS_BUILD_DIR/parquet
mkdir -p $PARQUET_DIR
@@ -47,38 +41,39 @@ build_parquet_cpp() {
cd build-dir
cmake \
+ -GNinja \
-DCMAKE_BUILD_TYPE=debug \
-DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \
+ -DPARQUET_BOOST_USE_SHARED=off \
-DPARQUET_BUILD_BENCHMARKS=off \
-DPARQUET_BUILD_EXECUTABLES=off \
- -DPARQUET_ZLIB_VENDORED=off \
- -DPARQUET_BUILD_TESTS=on \
+ -DPARQUET_BUILD_TESTS=off \
..
- make -j${CPU_COUNT}
- make install
+ ninja
+ ninja install
popd
}
build_parquet_cpp
-function build_arrow_libraries() {
- CPP_BUILD_DIR=$1
- CPP_DIR=$TRAVIS_BUILD_DIR/cpp
+function rebuild_arrow_libraries() {
+ pushd $ARROW_CPP_BUILD_DIR
- mkdir $CPP_BUILD_DIR
- pushd $CPP_BUILD_DIR
+ # Clear out prior build files
+ rm -rf *
- cmake -DARROW_BUILD_TESTS=off \
- -DARROW_PYTHON=on \
- -DPLASMA_PYTHON=on \
+ cmake -GNinja \
+ -DARROW_BUILD_TESTS=off \
+ -DARROW_BUILD_UTILITIES=off \
-DARROW_PLASMA=on \
- -DCMAKE_INSTALL_PREFIX=$2 \
- $CPP_DIR
+ -DARROW_PYTHON=on \
+ -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
+ $ARROW_CPP_DIR
- make -j4
- make install
+ ninja
+ ninja install
popd
}
@@ -87,9 +82,6 @@ python_version_tests() {
PYTHON_VERSION=$1
CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION
- export ARROW_HOME=$TRAVIS_BUILD_DIR/arrow-install-$PYTHON_VERSION
- export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib
-
conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION cmake curl
source activate $CONDA_ENV_DIR
@@ -103,27 +95,35 @@ python_version_tests() {
conda install -y -q pip numpy pandas cython
# Build C++ libraries
- build_arrow_libraries arrow-build-$PYTHON_VERSION $ARROW_HOME
+ rebuild_arrow_libraries
# Other stuff pip install
+ pushd $ARROW_PYTHON_DIR
pip install -r requirements.txt
-
- python setup.py build_ext --inplace --with-parquet
+ python setup.py build_ext --with-parquet --with-plasma \
+ install --single-version-externally-managed --record=record.text
+ popd
python -c "import pyarrow.parquet"
+ python -c "import pyarrow.plasma"
- python -m pytest -vv -r sxX pyarrow --parquet
+ if [ $TRAVIS_OS_NAME == "linux" ]; then
+ export PLASMA_VALGRIND=1
+ fi
+ PYARROW_PATH=$CONDA_PREFIX/lib/python$PYTHON_VERSION/site-packages/pyarrow
+ python -m pytest -vv -r sxX -s $PYARROW_PATH --parquet
+
+ pushd $ARROW_PYTHON_DIR
# Build documentation once
if [[ "$PYTHON_VERSION" == "3.6" ]]
then
conda install -y -q --file=doc/requirements.txt
python setup.py build_sphinx -s doc/source
fi
+ popd
}
# run tests for python 2.7 and 3.6
python_version_tests 2.7
python_version_tests 3.6
-
-popd
diff --git a/cpp/.clang-format b/cpp/.clang-format
index 33f282a20de..06453dfbb25 100644
--- a/cpp/.clang-format
+++ b/cpp/.clang-format
@@ -15,67 +15,6 @@
# specific language governing permissions and limitations
# under the License.
---
-Language: Cpp
-# BasedOnStyle: Google
-AccessModifierOffset: -1
-AlignAfterOpenBracket: false
-AlignConsecutiveAssignments: false
-AlignEscapedNewlinesLeft: true
-AlignOperands: true
-AlignTrailingComments: true
-AllowAllParametersOfDeclarationOnNextLine: true
-AllowShortBlocksOnASingleLine: true
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: Inline
-AllowShortIfStatementsOnASingleLine: true
-AllowShortLoopsOnASingleLine: false
-AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
-AlwaysBreakTemplateDeclarations: true
-BinPackArguments: true
-BinPackParameters: true
-BreakBeforeBinaryOperators: None
-BreakBeforeBraces: Attach
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializersBeforeComma: false
-ColumnLimit: 90
-CommentPragmas: '^ IWYU pragma:'
-ConstructorInitializerAllOnOneLineOrOnePerLine: true
-ConstructorInitializerIndentWidth: 4
-ContinuationIndentWidth: 4
-Cpp11BracedListStyle: true
+BasedOnStyle: Google
DerivePointerAlignment: false
-DisableFormat: false
-ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IndentCaseLabels: true
-IndentWidth: 2
-IndentWrappedFunctionNames: false
-KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: ''
-MacroBlockEnd: ''
-MaxEmptyLinesToKeep: 1
-NamespaceIndentation: None
-ObjCBlockIndentWidth: 2
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: false
-PenaltyBreakBeforeFirstCallParameter: 1000
-PenaltyBreakComment: 300
-PenaltyBreakFirstLessLess: 120
-PenaltyBreakString: 1000
-PenaltyExcessCharacter: 1000000
-PenaltyReturnTypeOnItsOwnLine: 200
-PointerAlignment: Left
-SpaceAfterCStyleCast: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeParens: ControlStatements
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 2
-SpacesInAngles: false
-SpacesInContainerLiterals: true
-SpacesInCStyleCastParentheses: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-Standard: Cpp11
-TabWidth: 8
-UseTab: Never
+ColumnLimit: 90
diff --git a/cpp/.gitignore b/cpp/.gitignore
index 4910544ec87..ec846b35ba6 100644
--- a/cpp/.gitignore
+++ b/cpp/.gitignore
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
thirdparty/
CMakeFiles/
CMakeCache.txt
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2891a5d7618..07b8e15b504 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -162,10 +162,14 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build with zstd compression"
ON)
+ option(ARROW_VERBOSE_THIRDPARTY_BUILD
+ "If off, output from ExternalProjects will be logged to files rather than shown"
+ ON)
+
if (MSVC)
set(BROTLI_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING
"Brotli static lib suffix used on Windows with MSVC (default _static)")
- set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING
+ set(SNAPPY_MSVC_STATIC_LIB_SUFFIX "_static" CACHE STRING
"Snappy static lib suffix used on Windows with MSVC (default is empty string)")
set(ZLIB_MSVC_STATIC_LIB_SUFFIX "libstatic" CACHE STRING
"Zlib static lib suffix used on Windows with MSVC (default libstatic)")
@@ -303,8 +307,28 @@ include_directories(src)
# For generate_export_header() and add_compiler_export_flags().
include(GenerateExportHeader)
-# Sets -fvisibility=hidden for gcc
-add_compiler_export_flags()
+# Adapted from Apache Kudu: https://github.com/apache/kudu/commit/bd549e13743a51013585
+# Honor visibility properties for all target types. See
+# "cmake --help-policy CMP0063" for details.
+#
+# This policy was only added to cmake in version 3.3, so until the cmake in
+# thirdparty is updated, we must check if the policy exists before setting it.
+if(POLICY CMP0063)
+ cmake_policy(SET CMP0063 NEW)
+endif()
+
+if (PARQUET_BUILD_SHARED)
+ if (POLICY CMP0063)
+ set_target_properties(arrow_shared
+ PROPERTIES
+ C_VISIBILITY_PRESET hidden
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1)
+ else()
+ # Sets -fvisibility=hidden for gcc
+ add_compiler_export_flags()
+ endif()
+endif()
############################################################
# Benchmarking
@@ -582,20 +606,6 @@ if (ARROW_STATIC_LINK_LIBS)
add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS})
endif()
-set(ARROW_MIN_TEST_LIBS
- arrow_static
- ${ARROW_STATIC_LINK_LIBS}
- gtest
- gtest_main)
-
-if(NOT MSVC)
- set(ARROW_MIN_TEST_LIBS
- ${ARROW_MIN_TEST_LIBS}
- ${CMAKE_DL_LIBS})
-endif()
-
-set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
-
set(ARROW_BENCHMARK_LINK_LIBS
arrow_static
arrow_benchmark_main
@@ -618,6 +628,20 @@ if (NOT MSVC)
${CMAKE_DL_LIBS})
endif()
+set(ARROW_MIN_TEST_LIBS
+ arrow_static
+ ${ARROW_STATIC_LINK_LIBS}
+ gtest
+ gtest_main)
+
+if(NOT MSVC)
+ set(ARROW_MIN_TEST_LIBS
+ ${ARROW_MIN_TEST_LIBS}
+ ${CMAKE_DL_LIBS})
+endif()
+
+set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS})
+
if (ARROW_JEMALLOC)
add_definitions(-DARROW_JEMALLOC)
# In the case that jemalloc is only available as a shared library also use it to
diff --git a/cpp/README.md b/cpp/README.md
index 5bb516fc99b..2f98b085115 100644
--- a/cpp/README.md
+++ b/cpp/README.md
@@ -1,15 +1,20 @@
# Arrow C++
@@ -77,11 +82,24 @@ Benchmark logs will be placed in the build directory under `build/benchmark-logs
To set up your own specific build toolchain, here are the relevant environment
variables
+* Boost: `BOOST_ROOT`
* Googletest: `GTEST_HOME` (only required to build the unit tests)
+* gflags: `GFLAGS_HOME` (only required to build the unit tests)
* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks)
* Flatbuffers: `FLATBUFFERS_HOME` (only required for the IPC extensions)
* Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions)
-* jemalloc: `JEMALLOC_HOME` (only required for the jemalloc-based memory pool)
+* jemalloc: `JEMALLOC_HOME`
+* brotli: `BROTLI_HOME`, can be disabled with `-DARROW_WITH_BROTLI=off`
+* lz4: `LZ4_HOME`, can be disabled with `-DARROW_WITH_LZ4=off`
+* snappy: `SNAPPY_HOME`, can be disabled with `-DARROW_WITH_SNAPPY=off`
+* zlib: `ZLIB_HOME`, can be disabled with `-DARROW_WITH_ZLIB=off`
+* zstd: `ZSTD_HOME`, can be disabled with `-DARROW_WITH_ZSTD=off`
+
+If you have all of your toolchain libraries installed at the same prefix, you
+can use the environment variable `$ARROW_BUILD_TOOLCHAIN` to automatically set
+all of these variables. Note that `ARROW_BUILD_TOOLCHAIN` will not set
+`BOOST_ROOT`, so if you have custom Boost installation, you must set this
+environment variable separately.
### Building Python integration library
@@ -102,6 +120,35 @@ directoy:
This requires [Doxygen](http://www.doxygen.org) to be installed.
+## Development
+
+This project follows [Google's C++ Style Guide][3] with minor exceptions. We do
+not encourage anonymous namespaces and we relax the line length restriction to
+90 characters.
+
+### Error Handling and Exceptions
+
+For error handling, we use `arrow::Status` values instead of throwing C++
+exceptions. Since the Arrow C++ libraries are intended to be useful as a
+component in larger C++ projects, using `Status` objects can help with good
+code hygiene by making explicit when a function is expected to be able to fail.
+
+For expressing invariants and "cannot fail" errors, we use DCHECK macros
+defined in `arrow/util/logging.h`. These checks are disabled in release builds
+and are intended to catch internal development errors, particularly when
+refactoring. These macros are not to be included in any public header files.
+
+Since we do not use exceptions, we avoid doing expensive work in object
+constructors. Objects that are expensive to construct may often have private
+constructors, with public static factory methods that return `Status`.
+
+There are a number of object constructors, like `arrow::Schema` and
+`arrow::RecordBatch` where larger STL container objects like `std::vector` may
+be created. While it is possible for `std::bad_alloc` to be thrown in these
+constructors, the circumstances where they would are somewhat esoteric, and it
+is likely that an application would have encountered other more serious
+problems prior to having `std::bad_alloc` thrown in a constructor.
+
## Continuous Integration
Pull requests are run through travis-ci for continuous integration. You can avoid
@@ -109,9 +156,8 @@ build failures by running the following checks before submitting your pull reque
make unittest
make lint
- # The next two commands may change your code. It is recommended you commit
- # before running them.
- make clang-tidy # requires clang-tidy is installed
+ # The next command may change your code. It is recommended you commit
+ # before running it.
make format # requires clang-format is installed
Note that the clang-tidy target may take a while to run. You might consider
@@ -127,3 +173,4 @@ both of these options would be used rarely. Current known uses-cases whent hey
[1]: https://brew.sh/
[2]: https://github.com/apache/arrow/blob/master/cpp/apidoc/Windows.md
+[3]: https://google.github.io/styleguide/cppguide.html
\ No newline at end of file
diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile
index 31276624133..f32ad5425da 100644
--- a/cpp/apidoc/Doxyfile
+++ b/cpp/apidoc/Doxyfile
@@ -833,50 +833,17 @@ INPUT_ENCODING = UTF-8
# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf.
-FILE_PATTERNS = *.c \
- *.cc \
- *.cxx \
- *.cpp \
- *.c++ \
- *.java \
- *.ii \
- *.ixx \
- *.ipp \
- *.i++ \
- *.inl \
- *.idl \
- *.ddl \
- *.odl \
- *.h \
+FILE_PATTERNS = *.h \
*.hh \
*.hxx \
*.hpp \
- *.h++ \
- *.cs \
- *.d \
- *.php \
- *.php4 \
- *.php5 \
- *.phtml \
*.inc \
*.m \
*.markdown \
*.md \
*.mm \
*.dox \
- *.py \
- *.pyw \
- *.f90 \
- *.f95 \
- *.f03 \
- *.f08 \
- *.f \
- *.for \
- *.tcl \
- *.vhd \
- *.vhdl \
- *.ucf \
- *.qsf
+ *.py
# The RECURSIVE tag can be used to specify whether or not subdirectories should
# be searched for input files as well.
@@ -908,6 +875,7 @@ EXCLUDE_SYMLINKS = NO
# exclude all test directories for example use the pattern */test/*
EXCLUDE_PATTERNS = *-test.cc \
+ *test* \
*_generated.h \
*-benchmark.cc
@@ -920,7 +888,11 @@ EXCLUDE_PATTERNS = *-test.cc \
# Note that the wildcards are matched against the file with absolute path, so to
# exclude all test directories use the pattern */test/*
-EXCLUDE_SYMBOLS =
+EXCLUDE_SYMBOLS = detail
+EXCLUDE_SYMBOLS += internal
+EXCLUDE_SYMBOLS += _*
+EXCLUDE_SYMBOLS += BitUtil
+EXCLUDE_SYMBOLS += SSEUtil
# The EXAMPLE_PATH tag can be used to specify one or more files or directories
# that contain example code fragments that are included (see the \include
@@ -2060,7 +2032,7 @@ ENABLE_PREPROCESSING = YES
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-MACRO_EXPANSION = NO
+MACRO_EXPANSION = YES
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
# the macro expansion is limited to the macros specified with the PREDEFINED and
@@ -2068,7 +2040,7 @@ MACRO_EXPANSION = NO
# The default value is: NO.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-EXPAND_ONLY_PREDEF = NO
+EXPAND_ONLY_PREDEF = YES
# If the SEARCH_INCLUDES tag is set to YES, the include files in the
# INCLUDE_PATH will be searched if a #include is found.
@@ -2100,7 +2072,10 @@ INCLUDE_FILE_PATTERNS =
# recursively expanded use the := operator instead of the = operator.
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
-PREDEFINED =
+PREDEFINED = __attribute__(x)= \
+ __declspec(x)= \
+ ARROW_EXPORT= \
+ ARROW_EXTERN_TEMPLATE=
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
# tag can be used to specify a list of macro names that should be expanded. The
diff --git a/cpp/apidoc/HDFS.md b/cpp/apidoc/HDFS.md
index 180d31e54d5..d54ad270c05 100644
--- a/cpp/apidoc/HDFS.md
+++ b/cpp/apidoc/HDFS.md
@@ -1,15 +1,20 @@
## Using Arrow's HDFS (Apache Hadoop Distributed File System) interface
@@ -72,4 +77,3 @@ If you get an error about needing to install Java 6, then add *BundledApp* and
https://oliverdowling.com.au/2015/10/09/oracles-jre-8-on-mac-os-x-el-capitan/
https://derflounder.wordpress.com/2015/08/08/modifying-oracles-java-sdk-to-run-java-applications-on-os-x/
-
diff --git a/cpp/apidoc/Windows.md b/cpp/apidoc/Windows.md
index 6bfb951548a..30b7b8f3ce2 100644
--- a/cpp/apidoc/Windows.md
+++ b/cpp/apidoc/Windows.md
@@ -1,15 +1,20 @@
# Developing Arrow C++ on Windows
@@ -26,7 +31,7 @@ other development instructions for Windows here.
[Miniconda][1] is a minimal Python distribution including the conda package
manager. To get started, download and install a 64-bit distribution.
-We recommend using packages from [conda-forge][2].
+We recommend using packages from [conda-forge][2].
Launch cmd.exe and run following commands:
```shell
@@ -46,7 +51,7 @@ previous step:
activate arrow-dev
```
-We are using [cmake][4] tool to support Windows builds.
+We are using [cmake][4] tool to support Windows builds.
To allow cmake to pick up 3rd party dependencies, you should set
`ARROW_BUILD_TOOLCHAIN` environment variable to contain `Library` folder
path of new created on previous step `arrow-dev` conda environment.
@@ -71,16 +76,16 @@ As alternative to `ARROW_BUILD_TOOLCHAIN`, it's possible to configure path
to each 3rd party dependency separately by setting appropriate environment
variable:
-`FLATBUFFERS_HOME` variable with path to `flatbuffers` installation
-`RAPIDJSON_HOME` variable with path to `rapidjson` installation
-`GFLAGS_HOME` variable with path to `gflags` installation
-`SNAPPY_HOME` variable with path to `snappy` installation
-`ZLIB_HOME` variable with path to `zlib` installation
-`BROTLI_HOME` variable with path to `brotli` installation
-`LZ4_HOME` variable with path to `lz4` installation
+`FLATBUFFERS_HOME` variable with path to `flatbuffers` installation
+`RAPIDJSON_HOME` variable with path to `rapidjson` installation
+`GFLAGS_HOME` variable with path to `gflags` installation
+`SNAPPY_HOME` variable with path to `snappy` installation
+`ZLIB_HOME` variable with path to `zlib` installation
+`BROTLI_HOME` variable with path to `brotli` installation
+`LZ4_HOME` variable with path to `lz4` installation
`ZSTD_HOME` variable with path to `zstd` installation
-### Customize static libraries names lookup of 3rd party dependencies
+### Customize static libraries names lookup of 3rd party dependencies
If you decided to use pre-built 3rd party dependencies libs, it's possible to
configure Arrow's cmake build script to search for customized names of 3rd
diff --git a/cpp/apidoc/index.md b/cpp/apidoc/index.md
index 4004e1ef42e..8389d16b4aa 100644
--- a/cpp/apidoc/index.md
+++ b/cpp/apidoc/index.md
@@ -2,17 +2,22 @@ Apache Arrow C++ API documentation {#index}
==================================
Apache Arrow is a columnar in-memory analytics layer designed to accelerate
diff --git a/cpp/apidoc/tutorials/row_wise_conversion.md b/cpp/apidoc/tutorials/row_wise_conversion.md
index 1486fc2a4e0..e91c26e9da1 100644
--- a/cpp/apidoc/tutorials/row_wise_conversion.md
+++ b/cpp/apidoc/tutorials/row_wise_conversion.md
@@ -1,15 +1,20 @@
Convert a vector of row-wise data into an Arrow table
@@ -118,7 +123,7 @@ To convert an Arrow table back into the same row-wise representation as in the
above section, we first will check that the table conforms to our expected
schema and then will build up the vector of rows incrementally.
-For the check if the table is as expected, we can utilise solely its schema.
+For the check if the table is as expected, we can utilise solely its schema.
```
// This is our input that was passed in from the outside.
diff --git a/cpp/build-support/build-lz4-lib.sh b/cpp/build-support/build-lz4-lib.sh
index 62805bae286..8cb5c18782a 100755
--- a/cpp/build-support/build-lz4-lib.sh
+++ b/cpp/build-support/build-lz4-lib.sh
@@ -1,16 +1,21 @@
#!/bin/sh
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
export CFLAGS="${CFLAGS} -O3 -fPIC"
-make -j4
\ No newline at end of file
+make -j4
diff --git a/cpp/build-support/build-zstd-lib.sh b/cpp/build-support/build-zstd-lib.sh
index 62805bae286..8cb5c18782a 100755
--- a/cpp/build-support/build-zstd-lib.sh
+++ b/cpp/build-support/build-zstd-lib.sh
@@ -1,16 +1,21 @@
#!/bin/sh
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
export CFLAGS="${CFLAGS} -O3 -fPIC"
-make -j4
\ No newline at end of file
+make -j4
diff --git a/cpp/build-support/lz4_msbuild_wholeprogramoptimization_param.patch b/cpp/build-support/lz4_msbuild_wholeprogramoptimization_param.patch
new file mode 100644
index 00000000000..ee0f8a12054
--- /dev/null
+++ b/cpp/build-support/lz4_msbuild_wholeprogramoptimization_param.patch
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+diff --git a/visual/VS2010/datagen/datagen.vcxproj b/visual/VS2010/datagen/datagen.vcxproj
+index aaf81ad..67b716f 100644
+--- a/visual/VS2010/datagen/datagen.vcxproj
++++ b/visual/VS2010/datagen/datagen.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/frametest/frametest.vcxproj b/visual/VS2010/frametest/frametest.vcxproj
+index 76d12c9..723571d 100644
+--- a/visual/VS2010/frametest/frametest.vcxproj
++++ b/visual/VS2010/frametest/frametest.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj b/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
+index c10552a..0c8f293 100644
+--- a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
++++ b/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/fullbench/fullbench.vcxproj b/visual/VS2010/fullbench/fullbench.vcxproj
+index e2d95c9..4cd88d0 100644
+--- a/visual/VS2010/fullbench/fullbench.vcxproj
++++ b/visual/VS2010/fullbench/fullbench.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/fuzzer/fuzzer.vcxproj b/visual/VS2010/fuzzer/fuzzer.vcxproj
+index 85d6c9b..3ddc77d 100644
+--- a/visual/VS2010/fuzzer/fuzzer.vcxproj
++++ b/visual/VS2010/fuzzer/fuzzer.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj b/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
+index 389f13c..038a4d2 100644
+--- a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
++++ b/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
+@@ -40,15 +40,19 @@
+
+ DynamicLibrary
+ false
+- true
+ Unicode
+
+
+ DynamicLibrary
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/liblz4/liblz4.vcxproj b/visual/VS2010/liblz4/liblz4.vcxproj
+index a0b8000..9aad8c2 100644
+--- a/visual/VS2010/liblz4/liblz4.vcxproj
++++ b/visual/VS2010/liblz4/liblz4.vcxproj
+@@ -39,15 +39,19 @@
+
+ StaticLibrary
+ false
+- true
+ Unicode
+
+
+ StaticLibrary
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/visual/VS2010/lz4/lz4.vcxproj b/visual/VS2010/lz4/lz4.vcxproj
+index 693e121..7e63f1e 100644
+--- a/visual/VS2010/lz4/lz4.vcxproj
++++ b/visual/VS2010/lz4/lz4.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ Unicode
+
+
+ Application
+ false
+- true
+ Unicode
+
++
++ true
++
++
++ true
++
+
+
+
diff --git a/cpp/build-support/run-clang-tidy.sh b/cpp/build-support/run-clang-tidy.sh
index 4ba8ab8cd76..75e9458e257 100755
--- a/cpp/build-support/run-clang-tidy.sh
+++ b/cpp/build-support/run-clang-tidy.sh
@@ -1,16 +1,21 @@
#!/bin/bash
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
#
# Runs clang format in the given directory
@@ -27,7 +32,7 @@ shift
APPLY_FIXES=$1
shift
-# clang format will only find its configuration if we are in
+# clang format will only find its configuration if we are in
# the source tree or in a path relative to the source tree
if [ "$APPLY_FIXES" == "1" ]; then
$CLANG_TIDY -p $COMPILE_COMMANDS -fix $@
@@ -37,4 +42,4 @@ else
echo "clang-tidy had suggested fixes. Please fix these!!!"
exit 1
fi
-fi
+fi
diff --git a/cpp/build-support/run_clang_format.py b/cpp/build-support/run_clang_format.py
index ab800e641b5..ac4954ca570 100755
--- a/cpp/build-support/run_clang_format.py
+++ b/cpp/build-support/run_clang_format.py
@@ -57,5 +57,9 @@
# exit 1
# fi
-subprocess.check_output([CLANG_FORMAT, '-i'] + files_to_format,
- stderr=subprocess.STDOUT)
+try:
+ subprocess.check_output([CLANG_FORMAT, '-i'] + files_to_format,
+ stderr=subprocess.STDOUT)
+except Exception as e:
+ print(e)
+ raise
diff --git a/cpp/build-support/zstd_msbuild_wholeprogramoptimization_param.patch b/cpp/build-support/zstd_msbuild_wholeprogramoptimization_param.patch
new file mode 100644
index 00000000000..8bfb928947e
--- /dev/null
+++ b/cpp/build-support/zstd_msbuild_wholeprogramoptimization_param.patch
@@ -0,0 +1,199 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+diff --git a/build/VS2010/datagen/datagen.vcxproj b/build/VS2010/datagen/datagen.vcxproj
+index bd8a213..8e4dc89 100644
+--- a/build/VS2010/datagen/datagen.vcxproj
++++ b/build/VS2010/datagen/datagen.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ MultiByte
+
+
+ Application
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
+index e697318..82cd4ab 100644
+--- a/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
++++ b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ MultiByte
+
+
+ Application
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
+index 2bff4ca..ced4047 100644
+--- a/build/VS2010/fullbench/fullbench.vcxproj
++++ b/build/VS2010/fullbench/fullbench.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ MultiByte
+
+
+ Application
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
+index 12a4b93..227efd1 100644
+--- a/build/VS2010/fuzzer/fuzzer.vcxproj
++++ b/build/VS2010/fuzzer/fuzzer.vcxproj
+@@ -39,15 +39,19 @@
+
+ Application
+ false
+- true
+ MultiByte
+
+
+ Application
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+index 364b3be..b227320 100644
+--- a/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
++++ b/build/VS2010/libzstd-dll/libzstd-dll.vcxproj
+@@ -94,15 +94,19 @@
+
+ DynamicLibrary
+ false
+- true
+ MultiByte
+
+
+ DynamicLibrary
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/libzstd/libzstd.vcxproj b/build/VS2010/libzstd/libzstd.vcxproj
+index 6087d73..51a0572 100644
+--- a/build/VS2010/libzstd/libzstd.vcxproj
++++ b/build/VS2010/libzstd/libzstd.vcxproj
+@@ -91,15 +91,19 @@
+
+ StaticLibrary
+ false
+- true
+ MultiByte
+
+
+ StaticLibrary
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
+diff --git a/build/VS2010/zstd/zstd.vcxproj b/build/VS2010/zstd/zstd.vcxproj
+index 438dc61..834ae01 100644
+--- a/build/VS2010/zstd/zstd.vcxproj
++++ b/build/VS2010/zstd/zstd.vcxproj
+@@ -100,15 +100,19 @@
+
+ Application
+ false
+- true
+ MultiByte
+
+
+ Application
+ false
+- true
+ MultiByte
+
++
++ true
++
++
++ true
++
+
+
+
diff --git a/cpp/cmake_modules/SnappyCMakeLists.txt b/cpp/cmake_modules/SnappyCMakeLists.txt
index 9d0a166064e..50083ce405e 100644
--- a/cpp/cmake_modules/SnappyCMakeLists.txt
+++ b/cpp/cmake_modules/SnappyCMakeLists.txt
@@ -68,10 +68,10 @@ set(SNAPPY_SRCS snappy.cc
snappy-stubs-public.h)
add_library(snappy SHARED ${SNAPPY_SRCS})
-add_library(snappystatic STATIC ${SNAPPY_SRCS})
+add_library(snappy_static STATIC ${SNAPPY_SRCS})
TARGET_COMPILE_DEFINITIONS(snappy PRIVATE -DHAVE_CONFIG_H)
-TARGET_COMPILE_DEFINITIONS(snappystatic PRIVATE -DHAVE_CONFIG_H)
+TARGET_COMPILE_DEFINITIONS(snappy_static PRIVATE -DHAVE_CONFIG_H)
install(FILES snappy.h
snappy-c.h
@@ -79,7 +79,7 @@ install(FILES snappy.h
${snappy_BINARY_DIR}/snappy-stubs-public.h
DESTINATION include)
-install(TARGETS snappy snappystatic
+install(TARGETS snappy snappy_static
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index b9d9823e80c..1271b8a4ab3 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -35,6 +35,16 @@ string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE)
set(EP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}}")
set(EP_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}}")
+if (NOT ARROW_VERBOSE_THIRDPARTY_BUILD)
+ set(EP_LOG_OPTIONS
+ LOG_CONFIGURE 1
+ LOG_BUILD 1
+ LOG_INSTALL 1
+ LOG_DOWNLOAD 1)
+else()
+ set(EP_LOG_OPTIONS)
+endif()
+
if (NOT MSVC)
# Set -fPIC on all external projects
set(EP_CXX_FLAGS "${EP_CXX_FLAGS} -fPIC")
@@ -205,7 +215,8 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
ExternalProject_Add(googletest_ep
URL "https://github.com/google/googletest/archive/release-${GTEST_VERSION}.tar.gz"
BUILD_BYPRODUCTS ${GTEST_STATIC_LIB} ${GTEST_MAIN_STATIC_LIB}
- CMAKE_ARGS ${GTEST_CMAKE_ARGS})
+ CMAKE_ARGS ${GTEST_CMAKE_ARGS}
+ ${EP_LOG_OPTIONS})
else()
find_package(GTest REQUIRED)
set(GTEST_VENDORED 0)
@@ -250,6 +261,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
ExternalProject_Add(gflags_ep
URL ${GFLAGS_URL}
+ ${EP_LOG_OPTIONS}
BUILD_IN_SOURCE 1
BUILD_BYPRODUCTS "${GFLAGS_STATIC_LIB}"
CMAKE_ARGS ${GFLAGS_CMAKE_ARGS})
@@ -300,7 +312,8 @@ if(ARROW_BUILD_BENCHMARKS)
ExternalProject_Add(gbenchmark_ep
URL "https://github.com/google/benchmark/archive/v${GBENCHMARK_VERSION}.tar.gz"
BUILD_BYPRODUCTS "${GBENCHMARK_STATIC_LIB}"
- CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS})
+ CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS}
+ ${EP_LOG_OPTIONS})
else()
find_package(GBenchmark REQUIRED)
set(GBENCHMARK_VENDORED 0)
@@ -327,6 +340,7 @@ if (ARROW_IPC)
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
BUILD_IN_SOURCE 1
+ ${EP_LOG_OPTIONS}
INSTALL_COMMAND "")
ExternalProject_Get_Property(rapidjson_ep SOURCE_DIR)
@@ -356,7 +370,8 @@ if (ARROW_IPC)
CMAKE_ARGS
"-DCMAKE_CXX_FLAGS=${FLATBUFFERS_CMAKE_CXX_FLAGS}"
"-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}"
- "-DFLATBUFFERS_BUILD_TESTS=OFF")
+ "-DFLATBUFFERS_BUILD_TESTS=OFF"
+ ${EP_LOG_OPTIONS})
set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include")
set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc")
@@ -395,6 +410,7 @@ if (ARROW_JEMALLOC)
ExternalProject_Add(jemalloc_ep
URL https://github.com/jemalloc/jemalloc/releases/download/${JEMALLOC_VERSION}/jemalloc-${JEMALLOC_VERSION}.tar.bz2
CONFIGURE_COMMAND ./configure "--prefix=${JEMALLOC_PREFIX}" "--with-jemalloc-prefix="
+ ${EP_LOG_OPTIONS}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${MAKE}
BUILD_BYPRODUCTS "${JEMALLOC_STATIC_LIB}" "${JEMALLOC_SHARED_LIB}"
@@ -475,6 +491,7 @@ if (ARROW_WITH_ZLIB)
ExternalProject_Add(zlib_ep
URL "http://zlib.net/fossils/zlib-1.2.8.tar.gz"
+ ${EP_LOG_OPTIONS}
BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}"
CMAKE_ARGS ${ZLIB_CMAKE_ARGS})
set(ZLIB_VENDORED 1)
@@ -501,7 +518,7 @@ if (ARROW_WITH_SNAPPY)
set(SNAPPY_HOME "${SNAPPY_PREFIX}")
set(SNAPPY_INCLUDE_DIR "${SNAPPY_PREFIX}/include")
if (MSVC)
- set(SNAPPY_STATIC_LIB_NAME snappystatic)
+ set(SNAPPY_STATIC_LIB_NAME snappy_static)
else()
set(SNAPPY_STATIC_LIB_NAME snappy)
endif()
@@ -529,6 +546,7 @@ if (ARROW_WITH_SNAPPY)
./config.h)
ExternalProject_Add(snappy_ep
UPDATE_COMMAND ${SNAPPY_UPDATE_COMMAND}
+ ${EP_LOG_OPTIONS}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${MAKE}
INSTALL_DIR ${SNAPPY_PREFIX}
@@ -538,6 +556,7 @@ if (ARROW_WITH_SNAPPY)
else()
ExternalProject_Add(snappy_ep
CONFIGURE_COMMAND ./configure --with-pic "--prefix=${SNAPPY_PREFIX}" ${SNAPPY_CXXFLAGS}
+ ${EP_LOG_OPTIONS}
BUILD_IN_SOURCE 1
BUILD_COMMAND ${MAKE}
INSTALL_DIR ${SNAPPY_PREFIX}
@@ -586,6 +605,7 @@ if (ARROW_WITH_BROTLI)
URL "https://github.com/google/brotli/archive/${BROTLI_VERSION}.tar.gz"
BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" "${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}"
${BROTLI_BUILD_BYPRODUCTS}
+ ${EP_LOG_OPTIONS}
CMAKE_ARGS ${BROTLI_CMAKE_ARGS}
STEP_TARGETS headers_copy)
if (MSVC)
@@ -624,41 +644,43 @@ if (ARROW_WITH_LZ4)
if("${LZ4_HOME}" STREQUAL "")
set(LZ4_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/lz4_ep-prefix/src/lz4_ep")
set(LZ4_INCLUDE_DIR "${LZ4_BUILD_DIR}/lib")
-
+
if (MSVC)
set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/visual/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/liblz4_static.lib")
set(LZ4_BUILD_COMMAND BUILD_COMMAND msbuild.exe /m /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 /p:PlatformToolset=v140 /t:Build ${LZ4_BUILD_DIR}/visual/VS2010/lz4.sln)
+ set(LZ4_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose ${CMAKE_SOURCE_DIR}/build-support/lz4_msbuild_wholeprogramoptimization_param.patch)
else()
set(LZ4_STATIC_LIB "${LZ4_BUILD_DIR}/lib/liblz4.a")
set(LZ4_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-lz4-lib.sh)
endif()
-
+
ExternalProject_Add(lz4_ep
URL "https://github.com/lz4/lz4/archive/v${LZ4_VERSION}.tar.gz"
+ ${EP_LOG_OPTIONS}
UPDATE_COMMAND ""
- PATCH_COMMAND ""
+ ${LZ4_PATCH_COMMAND}
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BINARY_DIR ${LZ4_BUILD_DIR}
BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}
${LZ4_BUILD_COMMAND}
)
-
+
set(LZ4_VENDORED 1)
else()
find_package(Lz4 REQUIRED)
set(LZ4_VENDORED 0)
endif()
-
+
include_directories(SYSTEM ${LZ4_INCLUDE_DIR})
ADD_THIRDPARTY_LIB(lz4_static
STATIC_LIB ${LZ4_STATIC_LIB})
-
+
if (LZ4_VENDORED)
add_dependencies(lz4_static lz4_ep)
endif()
endif()
-
+
if (ARROW_WITH_ZSTD)
# ----------------------------------------------------------------------
# ZSTD
@@ -670,6 +692,7 @@ if (ARROW_WITH_ZSTD)
if (MSVC)
set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/libzstd_static.lib")
set(ZSTD_BUILD_COMMAND BUILD_COMMAND msbuild ${ZSTD_BUILD_DIR}/build/VS2010/zstd.sln /t:Build /v:minimal /p:Configuration=${CMAKE_BUILD_TYPE} /p:Platform=x64 /p:PlatformToolset=v140 /p:OutDir=${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/ /p:SolutionDir=${ZSTD_BUILD_DIR}/build/VS2010/ )
+ set(ZSTD_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose ${CMAKE_SOURCE_DIR}/build-support/zstd_msbuild_wholeprogramoptimization_param.patch)
else()
set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/lib/libzstd.a")
set(ZSTD_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-zstd-lib.sh)
@@ -677,8 +700,9 @@ if (ARROW_WITH_ZSTD)
ExternalProject_Add(zstd_ep
URL "https://github.com/facebook/zstd/archive/v${ZSTD_VERSION}.tar.gz"
+ ${EP_LOG_OPTIONS}
UPDATE_COMMAND ""
- PATCH_COMMAND ""
+ ${ZSTD_PATCH_COMMAND}
CONFIGURE_COMMAND ""
INSTALL_COMMAND ""
BINARY_DIR ${ZSTD_BUILD_DIR}
diff --git a/cpp/doc/Parquet.md b/cpp/doc/Parquet.md
index ce2961ab26a..0ed100731ca 100644
--- a/cpp/doc/Parquet.md
+++ b/cpp/doc/Parquet.md
@@ -1,15 +1,20 @@
## Building Arrow-Parquet integration
diff --git a/cpp/src/arrow/allocator-test.cc b/cpp/src/arrow/allocator-test.cc
index 5a4e98d7660..f3a80cdae81 100644
--- a/cpp/src/arrow/allocator-test.cc
+++ b/cpp/src/arrow/allocator-test.cc
@@ -48,7 +48,7 @@ TEST(stl_allocator, FreeLargeMemory) {
#ifndef NDEBUG
EXPECT_EXIT(alloc.deallocate(data, 120), ::testing::ExitedWithCode(1),
- ".*Check failed: \\(bytes_allocated_\\) >= \\(size\\)");
+ ".*Check failed: \\(bytes_allocated_\\) >= \\(size\\)");
#endif
alloc.deallocate(data, 100);
diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h
index 731f23918e4..4d731bd32bf 100644
--- a/cpp/src/arrow/api.h
+++ b/cpp/src/arrow/api.h
@@ -32,4 +32,7 @@
#include "arrow/type.h"
#include "arrow/visitor.h"
+/// \brief Top-level namespace for Apache Arrow C++ API
+namespace arrow {}
+
#endif // ARROW_API_H
diff --git a/cpp/src/arrow/array-decimal-test.cc b/cpp/src/arrow/array-decimal-test.cc
index 0959d686498..436ce9cf7c3 100644
--- a/cpp/src/arrow/array-decimal-test.cc
+++ b/cpp/src/arrow/array-decimal-test.cc
@@ -28,12 +28,12 @@ namespace decimal {
template
class DecimalTestBase {
public:
- virtual std::vector data(
- const std::vector& input, size_t byte_width) const = 0;
+ virtual std::vector data(const std::vector& input,
+ size_t byte_width) const = 0;
void test(int precision, const std::vector& draw,
- const std::vector& valid_bytes,
- const std::vector& sign_bitmap = {}, int64_t offset = 0) const {
+ const std::vector& valid_bytes,
+ const std::vector& sign_bitmap = {}, int64_t offset = 0) const {
auto type = std::make_shared(precision, 4);
int byte_width = type->byte_width();
auto pool = default_memory_pool();
@@ -63,8 +63,9 @@ class DecimalTestBase {
ASSERT_OK(BitUtil::BytesToBits(valid_bytes, &expected_null_bitmap));
int64_t expected_null_count = test::null_count(valid_bytes);
- auto expected = std::make_shared(type, size, expected_data,
- expected_null_bitmap, expected_null_count, offset, expected_sign_bitmap);
+ auto expected =
+ std::make_shared(type, size, expected_data, expected_null_bitmap,
+ expected_null_count, offset, expected_sign_bitmap);
std::shared_ptr out;
ASSERT_OK(builder->Finish(&out));
@@ -75,8 +76,8 @@ class DecimalTestBase {
template
class DecimalTest : public DecimalTestBase {
public:
- std::vector data(
- const std::vector& input, size_t byte_width) const override {
+ std::vector data(const std::vector& input,
+ size_t byte_width) const override {
std::vector result(input.size() * byte_width);
// TODO(phillipc): There's probably a better way to do this
constexpr static const size_t bytes_per_element = sizeof(T);
@@ -90,8 +91,8 @@ class DecimalTest : public DecimalTestBase {
template <>
class DecimalTest : public DecimalTestBase {
public:
- std::vector data(
- const std::vector& input, size_t byte_width) const override {
+ std::vector data(const std::vector& input,
+ size_t byte_width) const override {
std::vector result;
result.reserve(input.size() * byte_width);
constexpr static const size_t bytes_per_element = 16;
@@ -120,24 +121,24 @@ class Decimal128BuilderTest : public ::testing::TestWithParam,
TEST_P(Decimal32BuilderTest, NoNulls) {
int precision = GetParam();
- std::vector draw = {
- Decimal32(1), Decimal32(2), Decimal32(2389), Decimal32(4), Decimal32(-12348)};
+ std::vector draw = {Decimal32(1), Decimal32(2), Decimal32(2389),
+ Decimal32(4), Decimal32(-12348)};
std::vector valid_bytes = {true, true, true, true, true};
this->test(precision, draw, valid_bytes);
}
TEST_P(Decimal64BuilderTest, NoNulls) {
int precision = GetParam();
- std::vector draw = {
- Decimal64(1), Decimal64(2), Decimal64(2389), Decimal64(4), Decimal64(-12348)};
+ std::vector draw = {Decimal64(1), Decimal64(2), Decimal64(2389),
+ Decimal64(4), Decimal64(-12348)};
std::vector valid_bytes = {true, true, true, true, true};
this->test(precision, draw, valid_bytes);
}
TEST_P(Decimal128BuilderTest, NoNulls) {
int precision = GetParam();
- std::vector draw = {
- Decimal128(1), Decimal128(-2), Decimal128(2389), Decimal128(4), Decimal128(-12348)};
+ std::vector draw = {Decimal128(1), Decimal128(-2), Decimal128(2389),
+ Decimal128(4), Decimal128(-12348)};
std::vector valid_bytes = {true, true, true, true, true};
std::vector sign_bitmap = {false, true, false, false, true};
this->test(precision, draw, valid_bytes, sign_bitmap);
@@ -145,41 +146,47 @@ TEST_P(Decimal128BuilderTest, NoNulls) {
TEST_P(Decimal32BuilderTest, WithNulls) {
int precision = GetParam();
- std::vector draw = {
- Decimal32(1), Decimal32(2), Decimal32(-1), Decimal32(4), Decimal32(-1)};
+ std::vector draw = {Decimal32(1), Decimal32(2), Decimal32(-1), Decimal32(4),
+ Decimal32(-1)};
std::vector valid_bytes = {true, true, false, true, false};
this->test(precision, draw, valid_bytes);
}
TEST_P(Decimal64BuilderTest, WithNulls) {
int precision = GetParam();
- std::vector draw = {
- Decimal64(-1), Decimal64(2), Decimal64(-1), Decimal64(4), Decimal64(-1)};
+ std::vector draw = {Decimal64(-1), Decimal64(2), Decimal64(-1), Decimal64(4),
+ Decimal64(-1)};
std::vector valid_bytes = {true, true, false, true, false};
this->test(precision, draw, valid_bytes);
}
TEST_P(Decimal128BuilderTest, WithNulls) {
int precision = GetParam();
- std::vector draw = {Decimal128(1), Decimal128(2), Decimal128(-1),
- Decimal128(4), Decimal128(-1), Decimal128(1), Decimal128(2),
- Decimal128("230342903942.234234"), Decimal128("-23049302932.235234")};
- std::vector valid_bytes = {
- true, true, false, true, false, true, true, true, true};
- std::vector sign_bitmap = {
- false, false, false, false, false, false, false, false, true};
+ std::vector draw = {Decimal128(1),
+ Decimal128(2),
+ Decimal128(-1),
+ Decimal128(4),
+ Decimal128(-1),
+ Decimal128(1),
+ Decimal128(2),
+ Decimal128("230342903942.234234"),
+ Decimal128("-23049302932.235234")};
+ std::vector valid_bytes = {true, true, false, true, false,
+ true, true, true, true};
+ std::vector sign_bitmap = {false, false, false, false, false,
+ false, false, false, true};
this->test(precision, draw, valid_bytes, sign_bitmap);
}
INSTANTIATE_TEST_CASE_P(Decimal32BuilderTest, Decimal32BuilderTest,
- ::testing::Range(
- DecimalPrecision::minimum, DecimalPrecision::maximum));
+ ::testing::Range(DecimalPrecision::minimum,
+ DecimalPrecision::maximum));
INSTANTIATE_TEST_CASE_P(Decimal64BuilderTest, Decimal64BuilderTest,
- ::testing::Range(
- DecimalPrecision::minimum, DecimalPrecision::maximum));
+ ::testing::Range(DecimalPrecision::minimum,
+ DecimalPrecision::maximum));
INSTANTIATE_TEST_CASE_P(Decimal128BuilderTest, Decimal128BuilderTest,
- ::testing::Range(
- DecimalPrecision::minimum, DecimalPrecision::maximum));
+ ::testing::Range(DecimalPrecision::minimum,
+ DecimalPrecision::maximum));
} // namespace decimal
} // namespace arrow
diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index acb4819dd09..0efb51ccece 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -64,8 +64,8 @@ TEST_F(TestArray, TestLength) {
ASSERT_EQ(arr->length(), 100);
}
-Status MakeArrayFromValidBytes(
- const vector& v, MemoryPool* pool, std::shared_ptr* out) {
+Status MakeArrayFromValidBytes(const vector& v, MemoryPool* pool,
+ std::shared_ptr* out) {
int64_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0);
std::shared_ptr null_buf;
@@ -147,7 +147,9 @@ TEST_F(TestArray, TestIsNull) {
// clang-format on
int64_t null_count = 0;
for (uint8_t x : null_bitmap) {
- if (x == 0) { ++null_count; }
+ if (x == 0) {
+ ++null_count;
+ }
}
std::shared_ptr null_buf;
@@ -223,8 +225,8 @@ class TestPrimitiveBuilder : public TestBuilder {
void Check(const std::unique_ptr& builder, bool nullable) {
int64_t size = builder->length();
- auto ex_data = std::make_shared(
- reinterpret_cast(draws_.data()), size * sizeof(T));
+ auto ex_data = std::make_shared(reinterpret_cast(draws_.data()),
+ size * sizeof(T));
std::shared_ptr ex_null_bitmap;
int64_t ex_null_count = 0;
@@ -316,8 +318,8 @@ void TestPrimitiveBuilder::RandomData(int64_t N, double pct_null) {
}
template <>
-void TestPrimitiveBuilder::Check(
- const std::unique_ptr& builder, bool nullable) {
+void TestPrimitiveBuilder::Check(const std::unique_ptr& builder,
+ bool nullable) {
int64_t size = builder->length();
std::shared_ptr ex_data;
@@ -351,7 +353,9 @@ void TestPrimitiveBuilder::Check(
ASSERT_EQ(expected->length(), result->length());
for (int64_t i = 0; i < result->length(); ++i) {
- if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
+ if (nullable) {
+ ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i;
+ }
bool actual = BitUtil::GetBit(result->values()->data(), i);
ASSERT_EQ(draws_[i] != 0, actual) << i;
}
@@ -359,7 +363,7 @@ void TestPrimitiveBuilder::Check(
}
typedef ::testing::Types
+ PInt32, PInt64, PFloat, PDouble>
Primitives;
TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives);
@@ -377,7 +381,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {
ASSERT_OK(this->builder_->Reserve(n));
ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
ASSERT_EQ(BitUtil::NextPower2(TypeTraits::bytes_required(n)),
- this->builder_->data()->size());
+ this->builder_->data()->size());
// unsure if this should go in all builder classes
ASSERT_EQ(0, this->builder_->num_children());
@@ -440,8 +444,8 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) {
ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array));
// Make the not equal array by negating the first valid element with itself.
- const auto first_valid = std::find_if(
- valid_bytes.begin(), valid_bytes.end(), [](uint8_t valid) { return valid > 0; });
+ const auto first_valid = std::find_if(valid_bytes.begin(), valid_bytes.end(),
+ [](uint8_t valid) { return valid > 0; });
const int64_t first_valid_idx = std::distance(valid_bytes.begin(), first_valid);
// This should be true with a very high probability, but might introduce flakiness
ASSERT_LT(first_valid_idx, size - 1);
@@ -679,8 +683,8 @@ class TestStringArray : public ::testing::Test {
ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, &null_bitmap_));
null_count_ = test::null_count(valid_bytes_);
- strings_ = std::make_shared(
- length_, offsets_buf_, value_buf_, null_bitmap_, null_count_);
+ strings_ = std::make_shared(length_, offsets_buf_, value_buf_,
+ null_bitmap_, null_count_);
}
protected:
@@ -723,8 +727,8 @@ TEST_F(TestStringArray, TestListFunctions) {
}
TEST_F(TestStringArray, TestDestructor) {
- auto arr = std::make_shared(
- length_, offsets_buf_, value_buf_, null_bitmap_, null_count_);
+ auto arr = std::make_shared(length_, offsets_buf_, value_buf_,
+ null_bitmap_, null_count_);
}
TEST_F(TestStringArray, TestGetString) {
@@ -742,10 +746,10 @@ TEST_F(TestStringArray, TestEmptyStringComparison) {
offsets_buf_ = test::GetBufferFromVector(offsets_);
length_ = static_cast(offsets_.size() - 1);
- auto strings_a = std::make_shared(
- length_, offsets_buf_, nullptr, null_bitmap_, null_count_);
- auto strings_b = std::make_shared(
- length_, offsets_buf_, nullptr, null_bitmap_, null_count_);
+ auto strings_a = std::make_shared(length_, offsets_buf_, nullptr,
+ null_bitmap_, null_count_);
+ auto strings_b = std::make_shared(length_, offsets_buf_, nullptr,
+ null_bitmap_, null_count_);
ASSERT_TRUE(strings_a->Equals(strings_b));
}
@@ -893,8 +897,8 @@ class TestBinaryArray : public ::testing::Test {
ASSERT_OK(BitUtil::BytesToBits(valid_bytes_, &null_bitmap_));
null_count_ = test::null_count(valid_bytes_);
- strings_ = std::make_shared(
- length_, offsets_buf_, value_buf_, null_bitmap_, null_count_);
+ strings_ = std::make_shared(length_, offsets_buf_, value_buf_,
+ null_bitmap_, null_count_);
}
protected:
@@ -937,8 +941,8 @@ TEST_F(TestBinaryArray, TestListFunctions) {
}
TEST_F(TestBinaryArray, TestDestructor) {
- auto arr = std::make_shared(
- length_, offsets_buf_, value_buf_, null_bitmap_, null_count_);
+ auto arr = std::make_shared(length_, offsets_buf_, value_buf_,
+ null_bitmap_, null_count_);
}
TEST_F(TestBinaryArray, TestGetValue) {
@@ -965,8 +969,9 @@ TEST_F(TestBinaryArray, TestEqualsEmptyStrings) {
ASSERT_OK(builder.Finish(&left_arr));
const BinaryArray& left = static_cast(*left_arr);
- std::shared_ptr right = std::make_shared(left.length(),
- left.value_offsets(), nullptr, left.null_bitmap(), left.null_count());
+ std::shared_ptr right =
+ std::make_shared(left.length(), left.value_offsets(), nullptr,
+ left.null_bitmap(), left.null_count());
ASSERT_TRUE(left.Equals(right));
ASSERT_TRUE(left.RangeEquals(0, left.length(), 0, right));
@@ -1082,17 +1087,11 @@ void CheckSliceEquality() {
ASSERT_TRUE(array->RangeEquals(5, 25, 0, slice));
}
-TEST_F(TestBinaryArray, TestSliceEquality) {
- CheckSliceEquality();
-}
+TEST_F(TestBinaryArray, TestSliceEquality) { CheckSliceEquality(); }
-TEST_F(TestStringArray, TestSliceEquality) {
- CheckSliceEquality();
-}
+TEST_F(TestStringArray, TestSliceEquality) { CheckSliceEquality(); }
-TEST_F(TestBinaryArray, LengthZeroCtor) {
- BinaryArray array(0, nullptr, nullptr);
-}
+TEST_F(TestBinaryArray, LengthZeroCtor) { BinaryArray array(0, nullptr, nullptr); }
// ----------------------------------------------------------------------
// FixedSizeBinary tests
@@ -1126,8 +1125,8 @@ TEST_F(TestFWBinaryArray, Builder) {
std::shared_ptr result;
- auto CheckResult = [this, &length, &is_valid, &raw_data, &byte_width](
- const Array& result) {
+ auto CheckResult = [this, &length, &is_valid, &raw_data,
+ &byte_width](const Array& result) {
// Verify output
const auto& fw_result = static_cast(result);
@@ -1135,8 +1134,8 @@ TEST_F(TestFWBinaryArray, Builder) {
for (int64_t i = 0; i < result.length(); ++i) {
if (is_valid[i]) {
- ASSERT_EQ(
- 0, memcmp(raw_data + byte_width * i, fw_result.GetValue(i), byte_width));
+ ASSERT_EQ(0,
+ memcmp(raw_data + byte_width * i, fw_result.GetValue(i), byte_width));
} else {
ASSERT_TRUE(fw_result.IsNull(i));
}
@@ -1323,8 +1322,8 @@ TEST_F(TestAdaptiveIntBuilder, TestInt16) {
SetUp();
ASSERT_OK(builder_->Append(std::numeric_limits::max()));
ASSERT_OK(builder_->Append(std::numeric_limits::min()));
- expected_values = {
- std::numeric_limits::max(), std::numeric_limits::min()};
+ expected_values = {std::numeric_limits::max(),
+ std::numeric_limits::min()};
Done();
ArrayFromVector(expected_values, &expected_);
@@ -1354,8 +1353,8 @@ TEST_F(TestAdaptiveIntBuilder, TestInt32) {
SetUp();
ASSERT_OK(builder_->Append(std::numeric_limits::max()));
ASSERT_OK(builder_->Append(std::numeric_limits::min()));
- expected_values = {
- std::numeric_limits::max(), std::numeric_limits::min()};
+ expected_values = {std::numeric_limits::max(),
+ std::numeric_limits::min()};
Done();
ArrayFromVector(expected_values, &expected_);
@@ -1385,8 +1384,8 @@ TEST_F(TestAdaptiveIntBuilder, TestInt64) {
SetUp();
ASSERT_OK(builder_->Append(std::numeric_limits::max()));
ASSERT_OK(builder_->Append(std::numeric_limits::min()));
- expected_values = {
- std::numeric_limits::max(), std::numeric_limits::min()};
+ expected_values = {std::numeric_limits::max(),
+ std::numeric_limits::min()};
Done();
ArrayFromVector(expected_values, &expected_);
@@ -1505,7 +1504,7 @@ template
class TestDictionaryBuilder : public TestBuilder {};
typedef ::testing::Types
+ UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>
PrimitiveDictionaries;
TYPED_TEST_CASE(TestDictionaryBuilder, PrimitiveDictionaries);
@@ -1784,7 +1783,7 @@ TEST_F(TestListBuilder, TestAppendNull) {
}
void ValidateBasicListArray(const ListArray* result, const vector& values,
- const vector& is_valid) {
+ const vector& is_valid) {
ASSERT_OK(ValidateArray(*result));
ASSERT_EQ(1, result->null_count());
ASSERT_EQ(0, result->values()->null_count());
@@ -1997,9 +1996,12 @@ TEST(TestDictionary, Validate) {
// Struct tests
void ValidateBasicStructArray(const StructArray* result,
- const vector& struct_is_valid, const vector& list_values,
- const vector& list_is_valid, const vector& list_lengths,
- const vector& list_offsets, const vector& int_values) {
+ const vector& struct_is_valid,
+ const vector& list_values,
+ const vector& list_is_valid,
+ const vector& list_lengths,
+ const vector& list_offsets,
+ const vector& int_values) {
ASSERT_EQ(4, result->length());
ASSERT_OK(ValidateArray(*result));
@@ -2040,9 +2042,9 @@ class TestStructBuilder : public TestBuilder {
auto list_type = list(char_type);
vector> types = {list_type, int32_type};
- vector fields;
- fields.push_back(FieldPtr(new Field("list", list_type)));
- fields.push_back(FieldPtr(new Field("int", int32_type)));
+ vector> fields;
+ fields.push_back(field("list", list_type));
+ fields.push_back(field("int", int32_type));
type_ = struct_(fields);
value_fields_ = fields;
@@ -2060,7 +2062,7 @@ class TestStructBuilder : public TestBuilder {
}
protected:
- vector value_fields_;
+ vector> value_fields_;
std::shared_ptr type_;
std::shared_ptr builder_;
@@ -2134,7 +2136,7 @@ TEST_F(TestStructBuilder, TestBasics) {
Done();
ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
- list_lengths, list_offsets, int_values);
+ list_lengths, list_offsets, int_values);
}
TEST_F(TestStructBuilder, BulkAppend) {
@@ -2166,7 +2168,7 @@ TEST_F(TestStructBuilder, BulkAppend) {
Done();
ValidateBasicStructArray(result_.get(), struct_is_valid, list_values, list_is_valid,
- list_lengths, list_offsets, int_values);
+ list_lengths, list_offsets, int_values);
}
TEST_F(TestStructBuilder, BulkAppendInvalid) {
@@ -2280,7 +2282,7 @@ TEST_F(TestStructBuilder, TestEquality) {
// setup an unequal one with unequal offsets
ASSERT_OK(builder_->Append(struct_is_valid.size(), struct_is_valid.data()));
ASSERT_OK(list_vb->Append(unequal_list_offsets.data(), unequal_list_offsets.size(),
- unequal_list_is_valid.data()));
+ unequal_list_is_valid.data()));
for (int8_t value : list_values) {
char_vb->UnsafeAppend(value);
}
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 4a405f24342..ab0be7a0964 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -57,45 +57,57 @@ int64_t Array::null_count() const {
bool Array::Equals(const Array& arr) const {
bool are_equal = false;
Status error = ArrayEquals(*this, arr, &are_equal);
- if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+ if (!error.ok()) {
+ DCHECK(false) << "Arrays not comparable: " << error.ToString();
+ }
return are_equal;
}
bool Array::Equals(const std::shared_ptr& arr) const {
- if (!arr) { return false; }
+ if (!arr) {
+ return false;
+ }
return Equals(*arr);
}
bool Array::ApproxEquals(const Array& arr) const {
bool are_equal = false;
Status error = ArrayApproxEquals(*this, arr, &are_equal);
- if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+ if (!error.ok()) {
+ DCHECK(false) << "Arrays not comparable: " << error.ToString();
+ }
return are_equal;
}
bool Array::ApproxEquals(const std::shared_ptr& arr) const {
- if (!arr) { return false; }
+ if (!arr) {
+ return false;
+ }
return ApproxEquals(*arr);
}
bool Array::RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
- const std::shared_ptr& other) const {
- if (!other) { return false; }
+ const std::shared_ptr& other) const {
+ if (!other) {
+ return false;
+ }
return RangeEquals(*other, start_idx, end_idx, other_start_idx);
}
bool Array::RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
- int64_t other_start_idx) const {
+ int64_t other_start_idx) const {
bool are_equal = false;
Status error =
ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx, &are_equal);
- if (!error.ok()) { DCHECK(false) << "Arrays not comparable: " << error.ToString(); }
+ if (!error.ok()) {
+ DCHECK(false) << "Arrays not comparable: " << error.ToString();
+ }
return are_equal;
}
// Last two parameters are in-out parameters
-static inline void ConformSliceParams(
- int64_t array_offset, int64_t array_length, int64_t* offset, int64_t* length) {
+static inline void ConformSliceParams(int64_t array_offset, int64_t array_length,
+ int64_t* offset, int64_t* length) {
DCHECK_LE(*offset, array_length);
DCHECK_NE(offset, nullptr);
*length = std::min(array_length - *offset, *length);
@@ -113,8 +125,8 @@ std::string Array::ToString() const {
return ss.str();
}
-static inline std::shared_ptr SliceData(
- const ArrayData& data, int64_t offset, int64_t length) {
+static inline std::shared_ptr SliceData(const ArrayData& data, int64_t offset,
+ int64_t length) {
ConformSliceParams(data.offset, data.length, &offset, &length);
auto new_data = data.ShallowCopy();
@@ -139,8 +151,9 @@ std::shared_ptr NullArray::Slice(int64_t offset, int64_t length) const {
// Primitive array base
PrimitiveArray::PrimitiveArray(const std::shared_ptr& type, int64_t length,
- const std::shared_ptr& data, const std::shared_ptr& null_bitmap,
- int64_t null_count, int64_t offset) {
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap,
+ int64_t null_count, int64_t offset) {
BufferVector buffers = {null_bitmap, data};
SetData(
std::make_shared(type, length, std::move(buffers), null_count, offset));
@@ -166,7 +179,8 @@ BooleanArray::BooleanArray(const std::shared_ptr& data)
}
BooleanArray::BooleanArray(int64_t length, const std::shared_ptr& data,
- const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset)
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset)
: PrimitiveArray(boolean(), length, data, null_bitmap, null_count, offset) {}
std::shared_ptr BooleanArray::Slice(int64_t offset, int64_t length) const {
@@ -182,8 +196,10 @@ ListArray::ListArray(const std::shared_ptr& data) {
}
ListArray::ListArray(const std::shared_ptr& type, int64_t length,
- const std::shared_ptr& value_offsets, const std::shared_ptr& values,
- const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) {
+ const std::shared_ptr& value_offsets,
+ const std::shared_ptr& values,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset) {
BufferVector buffers = {null_bitmap, value_offsets};
auto internal_data =
std::make_shared(type, length, std::move(buffers), null_count, offset);
@@ -192,7 +208,7 @@ ListArray::ListArray(const std::shared_ptr& type, int64_t length,
}
Status ListArray::FromArrays(const Array& offsets, const Array& values, MemoryPool* pool,
- std::shared_ptr* out) {
+ std::shared_ptr* out) {
if (ARROW_PREDICT_FALSE(offsets.length() == 0)) {
return Status::Invalid("List offsets must have non-zero length");
}
@@ -205,12 +221,13 @@ Status ListArray::FromArrays(const Array& offsets, const Array& values, MemoryPo
return Status::Invalid("List offsets must be signed int32");
}
- BufferVector buffers = {
- offsets.null_bitmap(), static_cast(offsets).values()};
+ BufferVector buffers = {offsets.null_bitmap(),
+ static_cast(offsets).values()};
auto list_type = list(values.type());
- auto internal_data = std::make_shared(list_type,
- offsets.length() - 1, std::move(buffers), offsets.null_count(), offsets.offset());
+ auto internal_data = std::make_shared(
+ list_type, offsets.length() - 1, std::move(buffers), offsets.null_count(),
+ offsets.offset());
internal_data->child_data.push_back(values.data());
*out = std::make_shared(internal_data);
@@ -230,14 +247,12 @@ std::shared_ptr ListArray::value_type() const {
return static_cast(*type()).value_type();
}
-std::shared_ptr ListArray::values() const {
- return values_;
-}
+std::shared_ptr ListArray::values() const { return values_; }
std::shared_ptr ListArray::Slice(int64_t offset, int64_t length) const {
ConformSliceParams(data_->offset, data_->length, &offset, &length);
return std::make_shared(type(), length, value_offsets(), values(),
- null_bitmap(), kUnknownNullCount, offset);
+ null_bitmap(), kUnknownNullCount, offset);
}
// ----------------------------------------------------------------------
@@ -262,14 +277,17 @@ void BinaryArray::SetData(const std::shared_ptr& data) {
}
BinaryArray::BinaryArray(int64_t length, const std::shared_ptr& value_offsets,
- const std::shared_ptr& data, const std::shared_ptr& null_bitmap,
- int64_t null_count, int64_t offset)
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset)
: BinaryArray(kBinary, length, value_offsets, data, null_bitmap, null_count, offset) {
}
BinaryArray::BinaryArray(const std::shared_ptr& type, int64_t length,
- const std::shared_ptr& value_offsets, const std::shared_ptr& data,
- const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) {
+ const std::shared_ptr& value_offsets,
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset) {
BufferVector buffers = {null_bitmap, value_offsets, data};
SetData(
std::make_shared(type, length, std::move(buffers), null_count, offset));
@@ -285,8 +303,9 @@ StringArray::StringArray(const std::shared_ptr& data) {
}
StringArray::StringArray(int64_t length, const std::shared_ptr& value_offsets,
- const std::shared_ptr& data, const std::shared_ptr& null_bitmap,
- int64_t null_count, int64_t offset)
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset)
: BinaryArray(kString, length, value_offsets, data, null_bitmap, null_count, offset) {
}
@@ -304,8 +323,10 @@ FixedSizeBinaryArray::FixedSizeBinaryArray(
}
FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr& type,
- int64_t length, const std::shared_ptr& data,
- const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset)
+ int64_t length,
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap,
+ int64_t null_count, int64_t offset)
: PrimitiveArray(type, length, data, null_bitmap, null_count, offset),
byte_width_(static_cast(*type).byte_width()) {}
@@ -335,8 +356,9 @@ void DecimalArray::SetData(const std::shared_ptr& data) {
}
DecimalArray::DecimalArray(const std::shared_ptr& type, int64_t length,
- const std::shared_ptr& data, const std::shared_ptr& null_bitmap,
- int64_t null_count, int64_t offset, const std::shared_ptr& sign_bitmap) {
+ const std::shared_ptr& data,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset, const std::shared_ptr& sign_bitmap) {
BufferVector buffers = {null_bitmap, data, sign_bitmap};
SetData(
std::make_shared(type, length, std::move(buffers), null_count, offset));
@@ -392,8 +414,9 @@ StructArray::StructArray(const std::shared_ptr& data) {
}
StructArray::StructArray(const std::shared_ptr& type, int64_t length,
- const std::vector>& children,
- std::shared_ptr null_bitmap, int64_t null_count, int64_t offset) {
+ const std::vector>& children,
+ std::shared_ptr null_bitmap, int64_t null_count,
+ int64_t offset) {
BufferVector buffers = {null_bitmap};
SetData(
std::make_shared(type, length, std::move(buffers), null_count, offset));
@@ -433,9 +456,11 @@ UnionArray::UnionArray(const std::shared_ptr& data) {
}
UnionArray::UnionArray(const std::shared_ptr& type, int64_t length,
- const std::vector>& children,
- const std::shared_ptr& type_ids, const std::shared_ptr& value_offsets,
- const std::shared_ptr& null_bitmap, int64_t null_count, int64_t offset) {
+ const std::vector>& children,
+ const std::shared_ptr& type_ids,
+ const std::shared_ptr& value_offsets,
+ const std::shared_ptr& null_bitmap, int64_t null_count,
+ int64_t offset) {
BufferVector buffers = {null_bitmap, type_ids, value_offsets};
auto internal_data =
std::make_shared(type, length, std::move(buffers), null_count, offset);
@@ -464,8 +489,8 @@ DictionaryArray::DictionaryArray(const std::shared_ptr& data)
SetData(data);
}
-DictionaryArray::DictionaryArray(
- const std::shared_ptr& type, const std::shared_ptr& indices)
+DictionaryArray::DictionaryArray(const std::shared_ptr& type,
+ const std::shared_ptr& indices)
: dict_type_(static_cast