diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ecdf628355..31db245ba6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,618 @@ +# Apache Arrow 7.0.0 (2022-01-21) + +## Bug Fixes + +* [ARROW-8340](https://issues.apache.org/jira/browse/ARROW-8340) - [Documentation] Remove the old Sphinx pin +* [ARROW-9648](https://issues.apache.org/jira/browse/ARROW-9648) - [C++] Added compression level parameter to LZ4_FRAME compression codec +* [ARROW-9688](https://issues.apache.org/jira/browse/ARROW-9688) - [C++][Python] Enable building c++ library and pyarrow package for win/arm64 build +* [ARROW-10140](https://issues.apache.org/jira/browse/ARROW-10140) - [Python][C++] Add test for map column of a parquet file created from pyarrow and pandas +* [ARROW-10485](https://issues.apache.org/jira/browse/ARROW-10485) - [R] Accept partitioning in open_dataset when file paths are hive-style +* [ARROW-10794](https://issues.apache.org/jira/browse/ARROW-10794) - [JS] Typescript Arrowjs Class 'RecordBatch' incorrectly extends base class 'StructVector +* [ARROW-11549](https://issues.apache.org/jira/browse/ARROW-11549) - [C++][Gandiva] Fix issues with FilterCacheKey caused by ToString() not distinguishing null and 'null' +* [ARROW-12042](https://issues.apache.org/jira/browse/ARROW-12042) - [C++] Fix array_sort_indices on chunked arrays +* [ARROW-12066](https://issues.apache.org/jira/browse/ARROW-12066) - [Python] Test to ensure filtering with equal to null does not crash +* [ARROW-12768](https://issues.apache.org/jira/browse/ARROW-12768) - [C++] Stricter signed zero comparison in tests +* [ARROW-13294](https://issues.apache.org/jira/browse/ARROW-13294) - [C#] Create Flight example server and client +* [ARROW-13412](https://issues.apache.org/jira/browse/ARROW-13412) - [C++] Fix Kleene kernels on chunked array + scalar input +* [ARROW-13462](https://issues.apache.org/jira/browse/ARROW-13462) - [C++] Fix example code stub in Compute API documentation +* [ARROW-13628](https://issues.apache.org/jira/browse/ARROW-13628) - [Rust] Activate IPC month_day_nano_interval integration test for rust +* [ARROW-13735](https://issues.apache.org/jira/browse/ARROW-13735) - [C++][Python] Creating a Map array with non-default field names segfaults +* [ARROW-13756](https://issues.apache.org/jira/browse/ARROW-13756) - [Python] Error in pandas conversion for datetimetz column index +* [ARROW-13780](https://issues.apache.org/jira/browse/ARROW-13780) - [Gandiva][UDF] Fix bug in udf space/rpad/lpad +* [ARROW-13861](https://issues.apache.org/jira/browse/ARROW-13861) - [JS] Create Field with List type will throw error +* [ARROW-13879](https://issues.apache.org/jira/browse/ARROW-13879) - [C++] Mixed support for binary types in regex functions +* [ARROW-13896](https://issues.apache.org/jira/browse/ARROW-13896) - [Python] Print of timestamp with timezone errors +* [ARROW-13947](https://issues.apache.org/jira/browse/ARROW-13947) - [C++] Support more types in index kernel +* [ARROW-13948](https://issues.apache.org/jira/browse/ARROW-13948) - [C++] Support timestamp with timezone in is_in/index_in +* [ARROW-13950](https://issues.apache.org/jira/browse/ARROW-13950) - [C++] min_element_wise/max_element_wise missing support for some types +* [ARROW-13981](https://issues.apache.org/jira/browse/ARROW-13981) - [Java] VectorSchemaRootAppender doesn't work for BitVector +* [ARROW-14029](https://issues.apache.org/jira/browse/ARROW-14029) - [R] Repair map_batches() +* [ARROW-14151](https://issues.apache.org/jira/browse/ARROW-14151) - [C++] Mixed support for 
binary types in ASCII string functions +* [ARROW-14238](https://issues.apache.org/jira/browse/ARROW-14238) - [Python] "could not run mc" error in test_fs.py +* [ARROW-14253](https://issues.apache.org/jira/browse/ARROW-14253) - [R] Update lz4 test failing locally due to different error message +* [ARROW-14318](https://issues.apache.org/jira/browse/ARROW-14318) - [Docs] Fix doc building of dataset docs multiple times +* [ARROW-14374](https://issues.apache.org/jira/browse/ARROW-14374) - [Java] Integration tests for the C data Interface implementation for Java +* [ARROW-14395](https://issues.apache.org/jira/browse/ARROW-14395) - [R] Re-enable duckdb autocleaning +* [ARROW-14405](https://issues.apache.org/jira/browse/ARROW-14405) - [C++] Fix build error from clang for windows +* [ARROW-14426](https://issues.apache.org/jira/browse/ARROW-14426) - [C++] Add a minimum_row_group_size to dataset writing +* [ARROW-14429](https://issues.apache.org/jira/browse/ARROW-14429) - [C++] RecordBatchFileReader performance really bad in S3 +* [ARROW-14437](https://issues.apache.org/jira/browse/ARROW-14437) - [Python] Make CSV cancellation test more robust +* [ARROW-14469](https://issues.apache.org/jira/browse/ARROW-14469) - [R] Binding for lubridate::month() doesn't have `label` argument implemented +* [ARROW-14475](https://issues.apache.org/jira/browse/ARROW-14475) - [C++] Don't shadow enable_if helpers +* [ARROW-14492](https://issues.apache.org/jira/browse/ARROW-14492) - [JS] Fix export for browser bundles +* [ARROW-14493](https://issues.apache.org/jira/browse/ARROW-14493) - [Release][Go] Add update of import path for major versions to script +* [ARROW-14513](https://issues.apache.org/jira/browse/ARROW-14513) - [Release][Go] Update release-6.0.0 with /v6 suffix +* [ARROW-14516](https://issues.apache.org/jira/browse/ARROW-14516) - [CI] Disable privileged mode for Docker runs +* [ARROW-14517](https://issues.apache.org/jira/browse/ARROW-14517) - [Python] Missing ampersand in CIpcReadOptions of CFeatherReader +* [ARROW-14519](https://issues.apache.org/jira/browse/ARROW-14519) - [C++] Properly error if joining on unsupported type +* [ARROW-14522](https://issues.apache.org/jira/browse/ARROW-14522) - [C++] Fix validation of ExtensionType with null storage type +* [ARROW-14523](https://issues.apache.org/jira/browse/ARROW-14523) - [C++] Fix potential data loss in S3 multipart upload +* [ARROW-14529](https://issues.apache.org/jira/browse/ARROW-14529) - [GLib] Validate Decimal{128,256}DataType precision +* [ARROW-14530](https://issues.apache.org/jira/browse/ARROW-14530) - [GLib] Return error for invalid decimal string +* [ARROW-14538](https://issues.apache.org/jira/browse/ARROW-14538) - [R] Work around empty tr call on Solaris +* [ARROW-14539](https://issues.apache.org/jira/browse/ARROW-14539) - [C++] Dataset scanner test failing a DCHECK +* [ARROW-14550](https://issues.apache.org/jira/browse/ARROW-14550) - [Doc] Remove the JSON license; a non-free one. 
+* [ARROW-14554](https://issues.apache.org/jira/browse/ARROW-14554) - [C++][CI] Fix OSS-Fuzz build failure +* [ARROW-14578](https://issues.apache.org/jira/browse/ARROW-14578) - [Format][Documentation] Update union-of-structs doc +* [ARROW-14582](https://issues.apache.org/jira/browse/ARROW-14582) - [CI] Timeout asan ubsan job after 60m +* [ARROW-14583](https://issues.apache.org/jira/browse/ARROW-14583) - [C++] Handle empty chunked arrays in Take, empty datasets in GroupByNode +* [ARROW-14584](https://issues.apache.org/jira/browse/ARROW-14584) - [Python][CI] Python sdist installation fails with latest setuptools 58.5 +* [ARROW-14586](https://issues.apache.org/jira/browse/ARROW-14586) - [R] summarise() with nested aggregate expressions has a confusing error +* [ARROW-14589](https://issues.apache.org/jira/browse/ARROW-14589) - [CI][Go] Fix CGO Windows Tests +* [ARROW-14592](https://issues.apache.org/jira/browse/ARROW-14592) - [C++] list_parent_indices output type should not depend on input type +* [ARROW-14593](https://issues.apache.org/jira/browse/ARROW-14593) - [C++] Fix crashes on invalid IPC file (OSS-Fuzz) +* [ARROW-14594](https://issues.apache.org/jira/browse/ARROW-14594) - [R] Enable snappy+lz4 by default +* [ARROW-14595](https://issues.apache.org/jira/browse/ARROW-14595) - [R] Clean up from setting deps_source to auto +* [ARROW-14598](https://issues.apache.org/jira/browse/ARROW-14598) - [C++][Flight] Fix protoc generation dependency for example +* [ARROW-14600](https://issues.apache.org/jira/browse/ARROW-14600) - [Docs] Fix broken link in Python Development page +* [ARROW-14616](https://issues.apache.org/jira/browse/ARROW-14616) - [C++] Fix build errors on master +* [ARROW-14620](https://issues.apache.org/jira/browse/ARROW-14620) - [Python] Missing bindings for existing_data_behavior makes it impossible to maintain old behavior +* [ARROW-14622](https://issues.apache.org/jira/browse/ARROW-14622) - [C++] Fix initialization-order-fiasco reports +* [ARROW-14625](https://issues.apache.org/jira/browse/ARROW-14625) - [Python][CI] Enable Python test on s390x +* [ARROW-14627](https://issues.apache.org/jira/browse/ARROW-14627) - [C++] Fix tests compilation error using GCC 11.1 +* [ARROW-14629](https://issues.apache.org/jira/browse/ARROW-14629) - [Python] Add pytest dataset marker to test_permutation_of_column_order +* [ARROW-14630](https://issues.apache.org/jira/browse/ARROW-14630) - [C++] Fix aggregation over scalar key columns +* [ARROW-14640](https://issues.apache.org/jira/browse/ARROW-14640) - [R] reading data from S3 +* [ARROW-14642](https://issues.apache.org/jira/browse/ARROW-14642) - [C++] ScanNode is not using the filter expression +* [ARROW-14644](https://issues.apache.org/jira/browse/ARROW-14644) - [C++][R] open_dataset doesn't ignore BOM in csv file +* [ARROW-14659](https://issues.apache.org/jira/browse/ARROW-14659) - [R] Remove warning about factor conversion to string in if_else() +* [ARROW-14664](https://issues.apache.org/jira/browse/ARROW-14664) - [C++] Fix accepted types for Parquet encoding DELTA_BYTE_ARRAY +* [ARROW-14667](https://issues.apache.org/jira/browse/ARROW-14667) - [C++] Added a dcheck to ensure aws is initialized before s3 options are used +* [ARROW-14667](https://issues.apache.org/jira/browse/ARROW-14667) - [R][C++] segfault on calls to arrow::S3FileSystem$create +* [ARROW-14682](https://issues.apache.org/jira/browse/ARROW-14682) - [dev] Verify go on non x86 archs +* [ARROW-14685](https://issues.apache.org/jira/browse/ARROW-14685) - [Python] test case automatically 
detects byteorder of numpy object +* [ARROW-14693](https://issues.apache.org/jira/browse/ARROW-14693) - [R] Non-integers being passed to chunk_size +* [ARROW-14696](https://issues.apache.org/jira/browse/ARROW-14696) - [Java] Reset vectors before populating JDBC data when reusing vector schema root +* [ARROW-14699](https://issues.apache.org/jira/browse/ARROW-14699) - [C++] Fix lz4 undefined behaviour issues +* [ARROW-14700](https://issues.apache.org/jira/browse/ARROW-14700) - [C++] Only check zone offset sign when offset present +* [ARROW-14701](https://issues.apache.org/jira/browse/ARROW-14701) - [Python][MINOR] document parquet.write_table row_group_size +* [ARROW-14704](https://issues.apache.org/jira/browse/ARROW-14704) - [C++] Fix Valgrind failure in parquet-arrow-test +* [ARROW-14709](https://issues.apache.org/jira/browse/ARROW-14709) - [C++][Java] Upgrade ORC to 1.7.1 and use the official Apache distribution site +* [ARROW-14710](https://issues.apache.org/jira/browse/ARROW-14710) - [R] Install error on Linux arm64 with cmake-X.X.X-Linux-x86_64 +* [ARROW-14717](https://issues.apache.org/jira/browse/ARROW-14717) - [Go] Use the ipc.Reader allocator in messageReader +* [ARROW-14721](https://issues.apache.org/jira/browse/ARROW-14721) - [C++] Strengthen DELTA_BYTE_ARRAY decoder +* [ARROW-14722](https://issues.apache.org/jira/browse/ARROW-14722) - [R] Fix altrep vector negation modifying original +* [ARROW-14728](https://issues.apache.org/jira/browse/ARROW-14728) - [Go] Pull LICENSE.txt up to new module root +* [ARROW-14739](https://issues.apache.org/jira/browse/ARROW-14739) - [JS] Ensure docs point to right source +* [ARROW-14749](https://issues.apache.org/jira/browse/ARROW-14749) - [Python][Release] Set release verification script to use target source instead of current source directory +* [ARROW-14765](https://issues.apache.org/jira/browse/ARROW-14765) - [Python] StructFieldOptions not exposed +* [ARROW-14766](https://issues.apache.org/jira/browse/ARROW-14766) - [Python] Mark compute function arguments positional-only +* [ARROW-14769](https://issues.apache.org/jira/browse/ARROW-14769) - [Go] Ensure MessageReader errors get reported +* [ARROW-14773](https://issues.apache.org/jira/browse/ARROW-14773) - [JS] Fix sourcemap paths +* [ARROW-14774](https://issues.apache.org/jira/browse/ARROW-14774) - [JS] Correct package exports +* [ARROW-14778](https://issues.apache.org/jira/browse/ARROW-14778) - [C++] Round mean of decimal types after division +* [ARROW-14786](https://issues.apache.org/jira/browse/ARROW-14786) - [R] Bump dev version following 6.0.1 patch release +* [ARROW-14788](https://issues.apache.org/jira/browse/ARROW-14788) - [C++] Fix warning in dataset/file_orc_test.cc +* [ARROW-14791](https://issues.apache.org/jira/browse/ARROW-14791) - [C++] Fix crash when validating corrupt list array +* [ARROW-14792](https://issues.apache.org/jira/browse/ARROW-14792) - [C++] Fix crash when reading DELTA_BYTE_ARRAY Parquet file +* [ARROW-14795](https://issues.apache.org/jira/browse/ARROW-14795) - [C++] Fix issue on replace with mask for null values +* [ARROW-14796](https://issues.apache.org/jira/browse/ARROW-14796) - [Python] Documentation: Correct default value +* [ARROW-14800](https://issues.apache.org/jira/browse/ARROW-14800) - [C++] Disambiguate std::launder on MSVC with C++17 enabled +* [ARROW-14803](https://issues.apache.org/jira/browse/ARROW-14803) - [R] Function not declared in scope +* [ARROW-14839](https://issues.apache.org/jira/browse/ARROW-14839) - [R] test-fedora-r-clang-sanitizer job 
failing due to snappy causing a sanitizer error +* [ARROW-14840](https://issues.apache.org/jira/browse/ARROW-14840) - [R][CI] test-ubuntu-20.10-docs nightly build failing due to R install issue +* [ARROW-14851](https://issues.apache.org/jira/browse/ARROW-14851) - [Archery] Don't dump JSON benchmark output on stdout +* [ARROW-14853](https://issues.apache.org/jira/browse/ARROW-14853) - [C++][Python] Improve error message for missing function options +* [ARROW-14854](https://issues.apache.org/jira/browse/ARROW-14854) - [C++] Fix struct_field crash on invalid index +* [ARROW-14894](https://issues.apache.org/jira/browse/ARROW-14894) - [R] Integer overflow in write_parquet chunk size calculation +* [ARROW-14898](https://issues.apache.org/jira/browse/ARROW-14898) - [C++][Compute] Fix crash of out-of-bounds memory accessing in key_hash if a key is smaller than int64 +* [ARROW-14919](https://issues.apache.org/jira/browse/ARROW-14919) - [R] write_parquet() drops attributes for grouped dataframes +* [ARROW-14922](https://issues.apache.org/jira/browse/ARROW-14922) - [C++][Parquet] Fix column-io-benchmark throws +* [ARROW-14930](https://issues.apache.org/jira/browse/ARROW-14930) - [C++] Make S3 directory detection more robust +* [ARROW-14931](https://issues.apache.org/jira/browse/ARROW-14931) - [Python] csv/orc format strings missing from some dataset docs +* [ARROW-14933](https://issues.apache.org/jira/browse/ARROW-14933) - [JS] apache-arrow does not compile with typescript when types are checked +* [ARROW-14936](https://issues.apache.org/jira/browse/ARROW-14936) - [C++][Gandiva] Fix split_part function in gandiva +* [ARROW-14937](https://issues.apache.org/jira/browse/ARROW-14937) - [Doc] Make sure the docs directory is mounted as a volume +* [ARROW-14962](https://issues.apache.org/jira/browse/ARROW-14962) - [CI] Fix minio installation on s390x +* [ARROW-14966](https://issues.apache.org/jira/browse/ARROW-14966) - [R][CI] Add redundancy to CRAN mirrors for dependency installation +* [ARROW-14979](https://issues.apache.org/jira/browse/ARROW-14979) - [C++] Fix process leaks in GCS integration tests +* [ARROW-14980](https://issues.apache.org/jira/browse/ARROW-14980) - [C++] GCS tests use PYTHON environment variable +* [ARROW-14991](https://issues.apache.org/jira/browse/ARROW-14991) - [Packaging][Python] Windows wheel builds are failing due to wrong vcpkg triplet name +* [ARROW-15002](https://issues.apache.org/jira/browse/ARROW-15002) - [Python] Fix hypothesis strategy for interval types +* [ARROW-15004](https://issues.apache.org/jira/browse/ARROW-15004) - [Dev][Archery] Use default simd level +* [ARROW-15009](https://issues.apache.org/jira/browse/ARROW-15009) - [C++] Make hash join tests less slow with TSan +* [ARROW-15027](https://issues.apache.org/jira/browse/ARROW-15027) - [C++] Fix OpenTelemetry CMake definitions +* [ARROW-15028](https://issues.apache.org/jira/browse/ARROW-15028) - [C++] Fix Gandiva compile failure on Unity builds +* [ARROW-15030](https://issues.apache.org/jira/browse/ARROW-15030) - [C++] CSV writer test failures +* [ARROW-15031](https://issues.apache.org/jira/browse/ARROW-15031) - [C++] Fix crash on invalid Parquet file (OSS-Fuzz) +* [ARROW-15041](https://issues.apache.org/jira/browse/ARROW-15041) - [R] Flaky BOM removal test +* [ARROW-15047](https://issues.apache.org/jira/browse/ARROW-15047) - [R][MINOR] Suggest R command for setting build environment variables +* [ARROW-15071](https://issues.apache.org/jira/browse/ARROW-15071) - [C#] Fixed a bug in Column.cs ValidateArrayDataTypes 
method +* [ARROW-15076](https://issues.apache.org/jira/browse/ARROW-15076) - [C++][Gandiva] Fix allocation of AES {en,de}cryption result +* [ARROW-15078](https://issues.apache.org/jira/browse/ARROW-15078) - [C++] Silence CMake error "includes non-existent path" with bundled OpenTelemetry +* [ARROW-15090](https://issues.apache.org/jira/browse/ARROW-15090) - [C++] SerializedAsyncTaskGroup does not finish if an error arrives while there are still tasks to run +* [ARROW-15101](https://issues.apache.org/jira/browse/ARROW-15101) - [Python] Fix build failure on CSV writer +* [ARROW-15105](https://issues.apache.org/jira/browse/ARROW-15105) - [R] unsupported timestamp cast in CSV with tz element +* [ARROW-15143](https://issues.apache.org/jira/browse/ARROW-15143) - [C++] Remove incorrect comment on API of Transform for StringBinaryTransformExecBase +* [ARROW-15144](https://issues.apache.org/jira/browse/ARROW-15144) - [Java] Unable to read IPC file in master +* [ARROW-15145](https://issues.apache.org/jira/browse/ARROW-15145) - [R][CI] test-r-minimal-build fails due to updated error message +* [ARROW-15147](https://issues.apache.org/jira/browse/ARROW-15147) - [CI][C++][Gandiva] Fix broken nigthly builds related to boost dependencies +* [ARROW-15171](https://issues.apache.org/jira/browse/ARROW-15171) - [C++][Java] Update ORC to 1.7.2 +* [ARROW-15181](https://issues.apache.org/jira/browse/ARROW-15181) - [C++][FlightRPC] Fix race between signal handler and shutdown +* [ARROW-15184](https://issues.apache.org/jira/browse/ARROW-15184) - [C++] Unit tests of reading delta-encoded Parquet files with and without nulls +* [ARROW-15185](https://issues.apache.org/jira/browse/ARROW-15185) - [R] Make arrow build options check case insensitive +* [ARROW-15194](https://issues.apache.org/jira/browse/ARROW-15194) - [C++] Combine ChunkedArray constructors +* [ARROW-15199](https://issues.apache.org/jira/browse/ARROW-15199) - [Java] Update protobuf-maven-plugin to avoid 'Text file busy' failure +* [ARROW-15200](https://issues.apache.org/jira/browse/ARROW-15200) - [C++][Gandiva] Enable RTTI when building LLVM dependency using vcpkg +* [ARROW-15226](https://issues.apache.org/jira/browse/ARROW-15226) - [Python] Update Cython bindings of ChunkedArray constructors +* [ARROW-15231](https://issues.apache.org/jira/browse/ARROW-15231) - [Packaging][deb] Add missing ArrowFlight-1.0.typelib +* [ARROW-15234](https://issues.apache.org/jira/browse/ARROW-15234) - [Python] Fix crash with custom CSV invalid row handler +* [ARROW-15241](https://issues.apache.org/jira/browse/ARROW-15241) - [C++] MakeArrayOfNull fails on extension types with a nested storage type +* [ARROW-15243](https://issues.apache.org/jira/browse/ARROW-15243) - [CI][Python] Make PyArrow installation more robust in CI +* [ARROW-15265](https://issues.apache.org/jira/browse/ARROW-15265) - [C++] Fix hang in dataset writer with kDeleteMatchingPartitions and #partitions >= 8 +* [ARROW-15266](https://issues.apache.org/jira/browse/ARROW-15266) - [R][CI] Test reorganization triggering valgrind errors +* [ARROW-15286](https://issues.apache.org/jira/browse/ARROW-15286) - [Python] Convert indices passed to FileSystemDataset.take to array to avoid segfault +* [ARROW-15290](https://issues.apache.org/jira/browse/ARROW-15290) - [Python][Docs] Documentation pages for PyArrow have incorrect hyperlinks +* [ARROW-15306](https://issues.apache.org/jira/browse/ARROW-15306) - [C++] S3FileSystem Should set the content-type header to application/octet-stream if not specified +* 
[ARROW-15315](https://issues.apache.org/jira/browse/ARROW-15315) - [Java][FlightRPC] FlightSqlProducer#doAction always throws INVALID_ARGUMENT +* [ARROW-15318](https://issues.apache.org/jira/browse/ARROW-15318) - [C++][Python] Regression reading partition keys of large batches. +* [ARROW-15323](https://issues.apache.org/jira/browse/ARROW-15323) - [CI] Nightly spark integration builds are failing +* [ARROW-15324](https://issues.apache.org/jira/browse/ARROW-15324) - [C++] Avoid crashing when HDFS file fails closing +* [ARROW-15325](https://issues.apache.org/jira/browse/ARROW-15325) - [R] Fix CRAN comment on map\_batches collect +* [ARROW-15326](https://issues.apache.org/jira/browse/ARROW-15326) - [C++] Fix Gandiva crashes +* [ARROW-15335](https://issues.apache.org/jira/browse/ARROW-15335) - [Java] Fix setPosition call in UnionListReader for empty List +* [ARROW-15358](https://issues.apache.org/jira/browse/ARROW-15358) - [C++] Fix custom matcher compilation +* [ARROW-15360](https://issues.apache.org/jira/browse/ARROW-15360) - [Python] Check slice bounds in Buffer.slice() +* [ARROW-15362](https://issues.apache.org/jira/browse/ARROW-15362) - Setting OMP\_NUM\_THREADS to 1 causes dataset to hang +* [ARROW-15370](https://issues.apache.org/jira/browse/ARROW-15370) - [Python] Fix regression in empty table to_pandas conversion +* [ARROW-15371](https://issues.apache.org/jira/browse/ARROW-15371) - [C++][Release] Missing libsqlite-dev from the verification docker images +* [ARROW-15372](https://issues.apache.org/jira/browse/ARROW-15372) - [C++][Gandiva] Gandiva now depends on boost/crc.hpp which is missing from the trimmed boost archive +* [ARROW-15376](https://issues.apache.org/jira/browse/ARROW-15376) - [Go][Release] cpu_arm64 needs +build comment +* [ARROW-15377](https://issues.apache.org/jira/browse/ARROW-15377) - [Release] Bump nodejs version to 16 in the macOS verification builds +* [ARROW-15378](https://issues.apache.org/jira/browse/ARROW-15378) - [C++][Release] GTest linking error during windows verification +* [ARROW-15380](https://issues.apache.org/jira/browse/ARROW-15380) - [Python][Release] NumPy ABI incompatibility during verification +* [ARROW-15388](https://issues.apache.org/jira/browse/ARROW-15388) - [C++] Avoid including absl from flatbuffers +* [ARROW-15392](https://issues.apache.org/jira/browse/ARROW-15392) - [JS] Flaky javascript unittest +* [ARROW-15393](https://issues.apache.org/jira/browse/ARROW-15393) - [Release][Crossbow] Fall back to dev0 when the generated scm version number doesn't have a distance +* [ARROW-15394](https://issues.apache.org/jira/browse/ARROW-15394) - [CI][Docs] Fix env variable to ensure doxygen is used in doc build +* [ARROW-15395](https://issues.apache.org/jira/browse/ARROW-15395) - [Release][Ruby] Ruby verification fails on M1 +* [ARROW-15401](https://issues.apache.org/jira/browse/ARROW-15401) - [Python] Gdb tests are failing on windows +* [PARQUET-1856](https://issues.apache.org/jira/browse/PARQUET-1856) - [C++] Avoid failing tests with Snappy support disabled +* [PARQUET-2109](https://issues.apache.org/jira/browse/PARQUET-2109) - [C++] Check if Parquet page has too few values + + +## New Features and Improvements + +* [ARROW-1299](https://issues.apache.org/jira/browse/ARROW-1299) - [Doc] Publish nightly documentation against master somewhere +* [ARROW-1699](https://issues.apache.org/jira/browse/ARROW-1699) - [C++] forward, backward fill kernel functions +* [ARROW-2366](https://issues.apache.org/jira/browse/ARROW-2366) - [Python][C++][Parquet] Add test to 
ensure support reading Parquet files having a permutation of column order +* [ARROW-3699](https://issues.apache.org/jira/browse/ARROW-3699) - [C++] Dockerfile for testing 32-bit C++ build +* [ARROW-4975](https://issues.apache.org/jira/browse/ARROW-4975) - [C++] Support concatenation of UnionArrays +* [ARROW-5599](https://issues.apache.org/jira/browse/ARROW-5599) - [Go] Migrate array.{Interface,Record,Column,Chunked,Table} to arrow.{Array,Record,Column,Chunked,Table} +* [ARROW-6001](https://issues.apache.org/jira/browse/ARROW-6001) - [Python] : Add from_pylist() and to_pylist() to pyarrow.Table to convert list of records +* [ARROW-6276](https://issues.apache.org/jira/browse/ARROW-6276) - [C++] for some arrow classes +* [ARROW-8285](https://issues.apache.org/jira/browse/ARROW-8285) - [Python][Dataset] Test that ScalarExpression accepts numpy scalars +* [ARROW-8605](https://issues.apache.org/jira/browse/ARROW-8605) - [R] Add brotli to Windows R build +* [ARROW-8823](https://issues.apache.org/jira/browse/ARROW-8823) - [C++] Add total size of batch buffers to IPC write statistics +* [ARROW-9186](https://issues.apache.org/jira/browse/ARROW-9186) - [R] Allow specifying CSV file encoding +* [ARROW-9483](https://issues.apache.org/jira/browse/ARROW-9483) - [C++] Reorganize testing headers +* [ARROW-9630](https://issues.apache.org/jira/browse/ARROW-9630) - [Go] Implement public JSON reader/writer +* [ARROW-10209](https://issues.apache.org/jira/browse/ARROW-10209) - [Python] Support positional options in compute functions +* [ARROW-10220](https://issues.apache.org/jira/browse/ARROW-10220) - [JS] Cache javascript utf-8 dictionary keys? +* [ARROW-10317](https://issues.apache.org/jira/browse/ARROW-10317) - [Python] Document compute function options +* [ARROW-10998](https://issues.apache.org/jira/browse/ARROW-10998) - [C++] Detect URIs where a filesystem path is expected +* [ARROW-11297](https://issues.apache.org/jira/browse/ARROW-11297) - [C++][Python] Add ORC writer options +* [ARROW-11347](https://issues.apache.org/jira/browse/ARROW-11347) - [JS] Consider Objects instead of Maps +* [ARROW-11424](https://issues.apache.org/jira/browse/ARROW-11424) - [C++] StructType::{AddField,RemoveField,SetField} member functions +* [ARROW-11475](https://issues.apache.org/jira/browse/ARROW-11475) - [C++] Upgrade mimalloc to v1.7.3 +* [ARROW-11938](https://issues.apache.org/jira/browse/ARROW-11938) - [R] Enable R build process to find locally built C++ library on Windows +* [ARROW-12053](https://issues.apache.org/jira/browse/ARROW-12053) - [C++] Implement aggregate compute functions for decimal datatypes +* [ARROW-12060](https://issues.apache.org/jira/browse/ARROW-12060) - [Python] Enable calling compute functions on Expressions +* [ARROW-12315](https://issues.apache.org/jira/browse/ARROW-12315) - [R] add max_partitions argument to write_dataset() +* [ARROW-12404](https://issues.apache.org/jira/browse/ARROW-12404) - [C++] Implement "random" nullary function that generates uniform random between 0 and 1 +* [ARROW-12422](https://issues.apache.org/jira/browse/ARROW-12422) - [C++][Gandiva] Add castVARCHAR from date millis function +* [ARROW-12480](https://issues.apache.org/jira/browse/ARROW-12480) - [Java][Dataset] FileSystemDataset: Support reading from a directory +* [ARROW-12536](https://issues.apache.org/jira/browse/ARROW-12536) - [JS] Construct tables from JavaScript types +* [ARROW-12538](https://issues.apache.org/jira/browse/ARROW-12538) - [JS] Show Vectors in the docs +* 
[ARROW-12545](https://issues.apache.org/jira/browse/ARROW-12545) - [Python][Docs] Fill in section about Custom Schema and Field Metadata +* [ARROW-12548](https://issues.apache.org/jira/browse/ARROW-12548) - [JS] Get rid of columns +* [ARROW-12549](https://issues.apache.org/jira/browse/ARROW-12549) - [JS] Table and RecordBatch should not extend Vector, make JS lib smaller +* [ARROW-12595](https://issues.apache.org/jira/browse/ARROW-12595) - [C++][Gandiva][binary][string] functions +* [ARROW-12607](https://issues.apache.org/jira/browse/ARROW-12607) - [Website] Doc section for Dataset Java bindings +* [ARROW-12671](https://issues.apache.org/jira/browse/ARROW-12671) - [C++] Add OpenTelemetry to ThirdpartyToolchain +* [ARROW-12683](https://issues.apache.org/jira/browse/ARROW-12683) - [C++] Enable fine-grained I/O (coalescing) in IPC reader +* [ARROW-12706](https://issues.apache.org/jira/browse/ARROW-12706) - [Python] Drop Python 3.6 support +* [ARROW-12712](https://issues.apache.org/jira/browse/ARROW-12712) - [C++] String repeat kernel +* [ARROW-12735](https://issues.apache.org/jira/browse/ARROW-12735) - [C++] Write GDB plugin +* [ARROW-12803](https://issues.apache.org/jira/browse/ARROW-12803) - [C++] [Dataset] Write dataset with scanner does not support async scan +* [ARROW-12820](https://issues.apache.org/jira/browse/ARROW-12820) - [C++] Support zone offset in ISO8601, strptime parser +* [ARROW-12858](https://issues.apache.org/jira/browse/ARROW-12858) - [C++][Gandiva] Add isNull, isTrue, isFalse, isNotTrue, IsNotFalse and NVL functions on Gandiva +* [ARROW-12922](https://issues.apache.org/jira/browse/ARROW-12922) - [Java][FlightSQL] Create stubbed APIs for Flight SQL +* [ARROW-12943](https://issues.apache.org/jira/browse/ARROW-12943) - [Gandiva][C++] Implement MD5 Hive function +* [ARROW-13016](https://issues.apache.org/jira/browse/ARROW-13016) - [C++][Compute] Support Null type in Sum/Mean aggregation +* [ARROW-13035](https://issues.apache.org/jira/browse/ARROW-13035) - [C++] indices_nonzero compute function +* [ARROW-13051](https://issues.apache.org/jira/browse/ARROW-13051) - [Release][Java] Use artifacts built by Crossbow +* [ARROW-13081](https://issues.apache.org/jira/browse/ARROW-13081) - [C++] Disallow comparing zoned and naive timestamps +* [ARROW-13087](https://issues.apache.org/jira/browse/ARROW-13087) - [R] Expose Parquet ArrowReaderProperties::coerce_int96_timestamp_unit_ +* [ARROW-13111](https://issues.apache.org/jira/browse/ARROW-13111) - [R] altrep vectors for ChunkedArray +* [ARROW-13130](https://issues.apache.org/jira/browse/ARROW-13130) - [C++] Add decimal support to arithmetic kernels +* [ARROW-13156](https://issues.apache.org/jira/browse/ARROW-13156) - [R] bindings for str_count +* [ARROW-13208](https://issues.apache.org/jira/browse/ARROW-13208) - [Python][CI] Create a build for validating python docstrings +* [ARROW-13328](https://issues.apache.org/jira/browse/ARROW-13328) - [C++][Dataset] Use an ExecPlan for synchronous scans or drop synchronous scans +* [ARROW-13338](https://issues.apache.org/jira/browse/ARROW-13338) - [C++][Dataset] Make async Scanner the default +* [ARROW-13362](https://issues.apache.org/jira/browse/ARROW-13362) - [R] Clean up in/by Arrow messaging +* [ARROW-13371](https://issues.apache.org/jira/browse/ARROW-13371) - [R] binding for make_struct -> StructArray$create() +* [ARROW-13373](https://issues.apache.org/jira/browse/ARROW-13373) - [C++][Gandiva] Implement CRC32 Hive function on Gandiva +* 
[ARROW-13376](https://issues.apache.org/jira/browse/ARROW-13376) - [C++][Gandiva] Implement FACTORIAL Hive function on Gandiva +* [ARROW-13377](https://issues.apache.org/jira/browse/ARROW-13377) - [C++][Gandiva] Implement PMOD Hive functions on Gandiva +* [ARROW-13383](https://issues.apache.org/jira/browse/ARROW-13383) - [R] Add examples to functions which don't have examples +* [ARROW-13398](https://issues.apache.org/jira/browse/ARROW-13398) - [R] Update install.Rmd vignette +* [ARROW-13400](https://issues.apache.org/jira/browse/ARROW-13400) - [R] Update fs.Rmd (Working with S3) vignette +* [ARROW-13401](https://issues.apache.org/jira/browse/ARROW-13401) - [R] : Update python.Rmd vignette +* [ARROW-13408](https://issues.apache.org/jira/browse/ARROW-13408) - [Packaging] Update crossbow to checkout specific git hashes +* [ARROW-13449](https://issues.apache.org/jira/browse/ARROW-13449) - [Format] Update documentation related to wire format of schema +* [ARROW-13494](https://issues.apache.org/jira/browse/ARROW-13494) - [C++] Rename BitUtil and LittleEndianArray namespaces +* [ARROW-13514](https://issues.apache.org/jira/browse/ARROW-13514) - [JS] Update flatbuffers +* [ARROW-13536](https://issues.apache.org/jira/browse/ARROW-13536) - [C++] Use decimal-point aware conversion from fast-float +* [ARROW-13553](https://issues.apache.org/jira/browse/ARROW-13553) - [Doc] Add guidelines for code reviews +* [ARROW-13554](https://issues.apache.org/jira/browse/ARROW-13554) - [C++] Remove deprecated Scanner::Scan +* [ARROW-13558](https://issues.apache.org/jira/browse/ARROW-13558) - [C++] Validate decimal arrays/scalars +* [ARROW-13579](https://issues.apache.org/jira/browse/ARROW-13579) - Expose Create EmptyArray, EmptyRecordBatch and EmptyTable utility functions. +* [ARROW-13589](https://issues.apache.org/jira/browse/ARROW-13589) - [C++] Reconcile ValidateArray and ValidateArrayFull +* [ARROW-13590](https://issues.apache.org/jira/browse/ARROW-13590) - [C++] Ensure dataset writing applies back pressure +* [ARROW-13598](https://issues.apache.org/jira/browse/ARROW-13598) - [C++] Remove Datum::COLLECTION +* [ARROW-13607](https://issues.apache.org/jira/browse/ARROW-13607) - [C++] Add Skyhook to Arrow +* [ARROW-13610](https://issues.apache.org/jira/browse/ARROW-13610) - [R] Unvendor cpp11 +* [ARROW-13615](https://issues.apache.org/jira/browse/ARROW-13615) - [R] Bindings for stringr::str\_to\_sentence +* [ARROW-13617](https://issues.apache.org/jira/browse/ARROW-13617) - [C++] Make Decimal representations consistent +* [ARROW-13623](https://issues.apache.org/jira/browse/ARROW-13623) - [R] write_csv_arrow update to follow the signature of readr::write_csv +* [ARROW-13643](https://issues.apache.org/jira/browse/ARROW-13643) - [C++][Compute] Implement outer join with support for residual predicates +* [ARROW-13663](https://issues.apache.org/jira/browse/ARROW-13663) - [C++] RecordBatchReader STL-like iteration +* [ARROW-13668](https://issues.apache.org/jira/browse/ARROW-13668) - [Python] Add `write_batch` and `write` methods to `ParquetWriter` +* [ARROW-13707](https://issues.apache.org/jira/browse/ARROW-13707) - [Doc] Cookbook Release 2 +* [ARROW-13711](https://issues.apache.org/jira/browse/ARROW-13711) - [Doc][Cookbook] Sending and receiving data over a network using an Arrow Flight RPC server - R +* [ARROW-13781](https://issues.apache.org/jira/browse/ARROW-13781) - [Python] Allow per column encoding in parquet writer +* [ARROW-13811](https://issues.apache.org/jira/browse/ARROW-13811) - [Java] Provide a general 
out-of-place sorter +* [ARROW-13826](https://issues.apache.org/jira/browse/ARROW-13826) - [C++][Gandiva] Implement QUOTE Hive functions on Gandiva +* [ARROW-13828](https://issues.apache.org/jira/browse/ARROW-13828) - [C++][Gandiva] Implement SOUNDEX Hive functions on Gandiva +* [ARROW-13829](https://issues.apache.org/jira/browse/ARROW-13829) - [C++][Gandiva] Implement GREATEST and LEAST Hive functions on Gandiva +* [ARROW-13830](https://issues.apache.org/jira/browse/ARROW-13830) - [C++][Gandiva] Implement CHR Hive functions on Gandiva +* [ARROW-13832](https://issues.apache.org/jira/browse/ARROW-13832) - [Doc] Improve compute documentation +* [ARROW-13834](https://issues.apache.org/jira/browse/ARROW-13834) - [R][Documentation] Document the process of creating R bindings for compute kernels and rationale behind conventions +* [ARROW-13841](https://issues.apache.org/jira/browse/ARROW-13841) - [Doc] Document the different subcomponents that make up the CI and how they fit together +* [ARROW-13886](https://issues.apache.org/jira/browse/ARROW-13886) - [R] Expand documentation for decimal() +* [ARROW-13887](https://issues.apache.org/jira/browse/ARROW-13887) - [R] Capture error produced when reading in CSV file with headers and using a schema, and add suggestion +* [ARROW-13888](https://issues.apache.org/jira/browse/ARROW-13888) - [R] Rephrase docs for schema()'s ellipses argument and rephrase error message +* [ARROW-13923](https://issues.apache.org/jira/browse/ARROW-13923) - [C++] Faster CSV chunker with long CSV cells +* [ARROW-13943](https://issues.apache.org/jira/browse/ARROW-13943) - [Python] Hide hash_aggregate functions from compute module +* [ARROW-13984](https://issues.apache.org/jira/browse/ARROW-13984) - [Go][Parquet] File readers +* [ARROW-13984](https://issues.apache.org/jira/browse/ARROW-13984) - [Go][Parquet] file handling for go parquet, just the readers +* [ARROW-13986](https://issues.apache.org/jira/browse/ARROW-13986) - [Go][Parquet] Add File Writers and tests +* [ARROW-13987](https://issues.apache.org/jira/browse/ARROW-13987) - [C++] Support nested field refs +* [ARROW-13988](https://issues.apache.org/jira/browse/ARROW-13988) - [C++] Support base binary types in hash_min_max +* [ARROW-13989](https://issues.apache.org/jira/browse/ARROW-13989) - [C++] Add support for month-day-nano interval to compute functions +* [ARROW-14011](https://issues.apache.org/jira/browse/ARROW-14011) - [C++][Gandiva] Add elt hive function to gandiva +* [ARROW-14022](https://issues.apache.org/jira/browse/ARROW-14022) - [Dev] Remove arrow/dev/benchmarking +* [ARROW-14032](https://issues.apache.org/jira/browse/ARROW-14032) - [C++][Gandiva] Add concat_ws hive function to gandiva +* [ARROW-14039](https://issues.apache.org/jira/browse/ARROW-14039) - [C++][Docs] Indicate memory requirements for building +* [ARROW-14041](https://issues.apache.org/jira/browse/ARROW-14041) - [C++] Replace uses of BitmapReader in Parquet decoders +* [ARROW-14048](https://issues.apache.org/jira/browse/ARROW-14048) - [C++][Gandiva] Cache only object code in memory instead of entire module +* [ARROW-14051](https://issues.apache.org/jira/browse/ARROW-14051) - [R] Handle conditionals enclosing aggregate expressions +* [ARROW-14074](https://issues.apache.org/jira/browse/ARROW-14074) - [C++][Compute] C++ consumer of compute IR +* [ARROW-14166](https://issues.apache.org/jira/browse/ARROW-14166) - [C++] update vcpkg builtin baseline +* [ARROW-14167](https://issues.apache.org/jira/browse/ARROW-14167) - [C++][R] Directly support 
dictionaries in coalesce +* [ARROW-14171](https://issues.apache.org/jira/browse/ARROW-14171) - [C++][Python][Packaging] Upgrade VCPKG version and add google-cloud-cpp dependency +* [ARROW-14174](https://issues.apache.org/jira/browse/ARROW-14174) - [C++] Deduplicate some Decimal/FixedSizeBinary kernels +* [ARROW-14181](https://issues.apache.org/jira/browse/ARROW-14181) - [C++][Compute] Support for dictionaries in hash join +* [ARROW-14189](https://issues.apache.org/jira/browse/ARROW-14189) - [Docs] Add version dropdown to the sphinx docs +* [ARROW-14193](https://issues.apache.org/jira/browse/ARROW-14193) - [C++][Gandiva] Implement INSTR function +* [ARROW-14205](https://issues.apache.org/jira/browse/ARROW-14205) - [C++] Add utf8_normalize compute function +* [ARROW-14227](https://issues.apache.org/jira/browse/ARROW-14227) - [R] Implement lubridate is.* methods +* [ARROW-14229](https://issues.apache.org/jira/browse/ARROW-14229) - [C++] Bump versions of bundled dependencies +* [ARROW-14231](https://issues.apache.org/jira/browse/ARROW-14231) - [C++] Support casting timestamp with timezone to string +* [ARROW-14242](https://issues.apache.org/jira/browse/ARROW-14242) - Exposing the correct `indent` paramenter in `to_string` +* [ARROW-14277](https://issues.apache.org/jira/browse/ARROW-14277) - R Tutorials 2021-Q4 Initiative +* [ARROW-14278](https://issues.apache.org/jira/browse/ARROW-14278) - [Doc] New Contributors Guide +* [ARROW-14294](https://issues.apache.org/jira/browse/ARROW-14294) - [Doc][Python] Add tutorial on Flight to pyarrow documentation +* [ARROW-14297](https://issues.apache.org/jira/browse/ARROW-14297) - [R] smooth out integer division to better match R +* [ARROW-14306](https://issues.apache.org/jira/browse/ARROW-14306) - [C++][Compute] Add binary reverse kernel +* [ARROW-14310](https://issues.apache.org/jira/browse/ARROW-14310) - [R] Make expect_dplyr_equal() more intuitive +* [ARROW-14311](https://issues.apache.org/jira/browse/ARROW-14311) - [C++] Make GCS FileSystem tests faster +* [ARROW-14315](https://issues.apache.org/jira/browse/ARROW-14315) - [C++][Gandiva] Implement BROUND function +* [ARROW-14336](https://issues.apache.org/jira/browse/ARROW-14336) - [C++] Maintain bundled dependency tarballs in an Apache-managed location +* [ARROW-14338](https://issues.apache.org/jira/browse/ARROW-14338) - [Docs] Add version dropdown to the pkgdown (R) docs +* [ARROW-14346](https://issues.apache.org/jira/browse/ARROW-14346) - [C++] Implement GcsFileSystem::OpenOutputStream +* [ARROW-14347](https://issues.apache.org/jira/browse/ARROW-14347) - [C++] random access files for GcsFileSystem +* [ARROW-14349](https://issues.apache.org/jira/browse/ARROW-14349) - [IR] Remove RelBase +* [ARROW-14350](https://issues.apache.org/jira/browse/ARROW-14350) - [IR] Add filter expression to Source node +* [ARROW-14351](https://issues.apache.org/jira/browse/ARROW-14351) - [IR] Add projection list to Source node +* [ARROW-14352](https://issues.apache.org/jira/browse/ARROW-14352) - [IR] Remove schema property from Source +* [ARROW-14355](https://issues.apache.org/jira/browse/ARROW-14355) - [C++] Create naive implementation of algorithm to estimate table/batch buffer size +* [ARROW-14356](https://issues.apache.org/jira/browse/ARROW-14356) - [C++] Create kernel to determine buffer memory "referenced" by arrays (even if there are offsets) +* [ARROW-14365](https://issues.apache.org/jira/browse/ARROW-14365) - [R] Update README example to reflect new capabilities +* 
[ARROW-14384](https://issues.apache.org/jira/browse/ARROW-14384) - [Docs] Add documentation for building Sphinx docs without having to build pyarrow +* [ARROW-14388](https://issues.apache.org/jira/browse/ARROW-14388) - [Python] Add unit test for pandas masks +* [ARROW-14390](https://issues.apache.org/jira/browse/ARROW-14390) - [Packaging][Ubuntu] Add support for Ubuntu 21.10 +* [ARROW-14391](https://issues.apache.org/jira/browse/ARROW-14391) - [Docs] Archery requires docker +* [ARROW-14398](https://issues.apache.org/jira/browse/ARROW-14398) - [CI] Don't build doxygen docs in all of the conda builds +* [ARROW-14409](https://issues.apache.org/jira/browse/ARROW-14409) - [Packaging][Python] Update the manylinux platform tags +* [ARROW-14412](https://issues.apache.org/jira/browse/ARROW-14412) - [R] Better error handling for flight_put() when data arg object is wrong type +* [ARROW-14413](https://issues.apache.org/jira/browse/ARROW-14413) - [C++][Gandiva] Implement levenshtein function +* [ARROW-14416](https://issues.apache.org/jira/browse/ARROW-14416) - [R] Fix package installation on the Raspberry Pi +* [ARROW-14421](https://issues.apache.org/jira/browse/ARROW-14421) - [C++] Implement Flight SQL +* [ARROW-14430](https://issues.apache.org/jira/browse/ARROW-14430) - [Go] Basic Expression, Field Reference and Datum handling +* [ARROW-14431](https://issues.apache.org/jira/browse/ARROW-14431) - [C++][Gandiva] Implement AES ENCRYPT and AES DECRYPT functions +* [ARROW-14433](https://issues.apache.org/jira/browse/ARROW-14433) - [Release][APT] Skip arm64 Ubuntu 21.04 verification +* [ARROW-14435](https://issues.apache.org/jira/browse/ARROW-14435) - [Release] Update verification scripts to check python 3.10 wheels +* [ARROW-14436](https://issues.apache.org/jira/browse/ARROW-14436) - [C++] Disable color diagnostics when compiling with ccache +* [ARROW-14438](https://issues.apache.org/jira/browse/ARROW-14438) - [CI] Don't cancel builds on the main branch +* [ARROW-14440](https://issues.apache.org/jira/browse/ARROW-14440) - [C++][FlightRPC] Add gRPC + Flight example +* [ARROW-14441](https://issues.apache.org/jira/browse/ARROW-14441) - [R] Add our philosophy to the dev vignette +* [ARROW-14446](https://issues.apache.org/jira/browse/ARROW-14446) - [Docs][Release] Update documentation on verification of release candidates +* [ARROW-14448](https://issues.apache.org/jira/browse/ARROW-14448) - [Python] Update pyarrow.array() docstring note on timestamp (timezone) conversion +* [ARROW-14449](https://issues.apache.org/jira/browse/ARROW-14449) - [Python] RecordBatch in Cython is missing column\_data method +* [ARROW-14450](https://issues.apache.org/jira/browse/ARROW-14450) - [R] Old macos build error +* [ARROW-14451](https://issues.apache.org/jira/browse/ARROW-14451) - [Release][Ruby] The `--path` flag is deprecated +* [ARROW-14452](https://issues.apache.org/jira/browse/ARROW-14452) - [Release][JS] Update JavaScript testing +* [ARROW-14454](https://issues.apache.org/jira/browse/ARROW-14454) - [Release] shasum is not available on CentOS 8 +* [ARROW-14459](https://issues.apache.org/jira/browse/ARROW-14459) - [Doc] Update the pinned sphinx version to 4.2 +* [ARROW-14462](https://issues.apache.org/jira/browse/ARROW-14462) - [Go][Parquet] Update dependencies +* [ARROW-14464](https://issues.apache.org/jira/browse/ARROW-14464) - [R] Change write_parquet()'s default chunk_size from all rows +* [ARROW-14470](https://issues.apache.org/jira/browse/ARROW-14470) - [Python] Expose the use_threads option in Feather read functions 
+* [ARROW-14476](https://issues.apache.org/jira/browse/ARROW-14476) - [CI] Crossbow should comment cause of failure +* [ARROW-14479](https://issues.apache.org/jira/browse/ARROW-14479) - [C++] Hash Join Microbenchmarks +* [ARROW-14480](https://issues.apache.org/jira/browse/ARROW-14480) - [R] Expose arrow::dataset::ExistingDataBehavior to R +* [ARROW-14482](https://issues.apache.org/jira/browse/ARROW-14482) - [C++][Gandiva] Implement MASK_FIRST_N and MASK_LAST_N functions +* [ARROW-14483](https://issues.apache.org/jira/browse/ARROW-14483) - [Release] Add missing download targets +* [ARROW-14484](https://issues.apache.org/jira/browse/ARROW-14484) - [Crossbow] Add support for specifying queue path by environment variable +* [ARROW-14486](https://issues.apache.org/jira/browse/ARROW-14486) - [Packaging][deb] Add missing libthrift-dev dependency +* [ARROW-14489](https://issues.apache.org/jira/browse/ARROW-14489) - [Rust][CI] Install stable rust toolchain in the integration docker image +* [ARROW-14490](https://issues.apache.org/jira/browse/ARROW-14490) - [Doc] Regenerate CHANGELOG.md to include all versions +* [ARROW-14491](https://issues.apache.org/jira/browse/ARROW-14491) - [CI] Add Debian 10 C++ nightly build +* [ARROW-14496](https://issues.apache.org/jira/browse/ARROW-14496) - [Docs] Create relative links for R / JS / C/Glib references in the sphinx toctree using stub pages +* [ARROW-14499](https://issues.apache.org/jira/browse/ARROW-14499) - [Docs] Version dropdown side-by-side with search box +* [ARROW-14505](https://issues.apache.org/jira/browse/ARROW-14505) - [CI][Docs] Exercise documentation builds more frequently +* [ARROW-14510](https://issues.apache.org/jira/browse/ARROW-14510) - [R][CI] ensure that docker runs don't use host-built artifacts +* [ARROW-14514](https://issues.apache.org/jira/browse/ARROW-14514) - [C++][R] UBSAN error on round kernel +* [ARROW-14515](https://issues.apache.org/jira/browse/ARROW-14515) - [R] Add clang sanitizer to crossbow +* [ARROW-14531](https://issues.apache.org/jira/browse/ARROW-14531) - [Ruby] Add Arrow::Table#join +* [ARROW-14533](https://issues.apache.org/jira/browse/ARROW-14533) - [R] Turn linter off on curly braces on new line +* [ARROW-14551](https://issues.apache.org/jira/browse/ARROW-14551) - [Ruby] Accept Arrow::Column as Arrow::Datum argument +* [ARROW-14558](https://issues.apache.org/jira/browse/ARROW-14558) - [R] clarify OOP system wording in the Arrow vignette +* [ARROW-14559](https://issues.apache.org/jira/browse/ARROW-14559) - [C++] reduce memory usage in GcsFileSystem::OpenInputStream +* [ARROW-14562](https://issues.apache.org/jira/browse/ARROW-14562) - [Ruby] Add support for loading Arrow::Table from URI +* [ARROW-14577](https://issues.apache.org/jira/browse/ARROW-14577) - [C++] Enable fine grained IO for async IPC reader +* [ARROW-14580](https://issues.apache.org/jira/browse/ARROW-14580) - [Python] update trove classifiers to include Python 3.10 +* [ARROW-14581](https://issues.apache.org/jira/browse/ARROW-14581) - [C++] Fine-grained IPC reader tests are flaky +* [ARROW-14585](https://issues.apache.org/jira/browse/ARROW-14585) - [C++] Find libgrpc++_reflection via pkg-config +* [ARROW-14590](https://issues.apache.org/jira/browse/ARROW-14590) - [R] Implement lubridate::week +* [ARROW-14599](https://issues.apache.org/jira/browse/ARROW-14599) - [Release][Java] Upload .jar to Artifacts +* [ARROW-14601](https://issues.apache.org/jira/browse/ARROW-14601) - [JAVA] fix the comment for timestamp sec +* 
[ARROW-14602](https://issues.apache.org/jira/browse/ARROW-14602) - [Doc] Tutorial - Python feature PR +* [ARROW-14603](https://issues.apache.org/jira/browse/ARROW-14603) - [Doc] Tutorial - R bindings +* [ARROW-14605](https://issues.apache.org/jira/browse/ARROW-14605) - [Doc] General outline +* [ARROW-14608](https://issues.apache.org/jira/browse/ARROW-14608) - [Python] Provide access to hash_aggregate functions through a Table.group_by method +* [ARROW-14609](https://issues.apache.org/jira/browse/ARROW-14609) - [R] left_join by argument error message mismatch +* [ARROW-14610](https://issues.apache.org/jira/browse/ARROW-14610) - [Doc] New Contributors Guide: Introduction + skeleton +* [ARROW-14615](https://issues.apache.org/jira/browse/ARROW-14615) - [C++] Refactor nested field refs and add union support +* [ARROW-14617](https://issues.apache.org/jira/browse/ARROW-14617) - [R][CI] Upstream clang sanitizer to rhub +* [ARROW-14618](https://issues.apache.org/jira/browse/ARROW-14618) - [Release] Add missing AlmaLinux artifacts URL to vote email template +* [ARROW-14619](https://issues.apache.org/jira/browse/ARROW-14619) - [Ruby] Use no @ openssl Homebrew package for pkg-config +* [ARROW-14623](https://issues.apache.org/jira/browse/ARROW-14623) - [Packaging][Java] Upload not only .jar but also .pom +* [ARROW-14626](https://issues.apache.org/jira/browse/ARROW-14626) - [Website] Update versions tested on +* [ARROW-14628](https://issues.apache.org/jira/browse/ARROW-14628) - [Release][Python] Use python -m pytest +* [ARROW-14636](https://issues.apache.org/jira/browse/ARROW-14636) - [Ruby] Add Cookbook section to documentation +* [ARROW-14637](https://issues.apache.org/jira/browse/ARROW-14637) - [GLib][Ruby] Add support for initializing S3 APIs explicitly +* [ARROW-14641](https://issues.apache.org/jira/browse/ARROW-14641) - [C++][Compute] Reduce print statements from unit tests +* [ARROW-14645](https://issues.apache.org/jira/browse/ARROW-14645) - [Go] Add ValueOffsets function to array.String +* [ARROW-14650](https://issues.apache.org/jira/browse/ARROW-14650) - [JS] toArray equivalent to values/values64 +* [ARROW-14652](https://issues.apache.org/jira/browse/ARROW-14652) - [R] Dataset vignette download script likely to fail with default options +* [ARROW-14653](https://issues.apache.org/jira/browse/ARROW-14653) - [R] head() hangs on CSV datasets > 600MB +* [ARROW-14654](https://issues.apache.org/jira/browse/ARROW-14654) - [R][Docs] Add article on how to run R with C++ debugger to dev docs +* [ARROW-14657](https://issues.apache.org/jira/browse/ARROW-14657) - [R][Docs] Broken link in R docs +* [ARROW-14658](https://issues.apache.org/jira/browse/ARROW-14658) - [C++] Add basic support for nested field refs in scanning +* [ARROW-14662](https://issues.apache.org/jira/browse/ARROW-14662) - [Docs] Add note about linking Flight/gRPC/Protobuf +* [ARROW-14669](https://issues.apache.org/jira/browse/ARROW-14669) - [JS] Clarify Perspective's use of apache arrow +* [ARROW-14670](https://issues.apache.org/jira/browse/ARROW-14670) - [Release][Java] Build missing javadoc and source .jar +* [ARROW-14671](https://issues.apache.org/jira/browse/ARROW-14671) - [Python][Doc] Documentation on how to integrate PyArrow and R +* [ARROW-14675](https://issues.apache.org/jira/browse/ARROW-14675) - [R] Enable merge by union for NEWS.md +* [ARROW-14676](https://issues.apache.org/jira/browse/ARROW-14676) - [R][Docs] Add article on how to build a few different setups via docker to dev docs +* 
[ARROW-14678](https://issues.apache.org/jira/browse/ARROW-14678) - [C++] Add reasonable CMake presets for quick dev setup +* [ARROW-14683](https://issues.apache.org/jira/browse/ARROW-14683) - [Release][Java] Build missing source-release.zip +* [ARROW-14684](https://issues.apache.org/jira/browse/ARROW-14684) - [CI][C++] Use aws-sdk-cpp package on macOS +* [ARROW-14686](https://issues.apache.org/jira/browse/ARROW-14686) - [Python][C++] make byte order detection for numpy builtin type correct +* [ARROW-14694](https://issues.apache.org/jira/browse/ARROW-14694) - [R] Let me dput a schema +* [ARROW-14712](https://issues.apache.org/jira/browse/ARROW-14712) - [R] fix compare_dplyr_error() for dplyr 1.0.8 +* [ARROW-14714](https://issues.apache.org/jira/browse/ARROW-14714) - [C++][Doc] Rework CMake presets and add documentation +* [ARROW-14715](https://issues.apache.org/jira/browse/ARROW-14715) - [Doc] Steps in making your first PR - finding issues +* [ARROW-14716](https://issues.apache.org/jira/browse/ARROW-14716) - [R][CI] Bump R versions used in docker tests +* [ARROW-14718](https://issues.apache.org/jira/browse/ARROW-14718) - [Java] loadValidityBuffer should avoid allocating memory when input is not null and there are only null or non-null values +* [ARROW-14732](https://issues.apache.org/jira/browse/ARROW-14732) - [Python] Improve error message in compute functions when passing wrong number of positional arguments +* [ARROW-14733](https://issues.apache.org/jira/browse/ARROW-14733) - [R] Add section to how to get output when things hang to debugger docs +* [ARROW-14737](https://issues.apache.org/jira/browse/ARROW-14737) - [C++][Dataset] Support URI-decoding partition keys +* [ARROW-14738](https://issues.apache.org/jira/browse/ARROW-14738) - [Python][Doc] Make return types clickable +* [ARROW-14741](https://issues.apache.org/jira/browse/ARROW-14741) - [C++] Add support for RecordBatchReader in CSV writer +* [ARROW-14743](https://issues.apache.org/jira/browse/ARROW-14743) - [C++] Error reading in dataset when partitioning variable in schema +* [ARROW-14746](https://issues.apache.org/jira/browse/ARROW-14746) - [CI] Allow (temporary) disabling of constantly failing nightlies +* [ARROW-14747](https://issues.apache.org/jira/browse/ARROW-14747) - [Release] Add a script to merge changes in release branch +* [ARROW-14748](https://issues.apache.org/jira/browse/ARROW-14748) - [C++][CI] Update flags to give warning for unused results +* [ARROW-14750](https://issues.apache.org/jira/browse/ARROW-14750) - [Release] Update post-03-website.sh for 6.0.1 +* [ARROW-14751](https://issues.apache.org/jira/browse/ARROW-14751) - [C++] Add doc for set lookup "meta" compute functions +* [ARROW-14752](https://issues.apache.org/jira/browse/ARROW-14752) - [Doc] Steps in making your first PR - Set up +* [ARROW-14753](https://issues.apache.org/jira/browse/ARROW-14753) - [Doc] Steps in making your first PR - building C++ +* [ARROW-14754](https://issues.apache.org/jira/browse/ARROW-14754) - [Doc] Steps in making your first PR - building R package +* [ARROW-14755](https://issues.apache.org/jira/browse/ARROW-14755) - [Doc] Steps in making your first PR - building PyArrow +* [ARROW-14756](https://issues.apache.org/jira/browse/ARROW-14756) - [Doc] Steps in making your first PR - Python bindings +* [ARROW-14757](https://issues.apache.org/jira/browse/ARROW-14757) - [Doc] Steps in making your first PR - R bindings +* [ARROW-14758](https://issues.apache.org/jira/browse/ARROW-14758) - [Doc] Steps in making your first PR - test in Python 
+* [ARROW-14759](https://issues.apache.org/jira/browse/ARROW-14759) - [Doc] Steps in making your first PR - test in R +* [ARROW-14760](https://issues.apache.org/jira/browse/ARROW-14760) - [Doc] Steps in making your first PR - PR life cycle +* [ARROW-14761](https://issues.apache.org/jira/browse/ARROW-14761) - [Doc] Helping with documentation +* [ARROW-14762](https://issues.apache.org/jira/browse/ARROW-14762) - [Doc] Additional info and resources +* [ARROW-14763](https://issues.apache.org/jira/browse/ARROW-14763) - [Doc] Arrow General Overview +* [ARROW-14764](https://issues.apache.org/jira/browse/ARROW-14764) - [Website] Add instructions for installing Go package +* [ARROW-14768](https://issues.apache.org/jira/browse/ARROW-14768) - [C++] Validate compute function docstring formatting +* [ARROW-14777](https://issues.apache.org/jira/browse/ARROW-14777) - [Release] Enable to run on RHEL derivatives +* [ARROW-14779](https://issues.apache.org/jira/browse/ARROW-14779) - [C++] Add other common round mode names to RoundMode docs +* [ARROW-14784](https://issues.apache.org/jira/browse/ARROW-14784) - [GLib][Ruby] Rename GArrowSortKey::name to ::target +* [ARROW-14804](https://issues.apache.org/jira/browse/ARROW-14804) - [R] import_from_c() / export_to_c() methods should accept external pointers +* [ARROW-14822](https://issues.apache.org/jira/browse/ARROW-14822) - [C++] Implement floor/ceil/round for temporal objects +* [ARROW-14823](https://issues.apache.org/jira/browse/ARROW-14823) - [R] Implement bindings for lubridate::leap_year +* [ARROW-14842](https://issues.apache.org/jira/browse/ARROW-14842) - [C++] Improve precision range error messages for Decimal +* [ARROW-14843](https://issues.apache.org/jira/browse/ARROW-14843) - [R] Implement `decimal128()` (to replace `decimal()`) +* [ARROW-14844](https://issues.apache.org/jira/browse/ARROW-14844) - [R] Implement decimal256() +* [ARROW-14849](https://issues.apache.org/jira/browse/ARROW-14849) - [R] Update messaging in installation scripts +* [ARROW-14850](https://issues.apache.org/jira/browse/ARROW-14850) - [R] Update ARROW_DEPENDENCY_SOURCE to default to AUTO +* [ARROW-14857](https://issues.apache.org/jira/browse/ARROW-14857) - [CI][Homebrew] Add apache-arrow-glib fomula +* [ARROW-14858](https://issues.apache.org/jira/browse/ARROW-14858) - [R][CI] Don't build extra deps on ubuntu 21.04 +* [ARROW-14880](https://issues.apache.org/jira/browse/ARROW-14880) - [CI][C++] Enable ccache on MacOS builds +* [ARROW-14897](https://issues.apache.org/jira/browse/ARROW-14897) - [CI][C++] Upgrade Clang Tools to 12 from 8 +* [ARROW-14899](https://issues.apache.org/jira/browse/ARROW-14899) - [C++] implement GcsInputStream::GetMetadata +* [ARROW-14903](https://issues.apache.org/jira/browse/ARROW-14903) - [C++] Enable CSV Writer to control string to be used for missing data +* [ARROW-14905](https://issues.apache.org/jira/browse/ARROW-14905) - [C++] Enable CSV Writer to handle quoting +* [ARROW-14907](https://issues.apache.org/jira/browse/ARROW-14907) - [C++] Enable CSV Writer to control end-of-line character +* [ARROW-14910](https://issues.apache.org/jira/browse/ARROW-14910) - [R][CI] Use dev duckdb to build with < 8GB or ram +* [ARROW-14912](https://issues.apache.org/jira/browse/ARROW-14912) - [C++] implement GcsFileSystem::CopyFile +* [ARROW-14913](https://issues.apache.org/jira/browse/ARROW-14913) - [C++] implement GcsFileSystem::DeleteFile +* [ARROW-14914](https://issues.apache.org/jira/browse/ARROW-14914) - [C++] gcsfs will not implement DeleteRootDirContents +* 
[ARROW-14915](https://issues.apache.org/jira/browse/ARROW-14915) - [C++] implement GcsFileSystem::DeleteDirContents +* [ARROW-14916](https://issues.apache.org/jira/browse/ARROW-14916) - [C++] GcsFileSystem can delete directories +* [ARROW-14917](https://issues.apache.org/jira/browse/ARROW-14917) - [C++] Implement GcsFileSystem::CreateDir +* [ARROW-14918](https://issues.apache.org/jira/browse/ARROW-14918) - [C++] Implement GcsFileSystem::GetFileInfo(FileSelector) +* [ARROW-14920](https://issues.apache.org/jira/browse/ARROW-14920) - [C++] Use alphabetical ordering +* [ARROW-14924](https://issues.apache.org/jira/browse/ARROW-14924) - [C++] generic fs tests for GcsFileSystem +* [ARROW-14926](https://issues.apache.org/jira/browse/ARROW-14926) - [Docs] Fix CSS for visibility of the version dropdown +* [ARROW-14929](https://issues.apache.org/jira/browse/ARROW-14929) - [CI] Fix kartothek integration build (install new dependency) +* [ARROW-14932](https://issues.apache.org/jira/browse/ARROW-14932) - [CI] Prefer mamba over conda +* [ARROW-14935](https://issues.apache.org/jira/browse/ARROW-14935) - [Ruby] Add GArrowTemporalDataType +* [ARROW-14940](https://issues.apache.org/jira/browse/ARROW-14940) - [C++] Speed up CSV parser with long CSV cells +* [ARROW-14941](https://issues.apache.org/jira/browse/ARROW-14941) - [R] Implement Duration R6 class and bindings for lubridate::duration() +* [ARROW-14957](https://issues.apache.org/jira/browse/ARROW-14957) - [C++] Update OpenTelemetry to v1.1.0 +* [ARROW-14961](https://issues.apache.org/jira/browse/ARROW-14961) - [C++] Bump google benchmark version +* [ARROW-14968](https://issues.apache.org/jira/browse/ARROW-14968) - [Python] Pin numpy build dependency using oldest-supported-numpy +* [ARROW-14969](https://issues.apache.org/jira/browse/ARROW-14969) - [C++][Python] Un-deprecate FileSystem::OpenAppendStream +* [ARROW-14971](https://issues.apache.org/jira/browse/ARROW-14971) - [C++] Implement GcsFileSystem::Move +* [ARROW-14975](https://issues.apache.org/jira/browse/ARROW-14975) - [Docs] Fix typo in emit_dictionary_deltas documentation +* [ARROW-14976](https://issues.apache.org/jira/browse/ARROW-14976) - [Dev][Archery] Fail early if no benchmark found +* [ARROW-14977](https://issues.apache.org/jira/browse/ARROW-14977) - [Python] Add a "made-up" feature for the guide tutorial +* [ARROW-14981](https://issues.apache.org/jira/browse/ARROW-14981) - [CI][Docs] Upload built documents +* [ARROW-14984](https://issues.apache.org/jira/browse/ARROW-14984) - [CI][Debian] rsync is missing +* [ARROW-14985](https://issues.apache.org/jira/browse/ARROW-14985) - [CI][Go] Use Go 1.16 +* [ARROW-14986](https://issues.apache.org/jira/browse/ARROW-14986) - [Release][Docs] Use artifact built by Crossbow +* [ARROW-14990](https://issues.apache.org/jira/browse/ARROW-14990) - [CI] Fix nightly dask integration build (ensure pandas is installed) +* [ARROW-14992](https://issues.apache.org/jira/browse/ARROW-14992) - [R] Installation can't use prebuilt Arrow binaries on Pop! 
OS +* [ARROW-15005](https://issues.apache.org/jira/browse/ARROW-15005) - [C++] Improve csv parser with Neon +* [ARROW-15010](https://issues.apache.org/jira/browse/ARROW-15010) - [R] Create a function registry for our NSE funcs +* [ARROW-15019](https://issues.apache.org/jira/browse/ARROW-15019) - [Python] Add bindings for new dataset writing options +* [ARROW-15022](https://issues.apache.org/jira/browse/ARROW-15022) - [R] install vignette and installation dev vignette need alt text for images +* [ARROW-15029](https://issues.apache.org/jira/browse/ARROW-15029) - [C++] Split compute/kernels/scalar_string.cc +* [ARROW-15032](https://issues.apache.org/jira/browse/ARROW-15032) - [C++] Add year_month_day function +* [ARROW-15036](https://issues.apache.org/jira/browse/ARROW-15036) - [C++] Automatically configure S3 SDK configuration parameter "maxConnections" +* [ARROW-15038](https://issues.apache.org/jira/browse/ARROW-15038) - [Packaging][CentOS] Drop support for CentOS 8 +* [ARROW-15043](https://issues.apache.org/jira/browse/ARROW-15043) - [Python][Docs] Include time64 to type conversion table for pandas <-> arrow +* [ARROW-15044](https://issues.apache.org/jira/browse/ARROW-15044) - [C++] Add OpenTelemetry exporters for debugging use +* [ARROW-15049](https://issues.apache.org/jira/browse/ARROW-15049) - [R] arrowExports.cpp generation changed with glue package 1.5.1 +* [ARROW-15055](https://issues.apache.org/jira/browse/ARROW-15055) - [C++] Refactor GcsFileSystem tests +* [ARROW-15056](https://issues.apache.org/jira/browse/ARROW-15056) - [C++] Speed up GcsFileSystem tests +* [ARROW-15057](https://issues.apache.org/jira/browse/ARROW-15057) - [R][CI] Move where we install DuckDB from in CI +* [ARROW-15058](https://issues.apache.org/jira/browse/ARROW-15058) - [Java] Remove log4j2 dependency in performance module +* [ARROW-15070](https://issues.apache.org/jira/browse/ARROW-15070) - [Python][C++][R][Doc] Add a general statement to dataset docs around the lack of ACID guarantees +* [ARROW-15074](https://issues.apache.org/jira/browse/ARROW-15074) - [Format] Clarify that LZ4 contains a single frame +* [ARROW-15077](https://issues.apache.org/jira/browse/ARROW-15077) - [Python] Move Expression class from _dataset to _compute cython module +* [ARROW-15082](https://issues.apache.org/jira/browse/ARROW-15082) - [R] Clean up one more duration mapping entry +* [ARROW-15084](https://issues.apache.org/jira/browse/ARROW-15084) - [C++] public factory function for GcsFileSystem +* [ARROW-15085](https://issues.apache.org/jira/browse/ARROW-15085) - [C++] support credential types in GcsFileSystem +* [ARROW-15087](https://issues.apache.org/jira/browse/ARROW-15087) - [Python][Docs] Document MapArray and update parent class to ListArray +* [ARROW-15095](https://issues.apache.org/jira/browse/ARROW-15095) - [Dev][Website] Changelog generation should use commit messages +* [ARROW-15096](https://issues.apache.org/jira/browse/ARROW-15096) - [R] Sanitizer failures with duration type +* [ARROW-15099](https://issues.apache.org/jira/browse/ARROW-15099) - [C++] Improve GcsFileSystem::GetFileInfo +* [ARROW-15100](https://issues.apache.org/jira/browse/ARROW-15100) - [CI] Stop using Python 3.6 by default +* [ARROW-15103](https://issues.apache.org/jira/browse/ARROW-15103) - [Documentation][C++] Error building docs: "arrow/cpp/src/arrow/csv/options.h:182: error: Found unknown command '\r' " +* [ARROW-15109](https://issues.apache.org/jira/browse/ARROW-15109) - [Python] Add show_info() to print build, component, and system info +* 
[ARROW-15110](https://issues.apache.org/jira/browse/ARROW-15110) - [C++][Gandiva] Revert change on Gandiva's cache policy +* [ARROW-15112](https://issues.apache.org/jira/browse/ARROW-15112) - [C++][FlightRPC][Integration][Java] Implement Flight RPC integration tests +* [ARROW-15113](https://issues.apache.org/jira/browse/ARROW-15113) - [C++] Make GcsFileSystem tests a bit faster +* [ARROW-15114](https://issues.apache.org/jira/browse/ARROW-15114) - [C++] GcsFileSystem uses metadata for directory markers +* [ARROW-15115](https://issues.apache.org/jira/browse/ARROW-15115) - [C++] GcsFileSystem return errors if using closed streams +* [ARROW-15116](https://issues.apache.org/jira/browse/ARROW-15116) - [Python] Expose invalid_row_handler for CSV reader +* [ARROW-15119](https://issues.apache.org/jira/browse/ARROW-15119) - [C++] allow reading directories as files in generic tests +* [ARROW-15121](https://issues.apache.org/jira/browse/ARROW-15121) - [C++] Implement max recursion on GcsFileSystem +* [ARROW-15122](https://issues.apache.org/jira/browse/ARROW-15122) - [R] Gate parquet tests on snappy +* [ARROW-15127](https://issues.apache.org/jira/browse/ARROW-15127) - [R] More visible documentation of AWS_EC2_METADATA_DISABLED=TRUE +* [ARROW-15133](https://issues.apache.org/jira/browse/ARROW-15133) - [CI] Remove util_checkout.sh and util_cleanup.sh scripts +* [ARROW-15134](https://issues.apache.org/jira/browse/ARROW-15134) - [GLib] Add GArrow{Month,DayTime,MonthDayNano}IntervalDataType +* [ARROW-15136](https://issues.apache.org/jira/browse/ARROW-15136) - [C++] Make S3FS tests faster +* [ARROW-15137](https://issues.apache.org/jira/browse/ARROW-15137) - [Dev] Update archery crossbow latest-prefix to work with nightly dates +* [ARROW-15138](https://issues.apache.org/jira/browse/ARROW-15138) - [C++] Make ExecPlan::ToString give some additional information +* [ARROW-15140](https://issues.apache.org/jira/browse/ARROW-15140) - [CI] move to v2 of checkouts for GHA +* [ARROW-15150](https://issues.apache.org/jira/browse/ARROW-15150) - [Doc] Add guidance on partitioning datasets +* [ARROW-15153](https://issues.apache.org/jira/browse/ARROW-15153) - [Python] Expose ReferencedBufferSize to python +* [ARROW-15165](https://issues.apache.org/jira/browse/ARROW-15165) - [Python] Expose function to resolve S3 bucket region +* [ARROW-15166](https://issues.apache.org/jira/browse/ARROW-15166) - [C++] Enable filter for decimal256 +* [ARROW-15169](https://issues.apache.org/jira/browse/ARROW-15169) - [Python][R] Avoid unsafe Python-R pointer transfer +* [ARROW-15172](https://issues.apache.org/jira/browse/ARROW-15172) - [Go] Add Arm64 Neon implementation for Arrow-math +* [ARROW-15173](https://issues.apache.org/jira/browse/ARROW-15173) - [R] Provide backward compatibility for bridge to older versions of pyarrow +* [ARROW-15187](https://issues.apache.org/jira/browse/ARROW-15187) - [Java][FlightRPC] Fix pom.xml for new flight-sql modules +* [ARROW-15188](https://issues.apache.org/jira/browse/ARROW-15188) - [C++] Upgrade bundled re2 library version +* [ARROW-15189](https://issues.apache.org/jira/browse/ARROW-15189) - [C++] Upgrade bundled utf8proc version +* [ARROW-15190](https://issues.apache.org/jira/browse/ARROW-15190) - [C++] Upgrade bundled zstd version +* [ARROW-15193](https://issues.apache.org/jira/browse/ARROW-15193) - [R][Documentation] Update R binding documentation +* [ARROW-15198](https://issues.apache.org/jira/browse/ARROW-15198) - [C++][FlightRPC] Fix unity build error in Flight SQL +* 
[ARROW-15203](https://issues.apache.org/jira/browse/ARROW-15203) - [GLib] garrow_struct_scalar_get_value() for scalar from C++ returns value +* [ARROW-15204](https://issues.apache.org/jira/browse/ARROW-15204) - [GLib] Add Arrow::RoundOptions +* [ARROW-15205](https://issues.apache.org/jira/browse/ARROW-15205) - [GLib] Add garrow_function_all() +* [ARROW-15207](https://issues.apache.org/jira/browse/ARROW-15207) - [GLib] Use the Meson's default -Dwerror= +* [ARROW-15216](https://issues.apache.org/jira/browse/ARROW-15216) - [GLib] Add Arrow::RoundToMultipleOptions +* [ARROW-15219](https://issues.apache.org/jira/browse/ARROW-15219) - [Python] Export the random compute function +* [ARROW-15220](https://issues.apache.org/jira/browse/ARROW-15220) - [C++] Remove bool specializations of bit block counter operations +* [ARROW-15232](https://issues.apache.org/jira/browse/ARROW-15232) - [Packaging][deb] Disable DWARF optimization for libarrow.so +* [ARROW-15235](https://issues.apache.org/jira/browse/ARROW-15235) - [R] drop support for R 3.3 +* [ARROW-15244](https://issues.apache.org/jira/browse/ARROW-15244) - [Format] Clarify that offsets are monotonic for binary like arrays +* [ARROW-15245](https://issues.apache.org/jira/browse/ARROW-15245) - [Go] Address most of the staticcheck linting issues. +* [ARROW-15248](https://issues.apache.org/jira/browse/ARROW-15248) - [C++][Docs] Improve docs about linting/formatting +* [ARROW-15249](https://issues.apache.org/jira/browse/ARROW-15249) - [R] Autobrew + AWS sdk dependency +* [ARROW-15267](https://issues.apache.org/jira/browse/ARROW-15267) - [GLib] Add garrow_function_get_default_options() +* [ARROW-15268](https://issues.apache.org/jira/browse/ARROW-15268) - [Packaging][deb] Don't use gi shortcut +* [ARROW-15269](https://issues.apache.org/jira/browse/ARROW-15269) - [C++][Docs] Clarify that not all compute functions are invocable via CallFunction +* [ARROW-15273](https://issues.apache.org/jira/browse/ARROW-15273) - [GLib] add garrow_function_get_options_type() +* [ARROW-15274](https://issues.apache.org/jira/browse/ARROW-15274) - [Ruby] Improve Arrow::Function#execute usability +* [ARROW-15279](https://issues.apache.org/jira/browse/ARROW-15279) - [R] Update "writing bindings" dev docs based on user feedback +* [ARROW-15288](https://issues.apache.org/jira/browse/ARROW-15288) - [GLib] Add garrow_execute_plan_build_hash_join_node() +* [ARROW-15295](https://issues.apache.org/jira/browse/ARROW-15295) - [R] Add 6.0.0 to our old versions to check +* [ARROW-15300](https://issues.apache.org/jira/browse/ARROW-15300) - [C++] Update Skyhook for async dataset interfaces +* [ARROW-15302](https://issues.apache.org/jira/browse/ARROW-15302) - [R] Followup to dropping R 3.3 support +* [ARROW-15303](https://issues.apache.org/jira/browse/ARROW-15303) - [R] linting errors +* [ARROW-15316](https://issues.apache.org/jira/browse/ARROW-15316) - [R] Make a one-function pointer function +* [ARROW-15320](https://issues.apache.org/jira/browse/ARROW-15320) - [Go] Implement memset_neon with Arm64 GoLang Assembly +* [ARROW-15322](https://issues.apache.org/jira/browse/ARROW-15322) - [Docs][Go] Update sidebar link for Go docs. 
+* [ARROW-15327](https://issues.apache.org/jira/browse/ARROW-15327) - [R] Update news for 7.0.0 +* [ARROW-15332](https://issues.apache.org/jira/browse/ARROW-15332) - [C++] Add new cases and fix issues in IPC read/write benchmark +* [ARROW-15334](https://issues.apache.org/jira/browse/ARROW-15334) - [CI][GLib][Windows] Use Ruby 3.1 +* [ARROW-15336](https://issues.apache.org/jira/browse/ARROW-15336) - [Go] Implement 'min_max_neon' with Arm64 GoLang Assembly +* [ARROW-15337](https://issues.apache.org/jira/browse/ARROW-15337) - [Doc] New contributors guide updates +* [ARROW-15338](https://issues.apache.org/jira/browse/ARROW-15338) - [Python] Add `pyarrow.orc.read_table` API +* [ARROW-15343](https://issues.apache.org/jira/browse/ARROW-15343) - [Doc][Guide] Introduction and the checklist - minor corrections +* [ARROW-15344](https://issues.apache.org/jira/browse/ARROW-15344) - [Doc][Guide] Communication - minor corrections +* [ARROW-15345](https://issues.apache.org/jira/browse/ARROW-15345) - [Doc][Guide] Finding JIRA issues - minor corrections +* [ARROW-15355](https://issues.apache.org/jira/browse/ARROW-15355) - [Docs] Trigger sphinx build on documentation changes +* [ARROW-15356](https://issues.apache.org/jira/browse/ARROW-15356) - [Ruby] Add support for .arrows extension +* [ARROW-15383](https://issues.apache.org/jira/browse/ARROW-15383) - [Release] Add a script to update MSYS2 package +* [ARROW-15399](https://issues.apache.org/jira/browse/ARROW-15399) - [Release][JS] Increase minimum NodeJS version to 16 +* [PARQUET-492](https://issues.apache.org/jira/browse/PARQUET-492) - [C++][Parquet] Basic support for reading DELTA_BYTE_ARRAY data. + + + # Apache Arrow 6.0.1 (2021-11-18) ## Bug Fixes diff --git a/c_glib/meson.build b/c_glib/meson.build index a502dfd76ef..69867c2700f 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -23,7 +23,7 @@ project('arrow-glib', 'c', 'cpp', 'cpp_std=c++11', ]) -version = '7.0.0-SNAPSHOT' +version = '7.0.0' if version.endswith('-SNAPSHOT') version_numbers = version.split('-')[0].split('.') version_tag = version.split('-')[1] diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index d4b45e1a202..174c2ca2592 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=6.0.1.9000 +pkgver=7.0.0 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 4eb4bd1ad29..6e05af89a19 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -25,7 +25,6 @@ export ARROW_SOURCE_DIR=${arrow_dir} export ARROW_TEST_DATA=${arrow_dir}/testing/data export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -export ARROW_GDB_SCRIPT=${arrow_dir}/cpp/gdb_arrow.py # Enable some checks inside Python itself export PYTHONDEVMODE=1 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fd7027c30eb..2d7baf1186e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -47,7 +47,7 @@ if(POLICY CMP0074) cmake_policy(SET CMP0074 NEW) endif() -set(ARROW_VERSION "7.0.0-SNAPSHOT") +set(ARROW_VERSION "7.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/gdb_arrow.py b/cpp/gdb_arrow.py deleted file mode 100644 index bdcef84236e..00000000000 --- a/cpp/gdb_arrow.py +++ /dev/null @@ -1,1894 +0,0 
@@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from collections import namedtuple -from collections.abc import Sequence -import decimal -import enum -from functools import lru_cache, partial -import struct -import sys -import warnings - -import gdb -from gdb.types import get_basic_type - -# gdb API docs at https://sourceware.org/gdb/onlinedocs/gdb/Python-API.html#Python-API - -# TODO check guidelines here: https://sourceware.org/gdb/onlinedocs/gdb/Writing-a-Pretty_002dPrinter.html -# TODO investigate auto-loading: https://sourceware.org/gdb/onlinedocs/gdb/Auto_002dloading-extensions.html#Auto_002dloading-extensions - - -_type_ids = [ - 'NA', 'BOOL', 'UINT8', 'INT8', 'UINT16', 'INT16', 'UINT32', 'INT32', - 'UINT64', 'INT64', 'HALF_FLOAT', 'FLOAT', 'DOUBLE', 'STRING', 'BINARY', - 'FIXED_SIZE_BINARY', 'DATE32', 'DATE64', 'TIMESTAMP', 'TIME32', 'TIME64', - 'INTERVAL_MONTHS', 'INTERVAL_DAY_TIME', 'DECIMAL128', 'DECIMAL256', - 'LIST', 'STRUCT', 'SPARSE_UNION', 'DENSE_UNION', 'DICTIONARY', 'MAP', - 'EXTENSION', 'FIXED_SIZE_LIST', 'DURATION', 'LARGE_STRING', - 'LARGE_BINARY', 'LARGE_LIST', 'INTERVAL_MONTH_DAY_NANO'] - -# Mirror the C++ Type::type enum -Type = enum.IntEnum('Type', _type_ids, start=0) - - -@lru_cache() -def byte_order(): - """ - Get the target program (not the GDB host's) endianness. - """ - s = gdb.execute("show endian", to_string=True).strip() - if 'big' in s: - return 'big' - elif 'little' in s: - return 'little' - warnings.warn('Could not determine target endianness ' - f'from GDB\'s response:\n"""{s}"""') - # Fall back to host endianness - return sys.byteorder - - -def for_evaluation(val, ty=None): - """ - Return a parsable form of gdb.Value `val`, optionally with gdb.Type `ty`. - """ - if ty is None: - ty = get_basic_type(val.type) - if ty.code == gdb.TYPE_CODE_PTR: - # It's already a pointer, can represent it directly - return f"(({ty}) ({val}))" - if val.address is None: - raise ValueError(f"Cannot further evaluate rvalue: {val}") - return f"(* ({ty}*) ({val.address}))" - - -def is_char_star(ty): - # Note that "const char*" can have TYPE_CODE_INT as target type... - ty = get_basic_type(ty) - return (ty.code == gdb.TYPE_CODE_PTR and - get_basic_type(ty.target()).code - in (gdb.TYPE_CODE_CHAR, gdb.TYPE_CODE_INT)) - - -def deref(val): - """ - Dereference a raw or smart pointer. 
- """ - ty = get_basic_type(val.type) - if ty.code == gdb.TYPE_CODE_PTR: - return val.dereference() - if ty.name.startswith('std::'): - if "shared" in ty.name: - return SharedPtr(val).value - if "unique" in ty.name: - return UniquePtr(val).value - raise TypeError(f"Cannot dereference value of type '{ty.name}'") - - -_string_literal_mapping = { - ord('\\'): r'\\', - ord('\n'): r'\n', - ord('\r'): r'\r', - ord('\t'): r'\t', - ord('"'): r'\"', -} - -for c in range(0, 32): - if c not in _string_literal_mapping: - _string_literal_mapping[c] = f"\\x{c:02x}" - - -def string_literal(s): - """ - Format a Python string or gdb.Value for display as a literal. - """ - max_len = 50 - if isinstance(s, gdb.Value): - s = s.string() - if len(s) > max_len: - s = s[:max_len] - return '"' + s.translate(_string_literal_mapping) + '" [continued]' - else: - return '"' + s.translate(_string_literal_mapping) + '"' - - -def bytes_literal(val, size=None): - """ - Format a gdb.Value for display as a literal containing possibly - unprintable characters. - """ - return val.lazy_string(length=size).value() - - -def utf8_literal(val, size=None): - """ - Format a gdb.Value for display as a utf-8 literal. - """ - if size is None: - s = val.string(encoding='utf8', errors='backslashreplace') - elif size != 0: - s = val.string(encoding='utf8', errors='backslashreplace', length=size) - else: - s = "" - return string_literal(s) - - -def half_float_value(val): - """ - Return a Python float of the given half-float (represented as a uint64_t - gdb.Value). - """ - buf = gdb.selected_inferior().read_memory(val.address, 2) - return struct.unpack("e", buf)[0] - - -def load_atomic(val): - """ - Load a std::atomic's value. - """ - valty = val.type.template_argument(0) - # XXX This assumes std::atomic has the same layout as a raw T. - return val.address.reinterpret_cast(valty.pointer()).dereference() - - -def load_null_count(val): - """ - Load a null count from a gdb.Value of an integer (either atomic or not). - """ - if get_basic_type(val.type).code != gdb.TYPE_CODE_INT: - val = load_atomic(val) - return val - - -def format_null_count(val): - """ - Format a null count value. - """ - if not isinstance(val, int): - null_count = int(load_null_count(val)) - return (f"null count {null_count}" if null_count != -1 - else "unknown null count") - - -def short_time_unit(val): - return ['s', 'ms', 'us', 'ns'][int(val)] - - -def format_month_interval(val): - """ - Format a MonthInterval value. - """ - return f"{int(val)}M" - - -def cast_to_concrete(val, ty): - return (val.reference_value().reinterpret_cast(ty.reference()) - .referenced_value()) - - -def scalar_class_from_type(name): - """ - Given a DataTypeClass class name (such as "BooleanType"), return the - corresponding Scalar class name. - """ - assert name.endswith("Type") - return name[:-4] + "Scalar" - - -def array_class_from_type(name): - """ - Given a DataTypeClass class name (such as "BooleanType"), return the - corresponding Array class name. - """ - assert name.endswith("Type") - return name[:-4] + "Array" - - -class CString: - """ - A `const char*` or similar value. - """ - - def __init__(self, val): - self.val = val - - def __bool__(self): - return int(data) != 0 and int(data[0]) != 0 - - @property - def data(self): - return self.val - - def bytes_literal(self): - return self.val.lazy_string().value() - - def string_literal(self): - # XXX use lazy_string() as well? 
- return string_literal(self.val) - - def string(self): - return self.val.string() - - def __format__(self, fmt): - return str(self.bytes_literal()) - - -# NOTE: gdb.parse_and_eval() is *slow* and calling it multiple times -# may add noticeable latencies. For standard C++ classes, we therefore -# try to fetch their properties from libstdc++ internals (which hopefully -# are stable), before falling back on calling the public API methods. - -class SharedPtr: - """ - A `std::shared_ptr` value. - """ - - def __init__(self, val): - self.val = val - try: - # libstdc++ internals - self._ptr = val['_M_ptr'] - except gdb.error: - # fallback for other C++ standard libraries - self._ptr = gdb.parse_and_eval(f"{for_evaluation(val)}.get()") - - def get(self): - """ - Return the underlying pointer (a T*). - """ - return self._ptr - - @property - def value(self): - """ - The underlying value (a T). - """ - return self._ptr.dereference() - - -class UniquePtr: - """ - A `std::unique_ptr` value. - """ - - def __init__(self, val): - self.val = val - ty = self.val.type.template_argument(0) - # XXX This assumes that the embedded T* pointer lies at the start - # of std::unique_ptr. - self._ptr = self.val.address.reinterpret_cast(ty.pointer().pointer()) - - def get(self): - """ - Return the underlying pointer (a T*). - """ - return self._ptr - - @property - def value(self): - """ - The underlying value (a T). - """ - return self._ptr.dereference() - - -class Variant: - """ - A arrow::util::Variant<...>. - """ - - def __init__(self, val): - self.val = val - self.index = int(self.val['index_']) - try: - self.value_type = self.val.type.template_argument(self.index) - except RuntimeError: - # Index out of bounds - self.value_type = None - - @property - def value(self): - if self.value_type is None: - return None - ptr = self.val.address - if ptr is not None: - return ptr.reinterpret_cast(self.value_type.pointer() - ).dereference() - return None - - -class StdString: - """ - A `std::string` (or possibly `string_view`) value. - """ - - def __init__(self, val): - self.val = val - try: - # libstdc++ internals - self._data = val['_M_dataplus']['_M_p'] - self._size = val['_M_string_length'] - except gdb.error: - # fallback for other C++ standard libraries - self._data = gdb.parse_and_eval(f"{for_evaluation(val)}.c_str()") - self._size = gdb.parse_and_eval(f"{for_evaluation(val)}.size()") - - def __bool__(self): - return self._size != 0 - - @property - def data(self): - return self._data - - @property - def size(self): - return self._size - - def bytes_literal(self): - return self._data.lazy_string(length=self._size).value() - - def string_literal(self): - # XXX use lazy_string() as well? - return string_literal(self._data) - - def string(self): - return self._data.string() - - def __format__(self, fmt): - return str(self.bytes_literal()) - - -class StdVector(Sequence): - """ - A `std::vector` value. 
- """ - - def __init__(self, val): - self.val = val - try: - # libstdc++ internals - impl = self.val['_M_impl'] - self._data = impl['_M_start'] - self._size = int(impl['_M_finish'] - self._data) - except gdb.error: - # fallback for other C++ standard libraries - self._data = int(gdb.parse_and_eval( - f"{for_evaluation(self.val)}.data()")) - self._size = int(gdb.parse_and_eval( - f"{for_evaluation(self.val)}.size()")) - - def _check_index(self, index): - if index < 0 or index >= self._size: - raise IndexError( - f"Index {index} out of bounds (should be in [0, {self._size - 1}])") - - def __len__(self): - return self._size - - def __getitem__(self, index): - self._check_index(index) - return self._data[index] - - def eval_at(self, index, eval_format): - """ - Run `eval_format` with the value at `index`. - - For example, if `eval_format` is "{}.get()", this will evaluate - "{self[0]}.get()". - """ - self._check_index(index) - return gdb.parse_and_eval( - eval_format.format(for_evaluation(self._data[index]))) - - def iter_eval(self, eval_format): - data_eval = for_evaluation(self._data) - for i in range(self._size): - yield gdb.parse_and_eval( - eval_format.format(f"{data_eval}[{i}]")) - - @property - def size(self): - return self._size - - -class StdPtrVector(StdVector): - - def __getitem__(self, index): - return deref(super().__getitem__(index)) - - -class FieldVector(StdVector): - - def __getitem__(self, index): - """ - Dereference the Field object at this index. - """ - return Field(deref(super().__getitem__(index))) - - def __str__(self): - l = [str(self[i]) for i in range(len(self))] - return "{" + ", ".join(l) + "}" - - -class Field: - """ - A arrow::Field value. - """ - - def __init__(self, val): - self.val = val - - @property - def name(self): - return StdString(self.val['name_']) - - @property - def type(self): - return deref(self.val['type_']) - - @property - def nullable(self): - return bool(self.val['nullable_']) - - def __str__(self): - return str(self.val) - - -class FieldPtr(Field): - """ - A std::shared_ptr value. - """ - - def __init__(self, val): - super().__init__(deref(val)) - - -class Buffer: - """ - A arrow::Buffer value. - """ - - def __init__(self, val): - self.val = val - self.size = int(val['size_']) - - @property - def data(self): - return self.val['data_'] - - def bytes_literal(self): - if self.size > 0: - return self.val['data_'].lazy_string(length=self.size).value() - else: - return '""' - - -class BufferPtr: - """ - A arrow::Buffer* value (possibly null). - """ - - def __init__(self, val): - self.val = val - ptr = int(self.val) - self.buf = Buffer(val.dereference()) if ptr != 0 else None - - @property - def data(self): - if self.buf is None: - return None - return self.buf.data - - @property - def size(self): - if self.buf is None: - return None - return self.buf.size - - def bytes_literal(self): - if self.buf is None: - return None - return self.buf.bytes_literal() - - -KeyValue = namedtuple('KeyValue', ('key', 'value')) - - -class Metadata(Sequence): - """ - A arrow::KeyValueMetadata value. - """ - - def __init__(self, val): - self.val = val - self.keys = StdVector(self.val['keys_']) - self.values = StdVector(self.val['values_']) - - def __len__(self): - return len(self.keys) - - def __getitem__(self, i): - return KeyValue(StdString(self.keys[i]), StdString(self.values[i])) - - -class MetadataPtr(Sequence): - """ - A shared_ptr value, possibly null. 
- """ - - def __init__(self, val): - self.ptr = SharedPtr(val).get() - self.is_null = int(self.ptr) == 0 - self.md = None if self.is_null else Metadata(self.ptr.dereference()) - - def __len__(self): - return 0 if self.is_null else len(self.md) - - def __getitem__(self, i): - if self.is_null: - raise IndexError - return self.md[i] - - -DecimalTraits = namedtuple('DecimalTraits', ('nbits', 'struct_format_le')) - -decimal_traits = { - 128: DecimalTraits(128, 'Qq'), - 256: DecimalTraits(256, 'QQQq'), -} - -class Decimal: - """ - A arrow::BasicDecimal{128,256...} value. - """ - - def __init__(self, traits, val): - self.val = val - self.traits = traits - - @classmethod - def from_bits(cls, nbits, *args, **kwargs): - return cls(decimal_traits[nbits], *args, **kwargs) - - @property - def words(self): - """ - The decimal words, from least to most significant. - """ - mem = gdb.selected_inferior().read_memory( - self.val['array_'].address, self.traits.nbits // 8) - fmt = self.traits.struct_format_le - if byte_order() == 'big': - fmt = fmt[::-1] - words = struct.unpack(f"={fmt}", mem) - if byte_order() == 'big': - words = words[::-1] - return words - - def __int__(self): - """ - The underlying bigint value. - """ - v = 0 - words = self.words - bits_per_word = self.traits.nbits // len(words) - for w in reversed(words): - v = (v << bits_per_word) + w - return v - - def format(self, precision, scale): - """ - Format as a decimal number with the given precision and scale. - """ - v = int(self) - with decimal.localcontext() as ctx: - ctx.prec = precision - ctx.capitals = False - return str(decimal.Decimal(v).scaleb(-scale)) - - -Decimal128 = partial(Decimal.from_bits, 128) -Decimal256 = partial(Decimal.from_bits, 256) - -decimal_type_to_class = { - 'Decimal128Type': Decimal128, - 'Decimal256Type': Decimal256, -} - - -class ExtensionType: - """ - A arrow::ExtensionType. - """ - - def __init__(self, val): - self.val = val - - @property - def storage_type(self): - return deref(self.val['storage_type_']) - - def to_string(self): - """ - The result of calling ToString(). - """ - return StdString(gdb.parse_and_eval( - f"{for_evaluation(self.val)}.ToString()")) - - -class Schema: - """ - A arrow::Schema. - """ - - def __init__(self, val): - self.val = val - impl = deref(self.val['impl_']) - self.fields = FieldVector(impl['fields_']) - self.metadata = MetadataPtr(impl['metadata_']) - - -class RecordBatch: - """ - A arrow::RecordBatch. - """ - - def __init__(self, val): - # XXX this relies on RecordBatch always being a SimpleRecordBatch - # under the hood. What if users create their own RecordBatch - # implementation? - self.val = cast_to_concrete(val, - gdb.lookup_type("arrow::SimpleRecordBatch")) - self.schema = Schema(deref(self.val['schema_'])) - self.columns = StdPtrVector(self.val['columns_']) - - @property - def num_rows(self): - return self.val['num_rows_'] - - -class Table: - """ - A arrow::Table. - """ - - def __init__(self, val): - # XXX this relies on Table always being a SimpleTable under the hood. - # What if users create their own Table implementation? 
- self.val = cast_to_concrete(val, - gdb.lookup_type("arrow::SimpleTable")) - self.schema = Schema(deref(self.val['schema_'])) - self.columns = StdPtrVector(self.val['columns_']) - - @property - def num_rows(self): - return self.val['num_rows_'] - - -type_reprs = { - 'NullType': 'null', - 'BooleanType': 'boolean', - 'UInt8Type': 'uint8', - 'Int8Type': 'int8', - 'UInt16Type': 'uint16', - 'Int16Type': 'int16', - 'UInt32Type': 'uint32', - 'Int32Type': 'int32', - 'UInt64Type': 'uint64', - 'Int64Type': 'int64', - 'HalfFloatType': 'float16', - 'FloatType': 'float32', - 'DoubleType': 'float64', - 'Date32Type': 'date32', - 'Date64Type': 'date64', - 'Time32Type': 'time32', - 'Time64Type': 'time64', - 'TimestampType': 'timestamp', - 'MonthIntervalType': 'month_interval', - 'DayTimeIntervalType': 'day_time_interval', - 'MonthDayNanoIntervalType': 'month_day_nano_interval', - 'DurationType': 'duration', - 'Decimal128Type': 'decimal128', - 'Decimal256Type': 'decimal256', - 'StringType': 'utf8', - 'LargeStringType': 'large_utf8', - 'BinaryType': 'binary', - 'LargeBinaryType': 'large_binary', - 'FixedSizeBinaryType': 'fixed_size_binary', - 'ListType': 'list', - 'LargeListType': 'large_list', - 'FixedSizeListType': 'fixed_size_list', - 'MapType': 'map', - 'StructType': 'struct_', - 'SparseUnionType': 'sparse_union', - 'DenseUnionType': 'dense_union', - 'DictionaryType': 'dictionary', - } - - -class TypePrinter: - """ - Pretty-printer for arrow::DataTypeClass and subclasses. - """ - - def __init__(self, name, val): - self.name = name - # Cast to concrete type class to access all derived methods - # and properties. - self.type = gdb.lookup_type(f"arrow::{name}") - self.val = cast_to_concrete(val, self.type) - - @property - def fields(self): - return FieldVector(self.val['children_']) - - def _format_type(self): - r = type_reprs.get(self.name, self.name) - return f"arrow::{r}" - - def _for_evaluation(self): - return for_evaluation(self.val, self.type) - - -class PrimitiveTypePrinter(TypePrinter): - """ - Pretty-printer for non-parametric types. - """ - - def to_string(self): - return f"{self._format_type()}()" - - -class TimeTypePrinter(TypePrinter): - """ - Pretty-printer for time and duration types. - """ - - def _get_unit(self): - return self.val['unit_'] - - def to_string(self): - return f"{self._format_type()}({self._get_unit()})" - - -class TimestampTypePrinter(TimeTypePrinter): - """ - Pretty-printer for timestamp types. - """ - - def to_string(self): - tz = StdString(self.val['timezone_']) - if tz: - return f'{self._format_type()}({self._get_unit()}, {tz})' - else: - return f'{self._format_type()}({self._get_unit()})' - - -class FixedSizeBinaryTypePrinter(TypePrinter): - """ - Pretty-printer for fixed-size binary types. - """ - - def to_string(self): - width = int(self.val['byte_width_']) - return f"{self._format_type()}({width})" - - -class DecimalTypePrinter(TypePrinter): - """ - Pretty-printer for decimal types. - """ - - def to_string(self): - precision = int(self.val['precision_']) - scale = int(self.val['scale_']) - return f"{self._format_type()}({precision}, {scale})" - - -class ListTypePrinter(TypePrinter): - """ - Pretty-printer for list types. 
- """ - - def _get_value_type(self): - fields = self.fields - if len(fields) != 1: - return None - return fields[0].type - - def to_string(self): - child = self._get_value_type() - if child is None: - return f"{self._format_type()}" - else: - return f"{self._format_type()}({child})" - - -class FixedSizeListTypePrinter(ListTypePrinter): - """ - Pretty-printer for fixed-size list type. - """ - - def to_string(self): - child = self._get_value_type() - if child is None: - return f"{self._format_type()}" - list_size = int(self.val['list_size_']) - return f"{self._format_type()}({child}, {list_size})" - - -class MapTypePrinter(ListTypePrinter): - """ - Pretty-printer for map types. - """ - - def to_string(self): - struct_type = self._get_value_type() - if struct_type is None: - return f"{self._format_type()}" - struct_children = FieldVector(struct_type['children_']) - if len(struct_children) != 2: - return f"{self._format_type()}" - key_type = struct_children[0].type - item_type = struct_children[1].type - return (f"{self._format_type()}({key_type}, {item_type}, " - f"keys_sorted={self.val['keys_sorted_']})") - - -class DictionaryTypePrinter(TypePrinter): - """ - Pretty-printer for dictionary types. - """ - - def to_string(self): - index_type = deref(self.val['index_type_']) - value_type = deref(self.val['value_type_']) - ordered = self.val['ordered_'] - return (f"{self._format_type()}({index_type}, {value_type}, " - f"ordered={ordered})") - - -class StructTypePrinter(TypePrinter): - """ - Pretty-printer for struct types. - """ - - def to_string(self): - return f"{self._format_type()}({self.fields})" - - -class UnionTypePrinter(TypePrinter): - """ - Pretty-printer for union types. - """ - - def to_string(self): - type_codes = StdVector(self.val['type_codes_']) - type_codes = "{" + ", ".join(str(x.cast(gdb.lookup_type('int'))) - for x in type_codes) + "}" - return f"{self._format_type()}(fields={self.fields}, type_codes={type_codes})" - - -class ExtensionTypePrinter(TypePrinter): - """ - Pretty-printer for extension types. - """ - - def to_string(self): - ext_type = ExtensionType(self.val) - return (f"{self._format_type()} {ext_type.to_string().string_literal()} " - f"with storage type {ext_type.storage_type}") - - -class ScalarPrinter: - """ - Pretty-printer for arrow::Scalar and subclasses. - """ - - def __new__(cls, val): - # Lookup actual (derived) class to instantiate - type_id = int(deref(val['type'])['id_']) - type_class = lookup_type_class(type_id) - if type_class is not None: - cls = type_class.scalar_printer - assert issubclass(cls, ScalarPrinter) - self = object.__new__(cls) - self.type_class = type_class - self.type_name = type_class.name - self.name = scalar_class_from_type(self.type_name) - self.type_id = type_id - # Cast to concrete Scalar class to access derived attributes. - concrete_type = gdb.lookup_type(f"arrow::{self.name}") - self.val = cast_to_concrete(val, concrete_type) - self.is_valid = bool(self.val['is_valid']) - return self - - @property - def type(self): - """ - The concrete DataTypeClass instance. 
- """ - concrete_type = gdb.lookup_type(f"arrow::{self.type_name}") - return cast_to_concrete(deref(self.val['type']), - concrete_type) - - def _format_type(self): - return f"arrow::{self.name}" - - def _format_null(self): - if self.type_class.is_parametric: - return f"{self._format_type()} of type {self.type}, null value" - else: - return f"{self._format_type()} of null value" - - def _for_evaluation(self): - return for_evaluation(self.val) - - -class NullScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::NullScalar. - """ - - def to_string(self): - return self._format_type() - - -class NumericScalarPrinter(ScalarPrinter): - """ - Pretty-printer for numeric Arrow scalars. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - value = self.val['value'] - if self.type_name == "HalfFloatType": - return (f"{self._format_type()} " - f"of value {half_float_value(value)} [{value}]") - if self.type_name in ("UInt8Type", "Int8Type"): - value = value.cast(gdb.lookup_type('int')) - return f"{self._format_type()} of value {value}" - - -class TimeScalarPrinter(ScalarPrinter): - """ - Pretty-printer for Arrow time-like scalars. - """ - - def to_string(self): - unit = short_time_unit(self.type['unit_']) - if not self.is_valid: - return f"{self._format_type()} of null value [{unit}]" - value = self.val['value'] - return f"{self._format_type()} of value {value}{unit}" - - -class Date32ScalarPrinter(TimeScalarPrinter): - """ - Pretty-printer for arrow::Date32Scalar. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - value = self.val['value'] - return f"{self._format_type()} of value {value}d" - - -class Date64ScalarPrinter(TimeScalarPrinter): - """ - Pretty-printer for arrow::Date64Scalar. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - value = self.val['value'] - return f"{self._format_type()} of value {value}ms" - - -class TimestampScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::TimestampScalar. - """ - - def to_string(self): - unit = short_time_unit(self.type['unit_']) - tz = StdString(self.type['timezone_']) - tz = tz.string_literal() if tz.size != 0 else "no timezone" - if not self.is_valid: - return f"{self._format_type()} of null value [{unit}, {tz}]" - value = self.val['value'] - return f"{self._format_type()} of value {value}{unit} [{tz}]" - - -class MonthIntervalScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::MonthIntervalScalarPrinter. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - value = self.val['value'] - return f"{self._format_type()} of value {format_month_interval(value)}" - - -class DecimalScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::DecimalScalar and subclasses. - """ - - @property - def decimal_class(self): - return decimal_type_to_class[self.type_name] - - def to_string(self): - ty = self.type - precision = int(ty['precision_']) - scale = int(ty['scale_']) - suffix = f"[precision={precision}, scale={scale}]" - if not self.is_valid: - return f"{self._format_type()} of null value {suffix}" - value = self.decimal_class(self.val['value']).format(precision, scale) - return f"{self._format_type()} of value {value} {suffix}" - - -class BaseBinaryScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::BaseBinaryScalar and subclasses. 
- """ - - def _format_buf(self, bufptr): - if 'String' in self.type_name: - return utf8_literal(bufptr.data, bufptr.size) - else: - return bufptr.bytes_literal() - - def to_string(self): - if not self.is_valid: - return self._format_null() - bufptr = BufferPtr(SharedPtr(self.val['value']).get()) - size = bufptr.size - if size is None: - return f"{self._format_type()} of value " - return (f"{self._format_type()} of size {size}, " - f"value {self._format_buf(bufptr)}") - - -class FixedSizeBinaryScalarPrinter(BaseBinaryScalarPrinter): - """ - Pretty-printer for arrow::FixedSizeBinaryScalar. - """ - - def to_string(self): - size = self.type['byte_width_'] - if not self.is_valid: - return f"{self._format_type()} of size {size}, null value" - bufptr = BufferPtr(SharedPtr(self.val['value']).get()) - if bufptr.data is None: - return f"{self._format_type()} of size {size}, " - return (f"{self._format_type()} of size {size}, " - f"value {self._format_buf(bufptr)}") - - -class DictionaryScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::DictionaryScalar. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - index = deref(self.val['value']['index']) - dictionary = deref(self.val['value']['dictionary']) - return (f"{self._format_type()} of index {index}, " - f"dictionary {dictionary}") - - -class BaseListScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::BaseListScalar and subclasses. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - value = deref(self.val['value']) - return f"{self._format_type()} of value {value}" - - -class StructScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::StructScalar. - """ - - def display_hint(self): - return 'map' - - def children(self): - eval_fields = StdVector(self.type['children_']) - eval_values = StdVector(self.val['value']) - for field, value in zip(eval_fields, eval_values): - name = StdString(deref(field)['name_']).string_literal() - yield ("name", name) - yield ("value", deref(value)) - - def to_string(self): - if not self.is_valid: - return self._format_null() - return f"{self._format_type()}" - - -class UnionScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::UnionScalar and subclasses. - """ - - def to_string(self): - type_code = self.val['type_code'].cast(gdb.lookup_type('int')) - if not self.is_valid: - return (f"{self._format_type()} of type {self.type}, " - f"type code {type_code}, null value") - value = deref(self.val['value']) - return (f"{self._format_type()} of type code {type_code}, " - f"value {value}") - - -class MapScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::MapScalar. - """ - - def to_string(self): - if not self.is_valid: - return self._format_null() - - array = deref(self.val['value']) - data = deref(array['data_']) - data_printer = ArrayDataPrinter("arrow::ArrayData", data) - return (f"{self._format_type()} of type {self.type}, " - f"value {data_printer._format_contents()}") - - -class ExtensionScalarPrinter(ScalarPrinter): - """ - Pretty-printer for arrow::ExtensionScalar. - """ - - def to_string(self): - ext_type = ExtensionType(self.type) - if not self.is_valid: - return (f"{self._format_type()} of type " - f"{ext_type.to_string().string_literal()}, null value") - value = deref(self.val['value']) - return (f"{self._format_type()} of type " - f"{ext_type.to_string().string_literal()}, value {value}") - - -class ArrayDataPrinter: - """ - Pretty-printer for arrow::ArrayData. 
- """ - - def __new__(cls, name, val): - # Lookup actual (derived) class to instantiate - type_id = int(deref(val['type'])['id_']) - type_class = lookup_type_class(type_id) - if type_class is not None: - cls = type_class.array_data_printer - assert issubclass(cls, ArrayDataPrinter) - self = object.__new__(cls) - self.name = name - self.type_class = type_class - self.type_name = type_class.name - self.type_id = type_id - self.val = val - return self - - @property - def type(self): - """ - The concrete DataTypeClass instance. - """ - concrete_type = gdb.lookup_type(f"arrow::{self.type_name}") - return cast_to_concrete(deref(self.val['type']), concrete_type) - - def _format_contents(self): - return (f"length {self.val['length']}, " - f"{format_null_count(self.val['null_count'])}") - - def to_string(self): - ty = self.type - return (f"{self.name} of type {ty}, " - f"{self._format_contents()}") - - -class ArrayPrinter: - """ - Pretty-printer for arrow::Array and subclasses. - """ - - def __init__(self, val): - data = deref(val['data_']) - self.data_printer = ArrayDataPrinter("arrow::ArrayData", data) - self.name = array_class_from_type(self.data_printer.type_name) - - def _format_contents(self): - return self.data_printer._format_contents() - - def to_string(self): - if self.data_printer.type_class.is_parametric: - ty = self.data_printer.type - return f"arrow::{self.name} of type {ty}, {self._format_contents()}" - else: - return f"arrow::{self.name} of {self._format_contents()}" - - -class ChunkedArrayPrinter: - """ - Pretty-printer for arrow::ChunkedArray. - """ - - def __init__(self, name, val): - self.name = name - self.val = val - self.chunks = StdVector(self.val['chunks_']) - - def display_hint(self): - return "array" - - def children(self): - for i, chunk in enumerate(self.chunks): - printer = ArrayPrinter(deref(chunk)) - yield str(i), printer._format_contents() - - def to_string(self): - ty = deref(self.val['type_']) - return (f"{self.name} of type {ty}, length {self.val['length_']}, " - f"{format_null_count(self.val['null_count_'])} " - f"with {len(self.chunks)} chunks") - - -class DataTypeClass: - - array_data_printer = ArrayDataPrinter - - def __init__(self, name): - self.name = name - - -class NullTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = NullScalarPrinter - - -class NumericTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = NumericScalarPrinter - - -class Date32TypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = Date32ScalarPrinter - - -class Date64TypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = Date64ScalarPrinter - - -class TimeTypeClass(DataTypeClass): - is_parametric = True - type_printer = TimeTypePrinter - scalar_printer = TimeScalarPrinter - - -class TimestampTypeClass(DataTypeClass): - is_parametric = True - type_printer = TimestampTypePrinter - scalar_printer = TimestampScalarPrinter - - -class DurationTypeClass(DataTypeClass): - is_parametric = True - type_printer = TimeTypePrinter - scalar_printer = TimeScalarPrinter - - -class MonthIntervalTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = MonthIntervalScalarPrinter - - -class DayTimeIntervalTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = NumericScalarPrinter - - -class 
MonthDayNanoIntervalTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = NumericScalarPrinter - - -class DecimalTypeClass(DataTypeClass): - is_parametric = True - type_printer = DecimalTypePrinter - scalar_printer = DecimalScalarPrinter - - -class BaseBinaryTypeClass(DataTypeClass): - is_parametric = False - type_printer = PrimitiveTypePrinter - scalar_printer = BaseBinaryScalarPrinter - - -class FixedSizeBinaryTypeClass(DataTypeClass): - is_parametric = True - type_printer = FixedSizeBinaryTypePrinter - scalar_printer = FixedSizeBinaryScalarPrinter - - -class BaseListTypeClass(DataTypeClass): - is_parametric = True - type_printer = ListTypePrinter - scalar_printer = BaseListScalarPrinter - - -class FixedSizeListTypeClass(DataTypeClass): - is_parametric = True - type_printer = FixedSizeListTypePrinter - scalar_printer = BaseListScalarPrinter - - -class MapTypeClass(DataTypeClass): - is_parametric = True - type_printer = MapTypePrinter - scalar_printer = MapScalarPrinter - - -class StructTypeClass(DataTypeClass): - is_parametric = True - type_printer = StructTypePrinter - scalar_printer = StructScalarPrinter - - -class UnionTypeClass(DataTypeClass): - is_parametric = True - type_printer = UnionTypePrinter - scalar_printer = UnionScalarPrinter - - -class DictionaryTypeClass(DataTypeClass): - is_parametric = True - type_printer = DictionaryTypePrinter - scalar_printer = DictionaryScalarPrinter - - -class ExtensionTypeClass(DataTypeClass): - is_parametric = True - type_printer = ExtensionTypePrinter - scalar_printer = ExtensionScalarPrinter - - -DataTypeTraits = namedtuple('DataTypeTraits', ('factory', 'name')) - - -type_traits_by_id = { - Type.NA: DataTypeTraits(NullTypeClass, 'NullType'), - - Type.BOOL: DataTypeTraits(NumericTypeClass, 'BooleanType'), - Type.UINT8: DataTypeTraits(NumericTypeClass, 'UInt8Type'), - Type.INT8: DataTypeTraits(NumericTypeClass, 'Int8Type'), - Type.UINT16: DataTypeTraits(NumericTypeClass, 'UInt16Type'), - Type.INT16: DataTypeTraits(NumericTypeClass, 'Int16Type'), - Type.UINT32: DataTypeTraits(NumericTypeClass, 'UInt32Type'), - Type.INT32: DataTypeTraits(NumericTypeClass, 'Int32Type'), - Type.UINT64: DataTypeTraits(NumericTypeClass, 'UInt64Type'), - Type.INT64: DataTypeTraits(NumericTypeClass, 'Int64Type'), - Type.HALF_FLOAT: DataTypeTraits(NumericTypeClass, 'HalfFloatType'), - Type.FLOAT: DataTypeTraits(NumericTypeClass, 'FloatType'), - Type.DOUBLE: DataTypeTraits(NumericTypeClass, 'DoubleType'), - - Type.STRING: DataTypeTraits(BaseBinaryTypeClass, 'StringType'), - Type.BINARY: DataTypeTraits(BaseBinaryTypeClass, 'BinaryType'), - Type.LARGE_STRING: DataTypeTraits(BaseBinaryTypeClass, 'LargeStringType'), - Type.LARGE_BINARY: DataTypeTraits(BaseBinaryTypeClass, 'LargeBinaryType'), - - Type.FIXED_SIZE_BINARY: DataTypeTraits(FixedSizeBinaryTypeClass, - 'FixedSizeBinaryType'), - - Type.DATE32: DataTypeTraits(Date32TypeClass, 'Date32Type'), - Type.DATE64: DataTypeTraits(Date64TypeClass, 'Date64Type'), - Type.TIMESTAMP: DataTypeTraits(TimestampTypeClass, 'TimestampType'), - Type.TIME32: DataTypeTraits(TimeTypeClass, 'Time32Type'), - Type.TIME64: DataTypeTraits(TimeTypeClass, 'Time64Type'), - Type.DURATION: DataTypeTraits(DurationTypeClass, 'DurationType'), - Type.INTERVAL_MONTHS: DataTypeTraits(MonthIntervalTypeClass, - 'MonthIntervalType'), - Type.INTERVAL_DAY_TIME: DataTypeTraits(DayTimeIntervalTypeClass, - 'DayTimeIntervalType'), - Type.INTERVAL_MONTH_DAY_NANO: DataTypeTraits(MonthDayNanoIntervalTypeClass, - 
'MonthDayNanoIntervalType'), - - Type.DECIMAL128: DataTypeTraits(DecimalTypeClass, 'Decimal128Type'), - Type.DECIMAL256: DataTypeTraits(DecimalTypeClass, 'Decimal256Type'), - - Type.LIST: DataTypeTraits(BaseListTypeClass, 'ListType'), - Type.LARGE_LIST: DataTypeTraits(BaseListTypeClass, 'LargeListType'), - Type.FIXED_SIZE_LIST: DataTypeTraits(FixedSizeListTypeClass, - 'FixedSizeListType'), - Type.MAP: DataTypeTraits(MapTypeClass, 'MapType'), - - Type.STRUCT: DataTypeTraits(StructTypeClass, 'StructType'), - Type.SPARSE_UNION: DataTypeTraits(UnionTypeClass, 'SparseUnionType'), - Type.DENSE_UNION: DataTypeTraits(UnionTypeClass, 'DenseUnionType'), - - Type.DICTIONARY: DataTypeTraits(DictionaryTypeClass, 'DictionaryType'), - Type.EXTENSION: DataTypeTraits(ExtensionTypeClass, 'ExtensionType'), -} - -max_type_id = len(type_traits_by_id) - 1 - - -def lookup_type_class(type_id): - """ - Lookup a type class (an instance of DataTypeClass) by its type id. - """ - traits = type_traits_by_id.get(type_id) - if traits is not None: - return traits.factory(traits.name) - return None - - -class StatusPrinter: - """ - Pretty-printer for arrow::Status. - """ - _status_codes_by_id = { - 0: 'OK', - 1: 'OutOfMemory', - 2: 'KeyError', - 3: 'TypeError', - 4: 'Invalid', - 5: 'IOError', - 6: 'CapacityError', - 7: 'IndexError', - 8: 'Cancelled', - 9: 'UnknownError', - 10: 'NotImplemented', - 11: 'SerializationError', - 13: 'RError', - 40: 'CodeGenError', - 41: 'ExpressionValidationError', - 42: 'ExecutionError', - 45: 'AlreadyExists', - } - - def __init__(self, name, val): - self.val = val - - def _format_detail(self, state): - detail_ptr = SharedPtr(state['detail']).get() - if int(detail_ptr) == 0: - return None - detail_id = CString(gdb.parse_and_eval( - f"{for_evaluation(detail_ptr)}->type_id()")) - # Cannot use StdString as ToString() returns a rvalue - detail_msg = CString(gdb.parse_and_eval( - f"{for_evaluation(detail_ptr)}->ToString().c_str()")) - return f"[{detail_id.string()}] {detail_msg.string_literal()}" - - def _format_error(self, state): - code = int(state['code']) - codename = self._status_codes_by_id.get(code) - if codename is not None: - s = f"arrow::Status::{codename}(" - else: - s = f"arrow::Status(, " - s += StdString(state['msg']).string_literal() - detail_msg = self._format_detail(state) - if detail_msg is not None: - return s + f", detail={detail_msg})" - else: - return s + ")" - - def to_string(self): - state_ptr = self.val['state_'] - if int(state_ptr) == 0: - return "arrow::Status::OK()" - return self._format_error(state_ptr.dereference()) - - -class ResultPrinter(StatusPrinter): - """ - Pretty-printer for arrow::Result. - """ - - def to_string(self): - data_type = self.val.type.template_argument(0) - state_ptr = self.val['status_']['state_'] - if int(state_ptr) != 0: - inner = self._format_error(state_ptr) - else: - data_ptr = self.val['storage_']['data_'].address - assert data_ptr - inner = data_ptr.reinterpret_cast( - data_type.pointer()).dereference() - return f"arrow::Result<{data_type}>({inner})" - - -class StringViewPrinter: - """ - Pretty-printer for arrow::util::string_view. - """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - size = int(self.val['size_']) - if size == 0: - return f"arrow::util::string_view of size 0" - else: - data = bytes_literal(self.val['data_'], size) - return f"arrow::util::string_view of size {size}, {data}" - - -class OptionalPrinter: - """ - Pretty-printer for arrow::util::optional. 
- """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - data_type = self.val.type.template_argument(0) - # XXX We rely on internal details of our vendored optional - # implementation, as inlined methods may not be callable from gdb. - if not self.val['has_value_']: - inner = "nullopt" - else: - data_ptr = self.val['contained']['data'].address - assert data_ptr - inner = data_ptr.reinterpret_cast( - data_type.pointer()).dereference() - return f"arrow::util::optional<{data_type}>({inner})" - - -class VariantPrinter: - """ - Pretty-printer for arrow::util::Variant. - """ - - def __init__(self, name, val): - self.val = val - self.variant = Variant(val) - - def to_string(self): - if self.variant.value_type is None: - return "arrow::util::Variant (uninitialized or corrupt)" - type_desc = (f"arrow::util::Variant of index {self.variant.index} " - f"(actual type {self.variant.value_type})") - - value = self.variant.value - if value is None: - return (f"{type_desc}, unavailable value") - else: - return (f"{type_desc}, value {value}") - - -class FieldPrinter: - """ - Pretty-printer for arrow::Field. - """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - f = Field(self.val) - nullable = f.nullable - if nullable: - return f'arrow::field({f.name}, {f.type})' - else: - return f'arrow::field({f.name}, {f.type}, nullable=false)' - - -class MetadataPrinter: - """ - Pretty-printer for arrow::KeyValueMetadata. - """ - - def __init__(self, name, val): - self.val = val - self.metadata = Metadata(self.val) - - def display_hint(self): - return 'map' - - def children(self): - for k, v in self.metadata: - yield ("key", k.bytes_literal()) - yield ("value", v.bytes_literal()) - - def to_string(self): - return f"arrow::KeyValueMetadata of size {len(self.metadata)}" - - -class SchemaPrinter: - """ - Pretty-printer for arrow::Schema. - """ - - def __init__(self, name, val): - self.val = val - self.schema = Schema(val) - # TODO endianness - - def display_hint(self): - return 'map' - - def children(self): - for field in self.schema.fields: - yield ("name", field.name.string_literal()) - yield ("type", field.type) - - def to_string(self): - num_fields = len(self.schema.fields) - md_items = len(self.schema.metadata) - if md_items > 0: - return (f"arrow::Schema with {num_fields} fields " - f"and {md_items} metadata items") - else: - return f"arrow::Schema with {num_fields} fields" - - -class BaseColumnarPrinter: - - def __init__(self, name, val, columnar): - self.name = name - self.val = val - self.columnar = columnar - self.schema = self.columnar.schema - - def display_hint(self): - return 'map' - - def children(self): - for field, col in zip(self.schema.fields, - self.columnar.columns): - yield ("name", field.name.string_literal()) - yield ("value", col) - - def to_string(self): - num_fields = len(self.schema.fields) - num_rows = self.columnar.num_rows - md_items = len(self.schema.metadata) - if md_items > 0: - return (f"arrow::{self.name} with {num_fields} columns, " - f"{num_rows} rows, {md_items} metadata items") - else: - return (f"arrow::{self.name} with {num_fields} columns, " - f"{num_rows} rows") - - -class RecordBatchPrinter(BaseColumnarPrinter): - """ - Pretty-printer for arrow::RecordBatch. - """ - - def __init__(self, name, val): - BaseColumnarPrinter.__init__(self, "RecordBatch", val, RecordBatch(val)) - - -class TablePrinter(BaseColumnarPrinter): - """ - Pretty-printer for arrow::Table. 
- """ - - def __init__(self, name, val): - BaseColumnarPrinter.__init__(self, "Table", val, Table(val)) - - -class DatumPrinter: - """ - Pretty-printer for arrow::Datum. - """ - - def __init__(self, name, val): - self.val = val - self.variant = Variant(val['value']) - - def to_string(self): - if self.variant.index == 0: - # Datum::NONE - return "arrow::Datum (empty)" - if self.variant.value_type is None: - return "arrow::Datum (uninitialized or corrupt?)" - # All non-empty Datums contain a shared_ptr - value = deref(self.variant.value) - return f"arrow::Datum of value {value}" - - -class BufferPrinter: - """ - Pretty-printer for arrow::Buffer and subclasses. - """ - - def __init__(self, name, val): - self.name = name - self.val = val - - def to_string(self): - if bool(self.val['is_mutable_']): - mutable = 'mutable' - else: - mutable = 'read-only' - size = int(self.val['size_']) - if size == 0: - return f"arrow::{self.name} of size 0, {mutable}" - if not self.val['is_cpu_']: - return f"arrow::{self.name} of size {size}, {mutable}, not on CPU" - data = bytes_literal(self.val['data_'], size) - return f"arrow::{self.name} of size {size}, {mutable}, {data}" - - -class DayMillisecondsPrinter: - """ - Pretty-printer for arrow::DayTimeIntervalType::DayMilliseconds. - """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - return f"{self.val['days']}d{self.val['milliseconds']}ms" - - -class MonthDayNanosPrinter: - """ - Pretty-printer for arrow::MonthDayNanoIntervalType::MonthDayNanos. - """ - - def __init__(self, name, val): - self.val = val - - def to_string(self): - return (f"{self.val['months']}M{self.val['days']}d" - f"{self.val['nanoseconds']}ns") - - -class DecimalPrinter: - """ - Pretty-printer for Arrow decimal values. - """ - - def __init__(self, nbits, name, val): - self.name = name - self.val = val - self.nbits = nbits - - def to_string(self): - dec = Decimal.from_bits(self.nbits, self.val) - return f"{self.name}({int(dec)})" - - -printers = { - "arrow::ArrayData": ArrayDataPrinter, - "arrow::BasicDecimal128": partial(DecimalPrinter, 128), - "arrow::BasicDecimal256": partial(DecimalPrinter, 256), - "arrow::ChunkedArray": ChunkedArrayPrinter, - "arrow::Datum": DatumPrinter, - "arrow::DayTimeIntervalType::DayMilliseconds": DayMillisecondsPrinter, - "arrow::Decimal128": partial(DecimalPrinter, 128), - "arrow::Decimal256": partial(DecimalPrinter, 256), - "arrow::MonthDayNanoIntervalType::MonthDayNanos": MonthDayNanosPrinter, - "arrow::Field": FieldPrinter, - "arrow::KeyValueMetadata": MetadataPrinter, - "arrow::RecordBatch": RecordBatchPrinter, - "arrow::Result": ResultPrinter, - "arrow::Schema": SchemaPrinter, - "arrow::SimpleRecordBatch": RecordBatchPrinter, - "arrow::SimpleTable": TablePrinter, - "arrow::Status": StatusPrinter, - "arrow::Table": TablePrinter, - "arrow::util::optional": OptionalPrinter, - "arrow::util::string_view": StringViewPrinter, - "arrow::util::Variant": VariantPrinter, - "nonstd::optional_lite::optional": OptionalPrinter, - "nonstd::sv_lite::basic_string_view": StringViewPrinter, -} - -def arrow_pretty_print(val): - name = val.type.strip_typedefs().name - if name is None: - return - name = name.partition('<')[0] # Remove template parameters - printer = printers.get(name) - if printer is not None: - return printer(name, val) - - if not name.startswith("arrow::"): - return - arrow_name = name[len("arrow::"):] - - if arrow_name.endswith("Buffer"): - try: - val['data_'] - except Exception: - # Not a Buffer? 
- pass - else: - return BufferPrinter(arrow_name, val) - - elif arrow_name.endswith("Type"): - # Look up dynamic type, as it may be hidden behind a DataTypeClass - # pointer or reference. - try: - type_id = int(val['id_']) - except Exception: - # Not a DataTypeClass? - pass - else: - type_class = lookup_type_class(type_id) - if type_class is not None: - return type_class.type_printer(type_class.name, val) - - elif arrow_name.endswith("Array"): - return ArrayPrinter(val) - - elif arrow_name.endswith("Scalar"): - try: - val['is_valid'] - except Exception: - # Not a Scalar? - pass - else: - return ScalarPrinter(val) - - -gdb.pretty_printers.append(arrow_pretty_print) diff --git a/cpp/src/arrow/ipc/json_simple.cc b/cpp/src/arrow/ipc/json_simple.cc index 9e51ef87e41..8347b871b1f 100644 --- a/cpp/src/arrow/ipc/json_simple.cc +++ b/cpp/src/arrow/ipc/json_simple.cc @@ -29,7 +29,6 @@ #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" #include "arrow/array/builder_union.h" -#include "arrow/chunked_array.h" #include "arrow/ipc/json_simple.h" #include "arrow/scalar.h" #include "arrow/type_traits.h" @@ -932,19 +931,6 @@ Status ArrayFromJSON(const std::shared_ptr& type, const char* json_str return ArrayFromJSON(type, util::string_view(json_string), out); } -Status ChunkedArrayFromJSON(const std::shared_ptr& type, - const std::vector& json_strings, - std::shared_ptr* out) { - ArrayVector out_chunks; - out_chunks.reserve(json_strings.size()); - for (const std::string& chunk_json : json_strings) { - out_chunks.emplace_back(); - RETURN_NOT_OK(ArrayFromJSON(type, chunk_json, &out_chunks.back())); - } - *out = std::make_shared(std::move(out_chunks), type); - return Status::OK(); -} - Status DictArrayFromJSON(const std::shared_ptr& type, util::string_view indices_json, util::string_view dictionary_json, std::shared_ptr* out) { diff --git a/cpp/src/arrow/ipc/json_simple.h b/cpp/src/arrow/ipc/json_simple.h index e831d453e98..8269bd65326 100644 --- a/cpp/src/arrow/ipc/json_simple.h +++ b/cpp/src/arrow/ipc/json_simple.h @@ -23,7 +23,6 @@ #include #include "arrow/status.h" -#include "arrow/type_fwd.h" #include "arrow/util/string_view.h" #include "arrow/util/visibility.h" @@ -48,11 +47,6 @@ ARROW_EXPORT Status ArrayFromJSON(const std::shared_ptr&, const char* json, std::shared_ptr* out); -ARROW_EXPORT -Status ChunkedArrayFromJSON(const std::shared_ptr& type, - const std::vector& json_strings, - std::shared_ptr* out); - ARROW_EXPORT Status DictArrayFromJSON(const std::shared_ptr&, util::string_view indices_json, util::string_view dictionary_json, std::shared_ptr* out); diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc index 97cbff6cb4e..c6f66d003f4 100644 --- a/cpp/src/arrow/ipc/json_simple_test.cc +++ b/cpp/src/arrow/ipc/json_simple_test.cc @@ -34,7 +34,6 @@ #include "arrow/array/builder_nested.h" #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" -#include "arrow/chunked_array.h" #include "arrow/ipc/json_simple.h" #include "arrow/scalar.h" #include "arrow/testing/builder.h" @@ -1350,24 +1349,6 @@ TEST(TestDictArrayFromJSON, Errors) { &array)); // dict value isn't string } -TEST(TestChunkedArrayFromJSON, Basics) { - auto type = int32(); - std::shared_ptr chunked_array; - ASSERT_OK(ChunkedArrayFromJSON(type, {}, &chunked_array)); - ASSERT_OK(chunked_array->ValidateFull()); - ASSERT_EQ(chunked_array->num_chunks(), 0); - AssertTypeEqual(type, chunked_array->type()); - - ASSERT_OK(ChunkedArrayFromJSON(type, {"[1, 2]", 
"[3, null, 4]"}, &chunked_array)); - ASSERT_OK(chunked_array->ValidateFull()); - ASSERT_EQ(chunked_array->num_chunks(), 2); - std::shared_ptr expected_chunk; - ASSERT_OK(ArrayFromJSON(type, "[1, 2]", &expected_chunk)); - AssertArraysEqual(*expected_chunk, *chunked_array->chunk(0), /*verbose=*/true); - ASSERT_OK(ArrayFromJSON(type, "[3, null, 4]", &expected_chunk)); - AssertArraysEqual(*expected_chunk, *chunked_array->chunk(1), /*verbose=*/true); -} - TEST(TestScalarFromJSON, Basics) { // Sanity check for common types (not exhaustive) std::shared_ptr scalar; diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt index 835eacad1f6..2c63b6680cc 100644 --- a/cpp/src/arrow/python/CMakeLists.txt +++ b/cpp/src/arrow/python/CMakeLists.txt @@ -34,7 +34,6 @@ set(ARROW_PYTHON_SRCS decimal.cc deserialize.cc extension_type.cc - gdb.cc helpers.cc inference.cc init.cc diff --git a/cpp/src/arrow/python/gdb.cc b/cpp/src/arrow/python/gdb.cc deleted file mode 100644 index 7c629b56903..00000000000 --- a/cpp/src/arrow/python/gdb.cc +++ /dev/null @@ -1,449 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include -#include -#include -#include -#include - -#include "arrow/array.h" -#include "arrow/chunked_array.h" -#include "arrow/datum.h" -#include "arrow/extension_type.h" -#include "arrow/ipc/json_simple.h" -#include "arrow/python/gdb.h" -#include "arrow/record_batch.h" -#include "arrow/scalar.h" -#include "arrow/table.h" -#include "arrow/type.h" -#include "arrow/util/decimal.h" -#include "arrow/util/key_value_metadata.h" -#include "arrow/util/logging.h" -#include "arrow/util/macros.h" -#include "arrow/util/optional.h" -#include "arrow/util/string_view.h" -#include "arrow/util/variant.h" - -namespace arrow { - -using ipc::internal::json::ArrayFromJSON; -using ipc::internal::json::ChunkedArrayFromJSON; -using ipc::internal::json::ScalarFromJSON; - -namespace gdb { -namespace { - -void Trap() { - // XXX Perhaps vendor - // https://github.com/nemequ/portable-snippets/blob/master/debug-trap/debug-trap.h ? 
-#if defined(_MSC_VER) - __debugbreak(); -#elif defined(SIGTRAP) - raise(SIGTRAP); -#else - std::abort(); -#endif -} - -class CustomStatusDetail : public StatusDetail { - public: - const char* type_id() const override { return "custom-detail-id"; } - std::string ToString() const override { return "This is a detail"; } -}; - -class UuidType : public ExtensionType { - public: - UuidType() : ExtensionType(fixed_size_binary(16)) {} - - std::string extension_name() const override { return "uuid"; } - - bool ExtensionEquals(const ExtensionType& other) const override { - return (other.extension_name() == this->extension_name()); - } - - std::shared_ptr MakeArray(std::shared_ptr data) const override { - return std::make_shared(data); - } - - Result> Deserialize( - std::shared_ptr storage_type, - const std::string& serialized) const override { - return Status::NotImplemented(""); - } - - std::string Serialize() const override { return "uuid-serialized"; } -}; - -} // namespace - -void TestSession() { - // We define local variables for all types for which we want to test - // pretty-printing. - // Then, at the end of this function, we trap to the debugger, so that - // test instrumentation can print values from this frame by interacting - // with the debugger. - // The test instrumentation is in pyarrow/tests/test_gdb.py - -#ifdef __clang__ - _Pragma("clang diagnostic push"); - _Pragma("clang diagnostic ignored \"-Wunused-variable\""); -#elif defined(__GNUC__) - _Pragma("GCC diagnostic push"); - _Pragma("GCC diagnostic ignored \"-Wunused-variable\""); -#endif - - // Status & Result - auto ok_status = Status::OK(); - auto error_status = Status::IOError("This is an error"); - auto error_detail_status = - error_status.WithDetail(std::make_shared()); - auto ok_result = Result(42); - auto error_result = Result(error_status); - auto error_detail_result = Result(error_detail_status); - - // Optionals - util::optional int_optional{42}; - util::optional null_int_optional{}; - - // Variants - using VariantType = util::Variant; - - VariantType int_variant{42}; - VariantType bool_variant{false}; - VariantType string_variant{std::string("hello")}; - - // String views - util::string_view string_view_empty{}; - util::string_view string_view_abc{"abc"}; - std::string special_chars = std::string("foo\"bar") + '\x00' + "\r\n\t\x1f"; - util::string_view string_view_special_chars(special_chars); - std::string very_long = "abc" + std::string(5000, 'K') + "xyz"; - util::string_view string_view_very_long(very_long); - - // Buffers - Buffer buffer_null{nullptr, 0}; - Buffer buffer_abc{string_view_abc}; - Buffer buffer_special_chars{string_view_special_chars}; - char mutable_array[3] = {'a', 'b', 'c'}; - MutableBuffer buffer_mutable{reinterpret_cast(mutable_array), 3}; - auto heap_buffer = std::make_shared(string_view_abc); - auto heap_buffer_mutable = *AllocateBuffer(buffer_abc.size()); - memcpy(heap_buffer_mutable->mutable_data(), buffer_abc.data(), buffer_abc.size()); - - // KeyValueMetadata - auto empty_metadata = key_value_metadata({}, {}); - auto metadata = key_value_metadata( - {"key_text", "key_binary"}, {"some value", std::string("z") + '\x00' + "\x1f\xff"}); - - // Decimals - arrow::Decimal128 decimal128_zero{}; - arrow::Decimal128 decimal128_pos{"98765432109876543210987654321098765432"}; - arrow::Decimal128 decimal128_neg{"-98765432109876543210987654321098765432"}; - arrow::BasicDecimal128 basic_decimal128_zero{}; - arrow::BasicDecimal128 basic_decimal128_pos{decimal128_pos.native_endian_array()}; - 
arrow::BasicDecimal128 basic_decimal128_neg{decimal128_neg.native_endian_array()}; - arrow::Decimal256 decimal256_zero{}; - arrow::Decimal256 decimal256_pos{ - "9876543210987654321098765432109876543210987654321098765432109876543210987654"}; - arrow::Decimal256 decimal256_neg{ - "-9876543210987654321098765432109876543210987654321098765432109876543210987654"}; - arrow::BasicDecimal256 basic_decimal256_zero{}; - arrow::BasicDecimal256 basic_decimal256_pos{decimal256_pos.native_endian_array()}; - arrow::BasicDecimal256 basic_decimal256_neg{decimal256_neg.native_endian_array()}; - - // Data types - NullType null_type; - auto heap_null_type = null(); - BooleanType bool_type; - auto heap_bool_type = boolean(); - - Date32Type date32_type; - Date64Type date64_type; - Time32Type time_type_s(TimeUnit::SECOND); - Time32Type time_type_ms(TimeUnit::MILLI); - Time64Type time_type_us(TimeUnit::MICRO); - Time64Type time_type_ns(TimeUnit::NANO); - auto heap_time_type_ns = time64(TimeUnit::NANO); - - TimestampType timestamp_type_s(TimeUnit::SECOND); - TimestampType timestamp_type_ms_timezone(TimeUnit::MILLI, "Europe/Paris"); - TimestampType timestamp_type_us(TimeUnit::MICRO); - TimestampType timestamp_type_ns_timezone(TimeUnit::NANO, "Europe/Paris"); - auto heap_timestamp_type_ns_timezone = timestamp(TimeUnit::NANO, "Europe/Paris"); - - DayTimeIntervalType day_time_interval_type; - MonthIntervalType month_interval_type; - MonthDayNanoIntervalType month_day_nano_interval_type; - - DurationType duration_type_s(TimeUnit::SECOND); - DurationType duration_type_ns(TimeUnit::NANO); - - BinaryType binary_type; - StringType string_type; - LargeBinaryType large_binary_type; - LargeStringType large_string_type; - FixedSizeBinaryType fixed_size_binary_type(10); - auto heap_fixed_size_binary_type = fixed_size_binary(10); - - Decimal128Type decimal128_type(16, 5); - Decimal256Type decimal256_type(42, 12); - auto heap_decimal128_type = decimal128(16, 5); - - ListType list_type(uint8()); - LargeListType large_list_type(large_utf8()); - auto heap_list_type = list(uint8()); - auto heap_large_list_type = large_list(large_utf8()); - - FixedSizeListType fixed_size_list_type(float64(), 3); - auto heap_fixed_size_list_type = fixed_size_list(float64(), 3); - - DictionaryType dict_type_unordered(int16(), utf8()); - DictionaryType dict_type_ordered(int16(), utf8(), /*ordered=*/true); - auto heap_dict_type = dictionary(int16(), utf8()); - - MapType map_type_unsorted(utf8(), binary()); - MapType map_type_sorted(utf8(), binary(), /*keys_sorted=*/true); - auto heap_map_type = map(utf8(), binary()); - - StructType struct_type_empty({}); - StructType struct_type( - {field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); - auto heap_struct_type = - struct_({field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); - - std::vector union_type_codes({7, 42}); - FieldVector union_fields( - {field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); - SparseUnionType sparse_union_type(union_fields, union_type_codes); - DenseUnionType dense_union_type(union_fields, union_type_codes); - - UuidType uuid_type{}; - std::shared_ptr heap_uuid_type = std::make_shared(); - - // Schema - auto schema_empty = schema({}); - auto schema_non_empty = schema({field("ints", int8()), field("strs", utf8())}); - auto schema_with_metadata = schema_non_empty->WithMetadata( - key_value_metadata({"key1", "key2"}, {"value1", "value2"})); - - // Fields - Field int_field("ints", int64()); - Field float_field("floats", float32(), 
/*nullable=*/false); - auto heap_int_field = field("ints", int64()); - - // Scalars - NullScalar null_scalar; - auto heap_null_scalar = MakeNullScalar(null()); - - BooleanScalar bool_scalar_null{}; - BooleanScalar bool_scalar{true}; - auto heap_bool_scalar = *MakeScalar(boolean(), true); - - Int8Scalar int8_scalar_null{}; - UInt8Scalar uint8_scalar_null{}; - Int64Scalar int64_scalar_null{}; - UInt64Scalar uint64_scalar_null{}; - Int8Scalar int8_scalar{-42}; - UInt8Scalar uint8_scalar{234}; - Int64Scalar int64_scalar{-9223372036854775807LL - 1}; - UInt64Scalar uint64_scalar{18446744073709551615ULL}; - HalfFloatScalar half_float_scalar{48640}; // -1.5 - FloatScalar float_scalar{1.25f}; - DoubleScalar double_scalar{2.5}; - - Time32Scalar time_scalar_s{100, TimeUnit::SECOND}; - Time32Scalar time_scalar_ms{1000, TimeUnit::MILLI}; - Time64Scalar time_scalar_us{10000, TimeUnit::MICRO}; - Time64Scalar time_scalar_ns{100000, TimeUnit::NANO}; - Time64Scalar time_scalar_null{time64(TimeUnit::NANO)}; - - DurationScalar duration_scalar_s{-100, TimeUnit::SECOND}; - DurationScalar duration_scalar_ms{-1000, TimeUnit::MILLI}; - DurationScalar duration_scalar_us{-10000, TimeUnit::MICRO}; - DurationScalar duration_scalar_ns{-100000, TimeUnit::NANO}; - DurationScalar duration_scalar_null{duration(TimeUnit::NANO)}; - - TimestampScalar timestamp_scalar_s{12345, timestamp(TimeUnit::SECOND)}; - TimestampScalar timestamp_scalar_ms{-123456, timestamp(TimeUnit::MILLI)}; - TimestampScalar timestamp_scalar_us{1234567, timestamp(TimeUnit::MICRO)}; - TimestampScalar timestamp_scalar_ns{-12345678, timestamp(TimeUnit::NANO)}; - TimestampScalar timestamp_scalar_null{timestamp(TimeUnit::NANO)}; - - TimestampScalar timestamp_scalar_s_tz{12345, - timestamp(TimeUnit::SECOND, "Europe/Paris")}; - TimestampScalar timestamp_scalar_ms_tz{-123456, - timestamp(TimeUnit::MILLI, "Europe/Paris")}; - TimestampScalar timestamp_scalar_us_tz{1234567, - timestamp(TimeUnit::MICRO, "Europe/Paris")}; - TimestampScalar timestamp_scalar_ns_tz{-12345678, - timestamp(TimeUnit::NANO, "Europe/Paris")}; - TimestampScalar timestamp_scalar_null_tz{timestamp(TimeUnit::NANO, "Europe/Paris")}; - - MonthIntervalScalar month_interval_scalar{23}; - MonthIntervalScalar month_interval_scalar_null{}; - DayTimeIntervalScalar day_time_interval_scalar{{23, -456}}; - DayTimeIntervalScalar day_time_interval_scalar_null{}; - MonthDayNanoIntervalScalar month_day_nano_interval_scalar{{1, 23, -456}}; - MonthDayNanoIntervalScalar month_day_nano_interval_scalar_null{}; - - Date32Scalar date32_scalar{23}; - Date32Scalar date32_scalar_null{}; - Date64Scalar date64_scalar{45 * 86000000LL}; - Date64Scalar date64_scalar_null{}; - - Decimal128Scalar decimal128_scalar_pos_scale_pos{Decimal128("1234567"), - decimal128(10, 4)}; - Decimal128Scalar decimal128_scalar_pos_scale_neg{Decimal128("-1234567"), - decimal128(10, 4)}; - Decimal128Scalar decimal128_scalar_neg_scale_pos{Decimal128("1234567"), - decimal128(10, -4)}; - Decimal128Scalar decimal128_scalar_neg_scale_neg{Decimal128("-1234567"), - decimal128(10, -4)}; - Decimal128Scalar decimal128_scalar_null{decimal128(10, 4)}; - auto heap_decimal128_scalar = *MakeScalar(decimal128(10, 4), Decimal128("1234567")); - - Decimal256Scalar decimal256_scalar_pos_scale_pos{ - Decimal256("1234567890123456789012345678901234567890123456"), decimal256(50, 4)}; - Decimal256Scalar decimal256_scalar_pos_scale_neg{ - Decimal256("-1234567890123456789012345678901234567890123456"), decimal256(50, 4)}; - Decimal256Scalar decimal256_scalar_neg_scale_pos{ 
- Decimal256("1234567890123456789012345678901234567890123456"), decimal256(50, -4)}; - Decimal256Scalar decimal256_scalar_neg_scale_neg{ - Decimal256("-1234567890123456789012345678901234567890123456"), decimal256(50, -4)}; - Decimal256Scalar decimal256_scalar_null{decimal256(50, 4)}; - auto heap_decimal256_scalar = *MakeScalar( - decimal256(50, 4), Decimal256("1234567890123456789012345678901234567890123456")); - - BinaryScalar binary_scalar_null{}; - BinaryScalar binary_scalar_unallocated{std::shared_ptr{nullptr}}; - BinaryScalar binary_scalar_empty{Buffer::FromString("")}; - BinaryScalar binary_scalar_abc{Buffer::FromString("abc")}; - BinaryScalar binary_scalar_bytes{ - Buffer::FromString(std::string() + '\x00' + "\x1f\xff")}; - - StringScalar string_scalar_null{}; - StringScalar string_scalar_unallocated{std::shared_ptr{nullptr}}; - StringScalar string_scalar_empty{Buffer::FromString("")}; - StringScalar string_scalar_hehe{Buffer::FromString("héhé")}; - StringScalar string_scalar_invalid_chars{ - Buffer::FromString(std::string("abc") + '\x00' + "def\xffghi")}; - - LargeBinaryScalar large_binary_scalar_abc{Buffer::FromString("abc")}; - LargeStringScalar large_string_scalar_hehe{Buffer::FromString("héhé")}; - - FixedSizeBinaryScalar fixed_size_binary_scalar{Buffer::FromString("abc"), - fixed_size_binary(3)}; - FixedSizeBinaryScalar fixed_size_binary_scalar_null{fixed_size_binary(3)}; - - std::shared_ptr dict_array; - ARROW_CHECK_OK(ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])", &dict_array)); - DictionaryScalar dict_scalar{{std::make_shared(42), dict_array}, - dictionary(int8(), utf8())}; - DictionaryScalar dict_scalar_null{dictionary(int8(), utf8())}; - - std::shared_ptr list_value_array; - ARROW_CHECK_OK(ArrayFromJSON(int32(), R"([4, 5, 6])", &list_value_array)); - ListScalar list_scalar{list_value_array}; - ListScalar list_scalar_null{list(int32())}; - LargeListScalar large_list_scalar{list_value_array}; - LargeListScalar large_list_scalar_null{large_list(int32())}; - FixedSizeListScalar fixed_size_list_scalar{list_value_array}; - FixedSizeListScalar fixed_size_list_scalar_null{fixed_size_list(int32(), 3)}; - - auto struct_scalar_type = struct_({field("ints", int32()), field("strs", utf8())}); - StructScalar struct_scalar{ - ScalarVector{MakeScalar(int32_t(42)), MakeScalar("some text")}, struct_scalar_type}; - StructScalar struct_scalar_null{struct_scalar_type}; - - auto sparse_union_scalar_type = - sparse_union(FieldVector{field("ints", int32()), field("strs", utf8())}, {7, 42}); - auto dense_union_scalar_type = - dense_union(FieldVector{field("ints", int32()), field("strs", utf8())}, {7, 42}); - SparseUnionScalar sparse_union_scalar{MakeScalar(int32_t(43)), 7, - sparse_union_scalar_type}; - SparseUnionScalar sparse_union_scalar_null{7, sparse_union_scalar_type}; - DenseUnionScalar dense_union_scalar{MakeScalar(int32_t(43)), 7, - dense_union_scalar_type}; - DenseUnionScalar dense_union_scalar_null{7, dense_union_scalar_type}; - - auto extension_scalar_type = std::make_shared(); - ExtensionScalar extension_scalar{ - std::make_shared(Buffer::FromString("0123456789abcdef"), - extension_scalar_type->storage_type()), - extension_scalar_type}; - ExtensionScalar extension_scalar_null{extension_scalar_type}; - - std::shared_ptr heap_map_scalar; - ARROW_CHECK_OK( - ScalarFromJSON(map(utf8(), int32()), R"([["a", 5], ["b", 6]])", &heap_map_scalar)); - auto heap_map_scalar_null = MakeNullScalar(heap_map_scalar->type); - - // Array and ArrayData - std::shared_ptr heap_int32_array; - 
ARROW_CHECK_OK(ArrayFromJSON(int32(), "[-5, 6, null, 42]", &heap_int32_array)); - ArrayData int32_array_data{*heap_int32_array->data()}; - Int32Array int32_array{heap_int32_array->data()->Copy()}; - - std::shared_ptr heap_list_array; - ARROW_CHECK_OK(ArrayFromJSON(list(int64()), "[[1, 2], null, []]", &heap_list_array)); - ListArray list_array{heap_list_array->data()}; - - // ChunkedArray - ArrayVector array_chunks(2); - ARROW_CHECK_OK(ArrayFromJSON(int32(), "[1, 2]", &array_chunks[0])); - ARROW_CHECK_OK(ArrayFromJSON(int32(), "[3, null, 4]", &array_chunks[1])); - ChunkedArray chunked_array{array_chunks}; - - // RecordBatch - auto batch_schema = schema({field("ints", int32()), field("strs", utf8())}); - ArrayVector batch_columns{2}; - ARROW_CHECK_OK(ArrayFromJSON(int32(), "[1, 2, 3]", &batch_columns[0])); - ARROW_CHECK_OK(ArrayFromJSON(utf8(), R"(["abc", null, "def"])", &batch_columns[1])); - auto batch = RecordBatch::Make(batch_schema, /*num_rows=*/3, batch_columns); - auto batch_with_metadata = batch->ReplaceSchemaMetadata( - key_value_metadata({"key1", "key2", "key3"}, {"value1", "value2", "value3"})); - - // Table - ChunkedArrayVector table_columns{2}; - ARROW_CHECK_OK( - ChunkedArrayFromJSON(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0])); - ARROW_CHECK_OK(ChunkedArrayFromJSON( - utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"}, - &table_columns[1])); - auto table = Table::Make(batch_schema, table_columns); - - // Datum - Datum empty_datum{}; - Datum scalar_datum{MakeNullScalar(boolean())}; - Datum array_datum{heap_int32_array}; - Datum chunked_array_datum{chunked_array}; - Datum batch_datum{batch}; - Datum table_datum{table}; - -#ifdef __clang__ - _Pragma("clang diagnostic pop"); -#elif defined(__GNUC__) - _Pragma("GCC diagnostic pop"); -#endif - - // Hook into debugger - Trap(); -} - -} // namespace gdb -} // namespace arrow diff --git a/cpp/src/arrow/python/gdb.h b/cpp/src/arrow/python/gdb.h deleted file mode 100644 index 1ddcbb51f6e..00000000000 --- a/cpp/src/arrow/python/gdb.h +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#pragma once - -#include "arrow/python/visibility.h" - -namespace arrow { -namespace gdb { - -ARROW_PYTHON_EXPORT -void TestSession(); - -} // namespace gdb -} // namespace arrow diff --git a/cpp/src/arrow/testing/gtest_util.cc b/cpp/src/arrow/testing/gtest_util.cc index d0e1be92fc9..56ba94dba8a 100644 --- a/cpp/src/arrow/testing/gtest_util.cc +++ b/cpp/src/arrow/testing/gtest_util.cc @@ -423,9 +423,11 @@ std::shared_ptr DictArrayFromJSON(const std::shared_ptr& type, std::shared_ptr ChunkedArrayFromJSON(const std::shared_ptr& type, const std::vector& json) { - std::shared_ptr out; - ABORT_NOT_OK(ipc::internal::json::ChunkedArrayFromJSON(type, json, &out)); - return out; + ArrayVector out_chunks; + for (const std::string& chunk_json : json) { + out_chunks.push_back(ArrayFromJSON(type, chunk_json)); + } + return std::make_shared(std::move(out_chunks), type); } std::shared_ptr RecordBatchFromJSON(const std::shared_ptr& schema, diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 556643841a9..971590d27bf 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -1,6 +1,6 @@ { "name": "arrow", - "version-string": "7.0.0-SNAPSHOT", + "version-string": "7.0.0", "dependencies": [ "abseil", { diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index 3f8b021b064..9026ffeb66d 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -29,7 +29,7 @@ Apache Arrow library Copyright 2016-2019 The Apache Software Foundation The Apache Software Foundation - 7.0.0-SNAPSHOT + 7.0.0 diff --git a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb index fdbc464a18e..27f1597682a 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow-glib.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow-glib.rb @@ -29,7 +29,7 @@ class ApacheArrowGlib < Formula desc "GLib bindings for Apache Arrow" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-7.0.0-SNAPSHOT/apache-arrow-7.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-7.0.0/apache-arrow-7.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git" diff --git a/dev/tasks/homebrew-formulae/apache-arrow.rb b/dev/tasks/homebrew-formulae/apache-arrow.rb index d522c5d9b41..bb8fd56454d 100644 --- a/dev/tasks/homebrew-formulae/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/apache-arrow.rb @@ -29,7 +29,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-7.0.0-SNAPSHOT/apache-arrow-7.0.0-SNAPSHOT.tar.gz" + url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-7.0.0/apache-arrow-7.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" license "Apache-2.0" head "https://github.com/apache/arrow.git" diff --git a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb index c4dab8ef5c4..a786b91da44 100644 --- a/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb +++ b/dev/tasks/homebrew-formulae/autobrew/apache-arrow.rb @@ -19,7 +19,7 @@ class ApacheArrow < Formula desc "Columnar in-memory analytics layer designed to accelerate big data" homepage "https://arrow.apache.org/" - url "https://www.apache.org/dyn/closer.lua?path=arrow/arrow-6.0.1.9000/apache-arrow-6.0.1.9000.tar.gz" + url 
"https://www.apache.org/dyn/closer.lua?path=arrow/arrow-7.0.0/apache-arrow-7.0.0.tar.gz" sha256 "9948ddb6d4798b51552d0dca3252dd6e3a7d0f9702714fc6f5a1b59397ce1d28" head "https://github.com/apache/arrow.git" diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index e69de29bb2d..c0d9e15dffa 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -0,0 +1,5 @@ +apache-arrow-apt-source (7.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Fri, 21 Jan 2022 08:42:46 -0000 diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index cc3c0461da8..bad4d3cc6d0 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -93,6 +93,9 @@ else fi %changelog +* Fri Jan 21 2022 Krisztián Szűcs - 7.0.0-1 +- New upstream release. + * Mon Jan 18 2021 Krisztián Szűcs - 3.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index 2adfc442de8..52356a7d63a 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (7.0.0-1) unstable; urgency=low + + * New upstream release. + + -- Krisztián Szűcs Fri, 21 Jan 2022 08:42:46 -0000 + apache-arrow (3.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 629fbb1533f..977d9578e78 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -834,6 +834,9 @@ Documentation for Apache Parquet GLib. %{_datadir}/gtk-doc/html/parquet-glib/ %changelog +* Fri Jan 21 2022 Krisztián Szűcs - 7.0.0-1 +- New upstream release. + * Mon Jan 18 2021 Krisztián Szűcs - 3.0.0-1 - New upstream release. 
diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 4fb71d7ec5f..d928da048e0 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 ../../pom.xml diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index c8ecd8e09f2..e8ba05817a0 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -16,7 +16,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 ../../pom.xml diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index db5b80e8926..3e626c7fd6a 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -87,7 +87,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 ../../pom.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index c17ce31ecd9..2421e987e40 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-algorithm Arrow Algorithms diff --git a/java/c/pom.xml b/java/c/pom.xml index a6734f33b33..bcd5d62446b 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -13,7 +13,7 @@ arrow-java-root org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/compression/pom.xml b/java/compression/pom.xml index 657209cd26e..40221e559b8 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-compression Arrow Compression diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index fb80c8750a5..437d5a467ce 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index e74cf91550e..7ed326455c7 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -14,7 +14,7 @@ arrow-flight org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 ../pom.xml diff --git a/java/flight/flight-grpc/pom.xml b/java/flight/flight-grpc/pom.xml index 335558cc261..27d622cde06 100644 --- a/java/flight/flight-grpc/pom.xml +++ b/java/flight/flight-grpc/pom.xml @@ -13,7 +13,7 @@ arrow-flight org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 ../pom.xml 4.0.0 diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 1958c3bd504..9cedbbca85b 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,7 @@ arrow-flight org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 ../pom.xml diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 4ede0e7afec..621c22a0845 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -14,7 +14,7 @@ arrow-flight org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 ../pom.xml diff --git a/java/flight/pom.xml b/java/flight/pom.xml index 7cb0e1d7171..da76cccb8d3 100644 --- a/java/flight/pom.xml +++ b/java/flight/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/format/pom.xml b/java/format/pom.xml index a80971247eb..6214bfdecd3 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -15,7 +15,7 @@ arrow-java-root org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 arrow-format diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 843f42eb8b3..f02f969e710 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -14,7 +14,7 @@ 
org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 org.apache.arrow.gandiva diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 40193b4c022..40be427a9e8 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index 460b5077c4d..16af33bbea7 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 458694ec2d3..8b3a6e868e0 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -13,7 +13,7 @@ arrow-memory org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 4.0.0 diff --git a/java/memory/pom.xml b/java/memory/pom.xml index e6c70615b01..8a9c0b56d86 100644 --- a/java/memory/pom.xml +++ b/java/memory/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-memory Arrow Memory diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 27dcee1d7a1..2003785f6a5 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -14,7 +14,7 @@ arrow-java-root org.apache.arrow - 7.0.0-SNAPSHOT + 7.0.0 arrow-performance jar @@ -74,7 +74,7 @@ org.apache.arrow arrow-algorithm - 7.0.0-SNAPSHOT + 7.0.0 test diff --git a/java/plasma/pom.xml b/java/plasma/pom.xml index 88e3b9141ef..56e4e876c47 100644 --- a/java/plasma/pom.xml +++ b/java/plasma/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-plasma Arrow Plasma Client diff --git a/java/pom.xml b/java/pom.xml index 7059f0027f4..b149f6ca34c 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -20,7 +20,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 pom Apache Arrow Java Root POM diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 3cc0282639f..9066b1e6dcf 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-tools Arrow Tools diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 4fce197dbf6..da6b5c3f9b5 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -14,7 +14,7 @@ org.apache.arrow arrow-java-root - 7.0.0-SNAPSHOT + 7.0.0 arrow-vector Arrow Vectors diff --git a/js/package.json b/js/package.json index d1c5c409360..eb782fa24e9 100644 --- a/js/package.json +++ b/js/package.json @@ -121,5 +121,5 @@ "engines": { "node": ">=12.0" }, - "version": "7.0.0-SNAPSHOT" + "version": "7.0.0" } diff --git a/js/test/unit/generated-data-tests.ts b/js/test/unit/generated-data-tests.ts index 90cf0d598aa..948b7af7065 100644 --- a/js/test/unit/generated-data-tests.ts +++ b/js/test/unit/generated-data-tests.ts @@ -54,7 +54,7 @@ describe('Generated Test Data', () => { describe('List', () => { validateVector(generate.list()); }); describe('Struct', () => { validateVector(generate.struct()); }); describe('DenseUnion', () => { validateVector(generate.denseUnion()); }); - describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); + // describe('SparseUnion', () => { validateVector(generate.sparseUnion()); }); describe('Dictionary', () => { validateVector(generate.dictionary()); }); describe('IntervalDayTime', () => { validateVector(generate.intervalDayTime()); }); describe('IntervalYearMonth', () => { 
validateVector(generate.intervalYearMonth()); }); diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index e667500c466..b0ca4864991 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -183,7 +183,7 @@ endmacro() set(CMAKE_CXX_STANDARD 11) -set(MLARROW_VERSION "7.0.0-SNAPSHOT") +set(MLARROW_VERSION "7.0.0") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" MLARROW_BASE_VERSION "${MLARROW_VERSION}") project(mlarrow VERSION "${MLARROW_BASE_VERSION}") diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 514aaef3e06..7a9115e893a 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2763,10 +2763,6 @@ cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil: CResult[shared_ptr[CRecordBatchReader]] ImportRecordBatchReader( ArrowArrayStream*) - -cdef extern from "arrow/python/gdb.h" namespace "arrow::gdb" nogil: - void GdbTestSession "arrow::gdb::TestSession"() - cdef extern from "arrow/util/byte_size.h" namespace "arrow::util" nogil: CResult[int64_t] ReferencedBufferSize(const CArray& array_data) CResult[int64_t] ReferencedBufferSize(const CRecordBatch& record_batch) diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 6e45af6c162..0c9cbcc5bcc 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -123,10 +123,6 @@ def _pc(): return pc -def _gdb_test_session(): - GdbTestSession() - - # Assorted compatibility helpers include "compat.pxi" diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py deleted file mode 100644 index 3f44d1dac3a..00000000000 --- a/python/pyarrow/tests/test_gdb.py +++ /dev/null @@ -1,854 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from functools import lru_cache -import os -import re -import shutil -import subprocess -import sys - -import pytest - - -here = os.path.dirname(os.path.abspath(__file__)) - -# The GDB script may be found in the source tree (if available) -# or in another location given by the ARROW_GDB_SCRIPT environment variable. 
-gdb_script = (os.environ.get('ARROW_GDB_SCRIPT') or - os.path.join(here, "../../../cpp/gdb_arrow.py")) - -gdb_command = ["gdb", "--nx"] - - -@lru_cache() -def is_gdb_available(): - try: - proc = subprocess.run(gdb_command + ["--version"], - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - except FileNotFoundError: - return False - return proc.returncode == 0 - - -@lru_cache() -def python_executable(): - path = shutil.which("python3") - assert path is not None, "Couldn't find python3 executable" - return path - - -def skip_if_gdb_unavailable(): - if not is_gdb_available(): - pytest.skip("gdb command unavailable") - - -class GdbSession: - proc = None - verbose = True - - def __init__(self, *args, **env): - # Let stderr through to let pytest display it separately on errors - self.proc = subprocess.Popen(gdb_command + list(args), - env=env, bufsize=0, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - self.last_stdout = [] - self.last_stdout_line = b"" - - def wait_until_ready(self): - """ - Record output until the gdb prompt displays. Return recorded output. - """ - # TODO: add timeout? - while (not self.last_stdout_line.startswith(b"(gdb) ") and - self.proc.poll() is None): - block = self.proc.stdout.read(4096) - if self.verbose: - sys.stdout.buffer.write(block) - sys.stdout.buffer.flush() - block, sep, last_line = block.rpartition(b"\n") - if sep: - self.last_stdout.append(self.last_stdout_line) - self.last_stdout.append(block + sep) - self.last_stdout_line = last_line - else: - assert block == b"" - self.last_stdout_line += last_line - - if self.proc.poll() is not None: - raise IOError("gdb session terminated unexpectedly") - - out = b"".join(self.last_stdout).decode('utf-8') - self.last_stdout = [] - self.last_stdout_line = b"" - return out - - def issue_command(self, line): - line = line.encode('utf-8') + b"\n" - if self.verbose: - sys.stdout.buffer.write(line) - sys.stdout.buffer.flush() - self.proc.stdin.write(line) - self.proc.stdin.flush() - - def run_command(self, line): - self.issue_command(line) - return self.wait_until_ready() - - def print_value(self, expr): - """ - Ask gdb to print the value of an expression and return the result. - """ - out = self.run_command(f"p {expr}") - out, n = re.subn(r"^\$\d+ = ", "", out) - assert n == 1, out - # gdb may add whitespace depending on result width, remove it - return out.strip() - - def select_frame(self, func_name): - """ - Select the innermost frame with the given function name. - """ - # Ideally, we would use the "frame function" command, - # but it's not available on old GDB versions (such as 8.1.1), - # so instead parse the stack trace for a matching frame number. 
- out = self.run_command("info stack") - pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " " - m = re.search(pat, out) - if m is None: - pytest.fail(f"Could not select frame for function {func_name}") - - frame_num = int(m[1]) - out = self.run_command(f"frame {frame_num}") - assert f"in {func_name}" in out - - def join(self): - if self.proc is not None: - self.proc.stdin.close() - self.proc.stdout.close() # avoid ResourceWarning - self.proc.kill() - self.proc.wait() - self.proc = None - - def __del__(self): - self.join() - - -@pytest.fixture(scope='session') -def gdb(): - skip_if_gdb_unavailable() - gdb = GdbSession("-q", python_executable()) - try: - gdb.wait_until_ready() - gdb.run_command("set confirm off") - gdb.run_command("set print array-indexes on") - # Make sure gdb formatting is not terminal-dependent - gdb.run_command("set width unlimited") - gdb.run_command("set charset UTF-8") - yield gdb - finally: - gdb.join() - - -@pytest.fixture(scope='session') -def gdb_arrow(gdb): - assert os.path.exists(gdb_script), "GDB script not found" - gdb.run_command(f"source {gdb_script}") - code = "from pyarrow.lib import _gdb_test_session; _gdb_test_session()" - out = gdb.run_command(f"run -c '{code}'") - assert ("Trace/breakpoint trap" in out or - "received signal" in out), out - gdb.select_frame("arrow::gdb::TestSession") - return gdb - - -def test_gdb_session(gdb): - out = gdb.run_command("show version") - assert out.startswith("GNU gdb ("), out - - -def test_gdb_arrow(gdb_arrow): - s = gdb_arrow.print_value("42 + 1") - assert s == "43" - - -def check_stack_repr(gdb, expr, expected): - """ - Check printing a stack-located value. - """ - s = gdb.print_value(expr) - if isinstance(expected, re.Pattern): - assert expected.match(s), s - else: - assert s == expected - - -def check_heap_repr(gdb, expr, expected): - """ - Check printing a heap-located value, given its address. 
- """ - s = gdb.print_value(f"*{expr}") - # GDB may prefix the value with an adress or type specification - if s != expected: - assert s.endswith(f" {expected}") - - -def test_status(gdb_arrow): - check_stack_repr(gdb_arrow, "ok_status", "arrow::Status::OK()") - check_stack_repr(gdb_arrow, "error_status", - 'arrow::Status::IOError("This is an error")') - check_stack_repr( - gdb_arrow, "error_detail_status", - 'arrow::Status::IOError("This is an error", ' - 'detail=[custom-detail-id] "This is a detail")') - - check_stack_repr(gdb_arrow, "ok_result", "arrow::Result(42)") - check_stack_repr( - gdb_arrow, "error_result", - 'arrow::Result(arrow::Status::IOError("This is an error"))') - check_stack_repr( - gdb_arrow, "error_detail_result", - 'arrow::Result(arrow::Status::IOError("This is an error", ' - 'detail=[custom-detail-id] "This is a detail"))') - - -def test_string_view(gdb_arrow): - check_stack_repr(gdb_arrow, "string_view_empty", - "arrow::util::string_view of size 0") - check_stack_repr(gdb_arrow, "string_view_abc", - 'arrow::util::string_view of size 3, "abc"') - check_stack_repr( - gdb_arrow, "string_view_special_chars", - r'arrow::util::string_view of size 12, "foo\"bar\000\r\n\t\037"') - check_stack_repr( - gdb_arrow, "string_view_very_long", - 'arrow::util::string_view of size 5006, ' - '"abc", \'K\' ...') - - -def test_buffer_stack(gdb_arrow): - check_stack_repr(gdb_arrow, "buffer_null", - "arrow::Buffer of size 0, read-only") - check_stack_repr(gdb_arrow, "buffer_abc", - 'arrow::Buffer of size 3, read-only, "abc"') - check_stack_repr( - gdb_arrow, "buffer_special_chars", - r'arrow::Buffer of size 12, read-only, "foo\"bar\000\r\n\t\037"') - check_stack_repr(gdb_arrow, "buffer_mutable", - 'arrow::MutableBuffer of size 3, mutable, "abc"') - - -def test_buffer_heap(gdb_arrow): - check_heap_repr(gdb_arrow, "heap_buffer", - 'arrow::Buffer of size 3, read-only, "abc"') - check_heap_repr(gdb_arrow, "heap_buffer_mutable.get()", - 'arrow::Buffer of size 3, mutable, "abc"') - - -def test_optionals(gdb_arrow): - check_stack_repr(gdb_arrow, "int_optional", - "arrow::util::optional(42)") - check_stack_repr(gdb_arrow, "null_int_optional", - "arrow::util::optional(nullopt)") - - -def test_variants(gdb_arrow): - check_stack_repr( - gdb_arrow, "int_variant", - "arrow::util::Variant of index 0 (actual type int), value 42") - check_stack_repr( - gdb_arrow, "bool_variant", - "arrow::util::Variant of index 1 (actual type bool), value false") - check_stack_repr( - gdb_arrow, "string_variant", - re.compile(r'^arrow::util::Variant of index 2 \(actual type ' - r'std::.*string.*\), value .*"hello".*')) - - -def test_decimals(gdb_arrow): - v128 = "98765432109876543210987654321098765432" - check_stack_repr(gdb_arrow, "decimal128_zero", "arrow::Decimal128(0)") - check_stack_repr(gdb_arrow, "decimal128_pos", - f"arrow::Decimal128({v128})") - check_stack_repr(gdb_arrow, "decimal128_neg", - f"arrow::Decimal128(-{v128})") - check_stack_repr(gdb_arrow, "basic_decimal128_zero", - "arrow::BasicDecimal128(0)") - check_stack_repr(gdb_arrow, "basic_decimal128_pos", - f"arrow::BasicDecimal128({v128})") - check_stack_repr(gdb_arrow, "basic_decimal128_neg", - f"arrow::BasicDecimal128(-{v128})") - - v256 = ("9876543210987654321098765432109876543210" - "987654321098765432109876543210987654") - check_stack_repr(gdb_arrow, "decimal256_zero", "arrow::Decimal256(0)") - check_stack_repr(gdb_arrow, "decimal256_pos", - f"arrow::Decimal256({v256})") - check_stack_repr(gdb_arrow, "decimal256_neg", - f"arrow::Decimal256(-{v256})") - 
-    check_stack_repr(gdb_arrow, "basic_decimal256_zero",
-        "arrow::BasicDecimal256(0)")
-    check_stack_repr(gdb_arrow, "basic_decimal256_pos",
-        f"arrow::BasicDecimal256({v256})")
-    check_stack_repr(gdb_arrow, "basic_decimal256_neg",
-        f"arrow::BasicDecimal256(-{v256})")
-
-
-def test_metadata(gdb_arrow):
-    check_heap_repr(gdb_arrow, "empty_metadata.get()",
-        "arrow::KeyValueMetadata of size 0")
-    check_heap_repr(
-        gdb_arrow, "metadata.get()",
-        ('arrow::KeyValueMetadata of size 2 = {'
-         '["key_text"] = "some value", ["key_binary"] = "z\\000\\037\\377"}'))
-
-
-def test_types_stack(gdb_arrow):
-    check_stack_repr(gdb_arrow, "null_type", "arrow::null()")
-    check_stack_repr(gdb_arrow, "bool_type", "arrow::boolean()")
-
-    check_stack_repr(gdb_arrow, "date32_type", "arrow::date32()")
-    check_stack_repr(gdb_arrow, "date64_type", "arrow::date64()")
-    check_stack_repr(gdb_arrow, "time_type_s",
-        "arrow::time32(arrow::TimeUnit::SECOND)")
-    check_stack_repr(gdb_arrow, "time_type_ms",
-        "arrow::time32(arrow::TimeUnit::MILLI)")
-    check_stack_repr(gdb_arrow, "time_type_us",
-        "arrow::time64(arrow::TimeUnit::MICRO)")
-    check_stack_repr(gdb_arrow, "time_type_ns",
-        "arrow::time64(arrow::TimeUnit::NANO)")
-    check_stack_repr(gdb_arrow, "timestamp_type_s",
-        "arrow::timestamp(arrow::TimeUnit::SECOND)")
-    check_stack_repr(
-        gdb_arrow, "timestamp_type_ms_timezone",
-        'arrow::timestamp(arrow::TimeUnit::MILLI, "Europe/Paris")')
-    check_stack_repr(gdb_arrow, "timestamp_type_us",
-        "arrow::timestamp(arrow::TimeUnit::MICRO)")
-    check_stack_repr(
-        gdb_arrow, "timestamp_type_ns_timezone",
-        'arrow::timestamp(arrow::TimeUnit::NANO, "Europe/Paris")')
-
-    check_stack_repr(gdb_arrow, "day_time_interval_type",
-        "arrow::day_time_interval()")
-    check_stack_repr(gdb_arrow, "month_interval_type",
-        "arrow::month_interval()")
-    check_stack_repr(gdb_arrow, "month_day_nano_interval_type",
-        "arrow::month_day_nano_interval()")
-    check_stack_repr(gdb_arrow, "duration_type_s",
-        "arrow::duration(arrow::TimeUnit::SECOND)")
-    check_stack_repr(gdb_arrow, "duration_type_ns",
-        "arrow::duration(arrow::TimeUnit::NANO)")
-
-    check_stack_repr(gdb_arrow, "decimal128_type",
-        "arrow::decimal128(16, 5)")
-    check_stack_repr(gdb_arrow, "decimal256_type",
-        "arrow::decimal256(42, 12)")
-
-    check_stack_repr(gdb_arrow, "binary_type", "arrow::binary()")
-    check_stack_repr(gdb_arrow, "string_type", "arrow::utf8()")
-    check_stack_repr(gdb_arrow, "large_binary_type", "arrow::large_binary()")
-    check_stack_repr(gdb_arrow, "large_string_type", "arrow::large_utf8()")
-    check_stack_repr(gdb_arrow, "fixed_size_binary_type",
-        "arrow::fixed_size_binary(10)")
-
-    check_stack_repr(gdb_arrow, "list_type",
-        "arrow::list(arrow::uint8())")
-    check_stack_repr(gdb_arrow, "large_list_type",
-        "arrow::large_list(arrow::large_utf8())")
-    check_stack_repr(gdb_arrow, "fixed_size_list_type",
-        "arrow::fixed_size_list(arrow::float64(), 3)")
-    check_stack_repr(
-        gdb_arrow, "map_type_unsorted",
-        "arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=false)")
-    check_stack_repr(
-        gdb_arrow, "map_type_sorted",
-        "arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=true)")
-
-    check_stack_repr(gdb_arrow, "struct_type_empty",
-        "arrow::struct_({})")
-    check_stack_repr(
-        gdb_arrow, "struct_type",
-        ('arrow::struct_({arrow::field("ints", arrow::int8()), '
-         'arrow::field("strs", arrow::utf8(), nullable=false)})'))
-
-    check_stack_repr(
-        gdb_arrow, "sparse_union_type",
-        ('arrow::sparse_union(fields={arrow::field("ints", arrow::int8()), '
-         'arrow::field("strs", arrow::utf8(), nullable=false)}, '
-         'type_codes={7, 42})'))
-    check_stack_repr(
-        gdb_arrow, "dense_union_type",
-        ('arrow::dense_union(fields={arrow::field("ints", arrow::int8()), '
-         'arrow::field("strs", arrow::utf8(), nullable=false)}, '
-         'type_codes={7, 42})'))
-
-    check_stack_repr(
-        gdb_arrow, "dict_type_unordered",
-        "arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=false)")
-    check_stack_repr(
-        gdb_arrow, "dict_type_ordered",
-        "arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=true)")
-
-    check_stack_repr(
-        gdb_arrow, "uuid_type",
-        ('arrow::ExtensionType "extension" '
-         'with storage type arrow::fixed_size_binary(16)'))
-
-
-def test_types_heap(gdb_arrow):
-    check_heap_repr(gdb_arrow, "heap_null_type", "arrow::null()")
-    check_heap_repr(gdb_arrow, "heap_bool_type", "arrow::boolean()")
-
-    check_heap_repr(gdb_arrow, "heap_time_type_ns",
-        "arrow::time64(arrow::TimeUnit::NANO)")
-    check_heap_repr(
-        gdb_arrow, "heap_timestamp_type_ns_timezone",
-        'arrow::timestamp(arrow::TimeUnit::NANO, "Europe/Paris")')
-
-    check_heap_repr(gdb_arrow, "heap_decimal128_type",
-        "arrow::decimal128(16, 5)")
-
-    check_heap_repr(gdb_arrow, "heap_list_type",
-        "arrow::list(arrow::uint8())")
-    check_heap_repr(gdb_arrow, "heap_large_list_type",
-        "arrow::large_list(arrow::large_utf8())")
-    check_heap_repr(gdb_arrow, "heap_fixed_size_list_type",
-        "arrow::fixed_size_list(arrow::float64(), 3)")
-    check_heap_repr(
-        gdb_arrow, "heap_map_type",
-        "arrow::map(arrow::utf8(), arrow::binary(), keys_sorted=false)")
-
-    check_heap_repr(
-        gdb_arrow, "heap_struct_type",
-        ('arrow::struct_({arrow::field("ints", arrow::int8()), '
-         'arrow::field("strs", arrow::utf8(), nullable=false)})'))
-
-    check_heap_repr(
-        gdb_arrow, "heap_dict_type",
-        "arrow::dictionary(arrow::int16(), arrow::utf8(), ordered=false)")
-
-    check_heap_repr(
-        gdb_arrow, "heap_uuid_type",
-        ('arrow::ExtensionType "extension" '
-         'with storage type arrow::fixed_size_binary(16)'))
-
-
-def test_fields_stack(gdb_arrow):
-    check_stack_repr(gdb_arrow, "int_field",
-        'arrow::field("ints", arrow::int64())')
-    check_stack_repr(
-        gdb_arrow, "float_field",
-        'arrow::field("floats", arrow::float32(), nullable=false)')
-
-
-def test_fields_heap(gdb_arrow):
-    check_heap_repr(gdb_arrow, "heap_int_field",
-        'arrow::field("ints", arrow::int64())')
-
-
-def test_scalars_stack(gdb_arrow):
-    check_stack_repr(gdb_arrow, "null_scalar", "arrow::NullScalar")
-    check_stack_repr(gdb_arrow, "bool_scalar",
-        "arrow::BooleanScalar of value true")
-    check_stack_repr(gdb_arrow, "bool_scalar_null",
-        "arrow::BooleanScalar of null value")
-    check_stack_repr(gdb_arrow, "int8_scalar",
-        "arrow::Int8Scalar of value -42")
-    check_stack_repr(gdb_arrow, "uint8_scalar",
-        "arrow::UInt8Scalar of value 234")
-    check_stack_repr(gdb_arrow, "int64_scalar",
-        "arrow::Int64Scalar of value -9223372036854775808")
-    check_stack_repr(gdb_arrow, "uint64_scalar",
-        "arrow::UInt64Scalar of value 18446744073709551615")
-    check_stack_repr(gdb_arrow, "half_float_scalar",
-        "arrow::HalfFloatScalar of value -1.5 [48640]")
-    check_stack_repr(gdb_arrow, "float_scalar",
-        "arrow::FloatScalar of value 1.25")
-    check_stack_repr(gdb_arrow, "double_scalar",
-        "arrow::DoubleScalar of value 2.5")
-
-    check_stack_repr(gdb_arrow, "time_scalar_s",
-        "arrow::Time32Scalar of value 100s")
-    check_stack_repr(gdb_arrow, "time_scalar_ms",
-        "arrow::Time32Scalar of value 1000ms")
-    check_stack_repr(gdb_arrow, "time_scalar_us",
-        "arrow::Time64Scalar of value 10000us")
-    check_stack_repr(gdb_arrow, "time_scalar_ns",
-        "arrow::Time64Scalar of value 100000ns")
-    check_stack_repr(gdb_arrow, "time_scalar_null",
-        "arrow::Time64Scalar of null value [ns]")
-
-    check_stack_repr(gdb_arrow, "duration_scalar_s",
-        "arrow::DurationScalar of value -100s")
-    check_stack_repr(gdb_arrow, "duration_scalar_ms",
-        "arrow::DurationScalar of value -1000ms")
-    check_stack_repr(gdb_arrow, "duration_scalar_us",
-        "arrow::DurationScalar of value -10000us")
-    check_stack_repr(gdb_arrow, "duration_scalar_ns",
-        "arrow::DurationScalar of value -100000ns")
-    check_stack_repr(gdb_arrow, "duration_scalar_null",
-        "arrow::DurationScalar of null value [ns]")
-
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_s",
-        "arrow::TimestampScalar of value 12345s [no timezone]")
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_ms",
-        "arrow::TimestampScalar of value -123456ms [no timezone]")
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_us",
-        "arrow::TimestampScalar of value 1234567us [no timezone]")
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_ns",
-        "arrow::TimestampScalar of value -12345678ns [no timezone]")
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_null",
-        "arrow::TimestampScalar of null value [ns, no timezone]")
-
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_s_tz",
-        'arrow::TimestampScalar of value 12345s ["Europe/Paris"]')
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_ms_tz",
-        'arrow::TimestampScalar of value -123456ms ["Europe/Paris"]')
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_us_tz",
-        'arrow::TimestampScalar of value 1234567us ["Europe/Paris"]')
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_ns_tz",
-        'arrow::TimestampScalar of value -12345678ns ["Europe/Paris"]')
-    check_stack_repr(
-        gdb_arrow, "timestamp_scalar_null_tz",
-        'arrow::TimestampScalar of null value [ns, "Europe/Paris"]')
-
-    check_stack_repr(gdb_arrow, "month_interval_scalar",
-        "arrow::MonthIntervalScalar of value 23M")
-    check_stack_repr(gdb_arrow, "month_interval_scalar_null",
-        "arrow::MonthIntervalScalar of null value")
-    check_stack_repr(gdb_arrow, "day_time_interval_scalar",
-        "arrow::DayTimeIntervalScalar of value 23d-456ms")
-    check_stack_repr(gdb_arrow, "day_time_interval_scalar_null",
-        "arrow::DayTimeIntervalScalar of null value")
-    check_stack_repr(
-        gdb_arrow, "month_day_nano_interval_scalar",
-        "arrow::MonthDayNanoIntervalScalar of value 1M23d-456ns")
-    check_stack_repr(
-        gdb_arrow, "month_day_nano_interval_scalar_null",
-        "arrow::MonthDayNanoIntervalScalar of null value")
-
-    check_stack_repr(gdb_arrow, "date32_scalar",
-        "arrow::Date32Scalar of value 23d")
-    check_stack_repr(gdb_arrow, "date32_scalar_null",
-        "arrow::Date32Scalar of null value")
-    check_stack_repr(gdb_arrow, "date64_scalar",
-        "arrow::Date64Scalar of value 3870000000ms")
-    check_stack_repr(gdb_arrow, "date64_scalar_null",
-        "arrow::Date64Scalar of null value")
-
-    check_stack_repr(
-        gdb_arrow, "decimal128_scalar_null",
-        "arrow::Decimal128Scalar of null value [precision=10, scale=4]")
-    check_stack_repr(
-        gdb_arrow, "decimal128_scalar_pos_scale_pos",
-        "arrow::Decimal128Scalar of value 123.4567 [precision=10, scale=4]")
-    check_stack_repr(
-        gdb_arrow, "decimal128_scalar_pos_scale_neg",
-        "arrow::Decimal128Scalar of value -123.4567 [precision=10, scale=4]")
-    check_stack_repr(
-        gdb_arrow, "decimal128_scalar_neg_scale_pos",
-        ("arrow::Decimal128Scalar of value 1.234567e+10 "
-         "[precision=10, scale=-4]"))
-    check_stack_repr(
-        gdb_arrow, "decimal128_scalar_neg_scale_neg",
-        ("arrow::Decimal128Scalar of value -1.234567e+10 "
-         "[precision=10, scale=-4]"))
-
-    check_stack_repr(
-        gdb_arrow, "decimal256_scalar_null",
-        "arrow::Decimal256Scalar of null value [precision=50, scale=4]")
-    check_stack_repr(
-        gdb_arrow, "decimal256_scalar_pos_scale_pos",
-        ("arrow::Decimal256Scalar of value "
-         "123456789012345678901234567890123456789012.3456 "
-         "[precision=50, scale=4]"))
-    check_stack_repr(
-        gdb_arrow, "decimal256_scalar_pos_scale_neg",
-        ("arrow::Decimal256Scalar of value "
-         "-123456789012345678901234567890123456789012.3456 "
-         "[precision=50, scale=4]"))
-    check_stack_repr(
-        gdb_arrow, "decimal256_scalar_neg_scale_pos",
-        ("arrow::Decimal256Scalar of value "
-         "1.234567890123456789012345678901234567890123456e+49 "
-         "[precision=50, scale=-4]"))
-    check_stack_repr(
-        gdb_arrow, "decimal256_scalar_neg_scale_neg",
-        ("arrow::Decimal256Scalar of value "
-         "-1.234567890123456789012345678901234567890123456e+49 "
-         "[precision=50, scale=-4]"))
-
-    check_stack_repr(
-        gdb_arrow, "binary_scalar_null",
-        "arrow::BinaryScalar of null value")
-    check_stack_repr(
-        gdb_arrow, "binary_scalar_unallocated",
-        "arrow::BinaryScalar of value ")
-    check_stack_repr(
-        gdb_arrow, "binary_scalar_empty",
-        'arrow::BinaryScalar of size 0, value ""')
-    check_stack_repr(
-        gdb_arrow, "binary_scalar_abc",
-        'arrow::BinaryScalar of size 3, value "abc"')
-    check_stack_repr(
-        gdb_arrow, "binary_scalar_bytes",
-        r'arrow::BinaryScalar of size 3, value "\000\037\377"')
-    check_stack_repr(
-        gdb_arrow, "large_binary_scalar_abc",
-        'arrow::LargeBinaryScalar of size 3, value "abc"')
-
-    check_stack_repr(
-        gdb_arrow, "string_scalar_null",
-        "arrow::StringScalar of null value")
-    check_stack_repr(
-        gdb_arrow, "string_scalar_unallocated",
-        "arrow::StringScalar of value ")
-    check_stack_repr(
-        gdb_arrow, "string_scalar_empty",
-        'arrow::StringScalar of size 0, value ""')
-    check_stack_repr(
-        gdb_arrow, "string_scalar_hehe",
-        'arrow::StringScalar of size 6, value "héhé"')
-    # FIXME: excessive escaping ('\\xff' vs. '\x00')
-    check_stack_repr(
-        gdb_arrow, "string_scalar_invalid_chars",
-        r'arrow::StringScalar of size 11, value "abc\x00def\\xffghi"')
-    check_stack_repr(
-        gdb_arrow, "large_string_scalar_hehe",
-        'arrow::LargeStringScalar of size 6, value "héhé"')
-
-    check_stack_repr(
-        gdb_arrow, "fixed_size_binary_scalar",
-        'arrow::FixedSizeBinaryScalar of size 3, value "abc"')
-    check_stack_repr(
-        gdb_arrow, "fixed_size_binary_scalar_null",
-        'arrow::FixedSizeBinaryScalar of size 3, null value')
-
-    check_stack_repr(
-        gdb_arrow, "dict_scalar",
-        re.compile(
-            (r'^arrow::DictionaryScalar of index '
-             r'arrow::Int8Scalar of value 42, '
-             r'dictionary arrow::StringArray ')))
-    check_stack_repr(
-        gdb_arrow, "dict_scalar_null",
-        ('arrow::DictionaryScalar of type '
-         'arrow::dictionary(arrow::int8(), arrow::utf8(), ordered=false), '
-         'null value'))
-
-    check_stack_repr(
-        gdb_arrow, "list_scalar",
-        ('arrow::ListScalar of value arrow::Int32Array of '
-         'length 3, null count 0'))
-    check_stack_repr(
-        gdb_arrow, "list_scalar_null",
-        'arrow::ListScalar of type arrow::list(arrow::int32()), null value')
-    check_stack_repr(
-        gdb_arrow, "large_list_scalar",
-        ('arrow::LargeListScalar of value arrow::Int32Array of '
-         'length 3, null count 0'))
-    check_stack_repr(
-        gdb_arrow, "large_list_scalar_null",
-        ('arrow::LargeListScalar of type arrow::large_list(arrow::int32()), '
-         'null value'))
-    check_stack_repr(
-        gdb_arrow, "fixed_size_list_scalar",
-        ('arrow::FixedSizeListScalar of value arrow::Int32Array of '
-         'length 3, null count 0'))
-    check_stack_repr(
-        gdb_arrow, "fixed_size_list_scalar_null",
-        ('arrow::FixedSizeListScalar of type '
-         'arrow::fixed_size_list(arrow::int32(), 3), null value'))
-
-    check_stack_repr(
-        gdb_arrow, "struct_scalar",
-        ('arrow::StructScalar = {["ints"] = arrow::Int32Scalar of value 42, '
-         '["strs"] = arrow::StringScalar of size 9, value "some text"}'))
-    check_stack_repr(
-        gdb_arrow, "struct_scalar_null",
-        ('arrow::StructScalar of type arrow::struct_('
-         '{arrow::field("ints", arrow::int32()), '
-         'arrow::field("strs", arrow::utf8())}), null value'))
-
-    check_stack_repr(
-        gdb_arrow, "sparse_union_scalar",
-        ('arrow::SparseUnionScalar of type code 7, '
-         'value arrow::Int32Scalar of value 43'))
-    check_stack_repr(
-        gdb_arrow, "sparse_union_scalar_null", re.compile(
-            r'^arrow::SparseUnionScalar of type arrow::sparse_union\(.*\), '
-            r'type code 7, null value$'))
-    check_stack_repr(
-        gdb_arrow, "dense_union_scalar",
-        ('arrow::DenseUnionScalar of type code 7, '
-         'value arrow::Int32Scalar of value 43'))
-    check_stack_repr(
-        gdb_arrow, "dense_union_scalar_null", re.compile(
-            r'^arrow::DenseUnionScalar of type arrow::dense_union\(.*\), '
-            r'type code 7, null value$'))
-
-    check_stack_repr(
-        gdb_arrow, "extension_scalar",
-        ('arrow::ExtensionScalar of type "extension", '
-         'value arrow::FixedSizeBinaryScalar of size 16, '
-         'value "0123456789abcdef"'))
-    check_stack_repr(
-        gdb_arrow, "extension_scalar_null",
-        'arrow::ExtensionScalar of type "extension", null value')
-
-
-def test_scalars_heap(gdb_arrow):
-    check_heap_repr(gdb_arrow, "heap_null_scalar", "arrow::NullScalar")
-    check_heap_repr(gdb_arrow, "heap_bool_scalar",
-        "arrow::BooleanScalar of value true")
-    check_heap_repr(
-        gdb_arrow, "heap_decimal128_scalar",
-        "arrow::Decimal128Scalar of value 123.4567 [precision=10, scale=4]")
-    check_heap_repr(
-        gdb_arrow, "heap_decimal256_scalar",
-        ("arrow::Decimal256Scalar of value "
-         "123456789012345678901234567890123456789012.3456 "
-         "[precision=50, scale=4]"))
-
-    check_heap_repr(
-        gdb_arrow, "heap_map_scalar",
-        ('arrow::MapScalar of type arrow::map(arrow::utf8(), arrow::int32(), '
-         'keys_sorted=false), value length 2, null count 0'))
-    check_heap_repr(
-        gdb_arrow, "heap_map_scalar_null",
-        ('arrow::MapScalar of type arrow::map(arrow::utf8(), arrow::int32(), '
-         'keys_sorted=false), null value'))
-
-
-def test_array_data(gdb_arrow):
-    check_stack_repr(
-        gdb_arrow, "int32_array_data",
-        "arrow::ArrayData of type arrow::int32(), length 4, null count 1")
-
-
-def test_arrays_stack(gdb_arrow):
-    check_stack_repr(
-        gdb_arrow, "int32_array",
-        "arrow::Int32Array of length 4, null count 1")
-    check_stack_repr(
-        gdb_arrow, "list_array",
-        ("arrow::ListArray of type arrow::list(arrow::int64()), "
-         "length 3, null count 1"))
-
-
-def test_arrays_heap(gdb_arrow):
-    check_heap_repr(
-        gdb_arrow, "heap_int32_array",
-        "arrow::Int32Array of length 4, null count 1")
-    check_heap_repr(
-        gdb_arrow, "heap_list_array",
-        ("arrow::ListArray of type arrow::list(arrow::int64()), "
-         "length 3, null count 1"))
-
-
-def test_schema(gdb_arrow):
-    check_heap_repr(gdb_arrow, "schema_empty",
-        "arrow::Schema with 0 fields")
-    check_heap_repr(
-        gdb_arrow, "schema_non_empty",
-        ('arrow::Schema with 2 fields = {["ints"] = arrow::int8(), '
-         '["strs"] = arrow::utf8()}'))
-    check_heap_repr(
-        gdb_arrow, "schema_with_metadata",
-        ('arrow::Schema with 2 fields and 2 metadata items = '
-         '{["ints"] = arrow::int8(), ["strs"] = arrow::utf8()}'))
-
-
-def test_chunked_array(gdb_arrow):
-    check_stack_repr(
-        gdb_arrow, "chunked_array",
-        ("arrow::ChunkedArray of type arrow::int32(), length 5, null count 1 "
-         "with 2 chunks = {[0] = length 2, null count 0, "
-         "[1] = length 3, null count 1}"))
-
-
-def test_record_batch(gdb_arrow):
-    expected_batch = (
-        'arrow::RecordBatch with 2 columns, 3 rows = {'
-        '["ints"] = arrow::ArrayData of type arrow::int32(), '
-        'length 3, null count 0, '
-        '["strs"] = arrow::ArrayData of type arrow::utf8(), '
-        'length 3, null count 1}')
-
-    # Representations may differ between those two because of
-    # RecordBatch (base class) vs. SimpleRecordBatch (concrete class).
-    check_heap_repr(gdb_arrow, "batch", expected_batch)
-    check_heap_repr(gdb_arrow, "batch.get()", expected_batch)
-
-    expected_batch_with_metadata = (
-        'arrow::RecordBatch with 2 columns, 3 rows, 3 metadata items = {'
-        '["ints"] = arrow::ArrayData of type arrow::int32(), '
-        'length 3, null count 0, '
-        '["strs"] = arrow::ArrayData of type arrow::utf8(), '
-        'length 3, null count 1}')
-
-    check_heap_repr(gdb_arrow, "batch_with_metadata",
-        expected_batch_with_metadata)
-
-
-def test_table(gdb_arrow):
-    expected_table = (
-        'arrow::Table with 2 columns, 5 rows = {'
-        '["ints"] = arrow::ChunkedArray of type arrow::int32(), '
-        'length 5, null count 0 with 2 chunks = '
-        '{[0] = length 3, null count 0, [1] = length 2, null count 0}, '
-        '["strs"] = arrow::ChunkedArray of type arrow::utf8(), '
-        'length 5, null count 1 with 3 chunks = '
-        '{[0] = length 2, null count 1, [1] = length 1, null count 0, '
-        '[2] = length 2, null count 0}}')
-
-    # Same as RecordBatch above (Table vs. SimpleTable)
-    check_heap_repr(gdb_arrow, "table", expected_table)
-    check_heap_repr(gdb_arrow, "table.get()", expected_table)
-
-
-def test_datum(gdb_arrow):
-    check_stack_repr(gdb_arrow, "empty_datum", "arrow::Datum (empty)")
-    check_stack_repr(
-        gdb_arrow, "scalar_datum",
-        "arrow::Datum of value arrow::BooleanScalar of null value")
-    check_stack_repr(
-        gdb_arrow, "array_datum",
-        re.compile(r"^arrow::Datum of value arrow::ArrayData of type "))
-    check_stack_repr(
-        gdb_arrow, "chunked_array_datum",
-        re.compile(r"^arrow::Datum of value arrow::ChunkedArray of type "))
-    check_stack_repr(
-        gdb_arrow, "batch_datum",
-        re.compile(r"^arrow::Datum of value arrow::RecordBatch "
-                   r"with 2 columns, 3 rows "))
-    check_stack_repr(
-        gdb_arrow, "table_datum",
-        re.compile(r"^arrow::Datum of value arrow::Table "
-                   r"with 2 columns, 5 rows "))
diff --git a/python/setup.py b/python/setup.py
index 4e77e4bbbe0..268d949fbf0 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -532,7 +532,7 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name):
 
 # If the event of not running from a git clone (e.g. from a git archive
 # or a Python sdist), see if we can set the version number ourselves
-default_version = '7.0.0-SNAPSHOT'
+default_version = '7.0.0'
 if (not os.path.exists('../.git') and
         not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')):
     os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index ae4bbcb8c38..75470bc7cca 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: arrow
 Title: Integration to 'Apache' 'Arrow'
-Version: 6.0.1.9000
+Version: 7.0.0
 Authors@R: c(
     person("Neal", "Richardson", email = "neal@ursalabs.org", role = c("aut", "cre")),
     person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")),
diff --git a/r/NEWS.md b/r/NEWS.md
index 9d751963aa0..886cf90cdbd 100644
--- a/r/NEWS.md
+++ b/r/NEWS.md
@@ -17,7 +17,7 @@ under the License.
 -->
 
-# arrow 6.0.1.9000
+# arrow 7.0.0
 
 ## Enhancements to dplyr and datasets
diff --git a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
index e728c7c2823..a13043b6088 100644
--- a/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
+++ b/ruby/red-arrow-cuda/lib/arrow-cuda/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowCUDA
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
index adb804ad8c6..fb626ca21f0 100644
--- a/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
+++ b/ruby/red-arrow-dataset/lib/arrow-dataset/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowDataset
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow-flight/lib/arrow-flight/version.rb b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
index 57789ad0220..d712327c324 100644
--- a/ruby/red-arrow-flight/lib/arrow-flight/version.rb
+++ b/ruby/red-arrow-flight/lib/arrow-flight/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module ArrowFlight
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-arrow/lib/arrow/version.rb b/ruby/red-arrow/lib/arrow/version.rb
index a54bdaab456..77b8e0ea171 100644
--- a/ruby/red-arrow/lib/arrow/version.rb
+++ b/ruby/red-arrow/lib/arrow/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Arrow
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-gandiva/lib/gandiva/version.rb b/ruby/red-gandiva/lib/gandiva/version.rb
index bb2d36f32bf..1f1df7ed31b 100644
--- a/ruby/red-gandiva/lib/gandiva/version.rb
+++ b/ruby/red-gandiva/lib/gandiva/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Gandiva
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-parquet/lib/parquet/version.rb b/ruby/red-parquet/lib/parquet/version.rb
index 36e03486faf..2b80453ab4f 100644
--- a/ruby/red-parquet/lib/parquet/version.rb
+++ b/ruby/red-parquet/lib/parquet/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Parquet
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")
diff --git a/ruby/red-plasma/lib/plasma/version.rb b/ruby/red-plasma/lib/plasma/version.rb
index 241920e43c3..14d99e98144 100644
--- a/ruby/red-plasma/lib/plasma/version.rb
+++ b/ruby/red-plasma/lib/plasma/version.rb
@@ -16,7 +16,7 @@
 # under the License.
 
 module Plasma
-  VERSION = "7.0.0-SNAPSHOT"
+  VERSION = "7.0.0"
 
   module Version
     numbers, TAG = VERSION.split("-")