From 7f51392e10a3c7d949c193e0cb9ab6718b05627b Mon Sep 17 00:00:00 2001 From: regro-cf-autotick-bot <36490558+regro-cf-autotick-bot@users.noreply.github.com> Date: Wed, 21 Jan 2026 11:07:24 +0000 Subject: [PATCH 1/5] updated v23.0.0 Co-Authored-By: H. Vetinari --- recipe/meta.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 7e0e97c8a..11c4ebf00 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "22.0.0" %} +{% set version = "23.0.0" %} {% set cuda_enabled = cuda_compiler_version != "None" %} {% set build_ext_version = "5.0.0" %} {% set build_ext = "cuda" if cuda_enabled else "cpu" %} @@ -15,7 +15,7 @@ package: source: - url: https://www.apache.org/dyn/closer.lua/arrow/arrow-{{ version }}/apache-arrow-{{ version }}.tar.gz?action=download fn: apache-arrow-{{ version }}.tar.gz - sha256: 131250cd24dec0cddde04e2ad8c9e2bc43edc5e84203a81cf71cf1a33a6e7e0f + sha256: 12f6844a0ba3b99645cd2bc6cc4f44f6a174ab90da37e474f08b7d073433cb60 patches: # skip gcsfs tests due to missing `storage-testbench` - patches/0001-disable-gcsfs_test.patch @@ -35,14 +35,14 @@ source: # testing-submodules not part of release tarball - git_url: https://github.com/apache/arrow-testing.git - git_rev: 9a02925d1ba80bd493b6d4da6e8a777588d57ac4 + git_rev: 19dda67f485ffb3ffa92f4c6fa083576ef052d58 folder: testing - git_url: https://github.com/apache/parquet-testing.git git_rev: a3d96a65e11e2bbca7d22a894e8313ede90a33a3 folder: cpp/submodules/parquet-testing build: - number: 9 + number: 0 # for cuda support, building with one version is enough to be compatible with # all later versions, since arrow is only using libcuda, and not libcudart. skip: true # [cuda_compiler_version not in ("None", cuda_compiler_version_min)] From 7f2af8bf59cff60232c2dfdd2ce9607c0a5953cd Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 24 Jan 2026 10:48:25 +1100 Subject: [PATCH 2/5] bump llvm_version --- recipe/conda_build_config.yaml | 4 ++-- recipe/meta.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/recipe/conda_build_config.yaml b/recipe/conda_build_config.yaml index 7cdfa120a..b69b9c261 100644 --- a/recipe/conda_build_config.yaml +++ b/recipe/conda_build_config.yaml @@ -6,6 +6,6 @@ libarrow_all: # on osx, keep this in sync with llvm_version in meta.yaml. c_compiler_version: # [osx] - - 19 # [osx] + - 21 # [osx] cxx_compiler_version: # [osx] - - 19 # [osx] + - 21 # [osx] diff --git a/recipe/meta.yaml b/recipe/meta.yaml index 11c4ebf00..f048d1562 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -3,7 +3,7 @@ {% set build_ext_version = "5.0.0" %} {% set build_ext = "cuda" if cuda_enabled else "cpu" %} {% set proc_build_number = "0" %} -{% set llvm_version = "19" %} +{% set llvm_version = "21" %} # see https://github.com/apache/arrow/blob/apache-arrow-10.0.1/cpp/CMakeLists.txt#L88-L90 {% set so_version = (version.split(".")[0] | int * 100 + version.split(".")[1] | int) ~ "." ~ version.split(".")[2] ~ ".0" %} From a965e532e36d8a8b0faed46b704058a92fca9178 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 24 Jan 2026 11:31:04 +1100 Subject: [PATCH 3/5] rebase patches --- recipe/meta.yaml | 10 +- recipe/patches/0001-disable-gcsfs_test.patch | 18 +- ...2-skip-NonExistentBucket-test-on-osx.patch | 6 +- ...003-Change-xsimd-inclusion-criterion.patch | 24 -- ...-Use-base-Azure-Core-RequestFailedE.patch} | 8 +- ...n-R-Move-S3-bucket-references-to-new.patch | 407 ------------------ ...use-standard-calendar-timezone-APIs.patch} | 205 ++------- ...diva-tests-related-to-tzdb-handling.patch} | 4 +- 8 files changed, 67 insertions(+), 615 deletions(-) delete mode 100644 recipe/patches/0003-Change-xsimd-inclusion-criterion.patch rename recipe/patches/{0005-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch => 0003-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch} (99%) delete mode 100644 recipe/patches/0004-GH-48260-C-Python-R-Move-S3-bucket-references-to-new.patch rename recipe/patches/{0006-GH-48593-C-C-20-use-standard-calendar-timezone-APIs.patch => 0004-GH-48593-C-C-20-use-standard-calendar-timezone-APIs.patch} (90%) rename recipe/patches/{0007-disable-some-gandiva-tests-related-to-tzdb-handling.patch => 0005-disable-some-gandiva-tests-related-to-tzdb-handling.patch} (94%) diff --git a/recipe/meta.yaml b/recipe/meta.yaml index f048d1562..b439343a6 100644 --- a/recipe/meta.yaml +++ b/recipe/meta.yaml @@ -22,16 +22,12 @@ source: # upstream problems on with s3 tests on osx, see # https://github.com/apache/arrow/issues/35587 - patches/0002-skip-NonExistentBucket-test-on-osx.patch - # for correctly including xsimd headers - - patches/0003-Change-xsimd-inclusion-criterion.patch - # backport https://github.com/apache/arrow/pull/48261 - - patches/0004-GH-48260-C-Python-R-Move-S3-bucket-references-to-new.patch # backport https://github.com/apache/arrow/pull/48895 - - patches/0005-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch + - patches/0003-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch # backport https://github.com/apache/arrow/pull/48601; needs C++20 - - patches/0006-GH-48593-C-C-20-use-standard-calendar-timezone-APIs.patch # [win] + - patches/0004-GH-48593-C-C-20-use-standard-calendar-timezone-APIs.patch # [win] # disable gandiva tests that are "unmaintained" and failing on windows - - patches/0007-disable-some-gandiva-tests-related-to-tzdb-handling.patch # [win] + - patches/0005-disable-some-gandiva-tests-related-to-tzdb-handling.patch # [win] # testing-submodules not part of release tarball - git_url: https://github.com/apache/arrow-testing.git diff --git a/recipe/patches/0001-disable-gcsfs_test.patch b/recipe/patches/0001-disable-gcsfs_test.patch index ee349669c..0080a4a58 100644 --- a/recipe/patches/0001-disable-gcsfs_test.patch +++ b/recipe/patches/0001-disable-gcsfs_test.patch @@ -1,20 +1,20 @@ -From b7b6283fd8a9905d959a0f25f97293750aa7669c Mon Sep 17 00:00:00 2001 +From 8e54e12c2954360c1552509c712c1a392708564d Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sat, 2 Nov 2024 15:41:34 +1100 -Subject: [PATCH 1/7] disable gcsfs_test +Subject: [PATCH 1/5] disable gcsfs_test it cannot work unless we package https://github.com/googleapis/storage-testbench, which however has extremely tight dependencies on protobuf etc., making it very hard to fit this into our migration patterns --- - cpp/src/arrow/filesystem/CMakeLists.txt | 8 -------- - 1 file changed, 8 deletions(-) + cpp/src/arrow/filesystem/CMakeLists.txt | 14 -------------- + 1 file changed, 14 deletions(-) diff --git a/cpp/src/arrow/filesystem/CMakeLists.txt b/cpp/src/arrow/filesystem/CMakeLists.txt -index 5250ed2a88..ba053bd501 100644 +index e6330df426..68f95b8b42 100644 --- a/cpp/src/arrow/filesystem/CMakeLists.txt +++ b/cpp/src/arrow/filesystem/CMakeLists.txt -@@ -42,14 +42,6 @@ if(ARROW_BUILD_BENCHMARKS) +@@ -42,20 +42,6 @@ if(ARROW_BUILD_BENCHMARKS) ${ARROW_BENCHMARK_LINK_LIBS}) endif() @@ -24,6 +24,12 @@ index 5250ed2a88..ba053bd501 100644 - filesystem - EXTRA_LINK_LIBS - google-cloud-cpp::storage) +- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang|AppleClang") +- if(TARGET arrow-gcsfs-test) +- target_compile_options(arrow-gcsfs-test PRIVATE -Wno-documentation +- -Wno-documentation-deprecated-sync) +- endif() +- endif() -endif() - if(ARROW_AZURE) diff --git a/recipe/patches/0002-skip-NonExistentBucket-test-on-osx.patch b/recipe/patches/0002-skip-NonExistentBucket-test-on-osx.patch index ade6196cb..920aa9048 100644 --- a/recipe/patches/0002-skip-NonExistentBucket-test-on-osx.patch +++ b/recipe/patches/0002-skip-NonExistentBucket-test-on-osx.patch @@ -1,14 +1,14 @@ -From 18258d1295a5e95426ec7b52a6412427d17b114d Mon Sep 17 00:00:00 2001 +From 0420ba3d4af352103ec4b5a31cc7f95470e60dc8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 24 Nov 2024 20:22:35 +1100 -Subject: [PATCH 2/7] skip NonExistentBucket test on osx +Subject: [PATCH 2/5] skip NonExistentBucket test on osx --- cpp/src/arrow/filesystem/s3fs_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc -index f0a5d0e2e4..b5578b4f74 100644 +index f7c125c896..3eb898ea71 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -438,6 +438,10 @@ TEST_F(S3RegionResolutionTest, RestrictedBucket) { diff --git a/recipe/patches/0003-Change-xsimd-inclusion-criterion.patch b/recipe/patches/0003-Change-xsimd-inclusion-criterion.patch deleted file mode 100644 index f9824c6e9..000000000 --- a/recipe/patches/0003-Change-xsimd-inclusion-criterion.patch +++ /dev/null @@ -1,24 +0,0 @@ -From 37f86532af653cb230ff3b6c8040d6a8577fd3a4 Mon Sep 17 00:00:00 2001 -From: AntoinePrv -Date: Mon, 27 Oct 2025 10:25:58 +0100 -Subject: [PATCH 3/7] Change xsimd inclusion criterion - ---- - cpp/src/arrow/util/byte_stream_split_internal.h | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/cpp/src/arrow/util/byte_stream_split_internal.h b/cpp/src/arrow/util/byte_stream_split_internal.h -index 70f9b87d6c..1ba2d5335b 100644 ---- a/cpp/src/arrow/util/byte_stream_split_internal.h -+++ b/cpp/src/arrow/util/byte_stream_split_internal.h -@@ -29,7 +29,9 @@ - #include - #include - --#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_SSE4_2) -+// ARROW_HAVE_RUNTIME_SSE4_2 is used on x86-64 to include ARROW_HAVE_SSE4_2 and -+// ARROW_RUNTIME_SIMD_LEVEL != NONE. -+#if defined(ARROW_HAVE_NEON) || defined(ARROW_HAVE_RUNTIME_SSE4_2) - # include - # define ARROW_HAVE_SIMD_SPLIT - #endif diff --git a/recipe/patches/0005-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch b/recipe/patches/0003-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch similarity index 99% rename from recipe/patches/0005-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch rename to recipe/patches/0003-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch index 73a7336ad..a79c685bf 100644 --- a/recipe/patches/0005-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch +++ b/recipe/patches/0003-GH-48894-Python-C-Use-base-Azure-Core-RequestFailedE.patch @@ -1,7 +1,7 @@ -From 407552a97befc16add72c08c05c439d2ddfada06 Mon Sep 17 00:00:00 2001 +From 1208d461bf2c3b241f993758de5ab9287d638aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 20 Jan 2026 15:32:01 +0100 -Subject: [PATCH 5/7] GH-48894: [Python][C++] Use base +Subject: [PATCH 3/5] GH-48894: [Python][C++] Use base Azure::Core::RequestFailedException instead of final Azure::Storage::StorageException and set minimum nodejs on conda env to 16 for Azurite to work (#48895) @@ -60,7 +60,7 @@ Signed-off-by: Raúl Cumplido 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt -index 6e23e920a4..afb9447d5e 100644 +index 18d58f7bb2..fec8488f95 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -39,7 +39,7 @@ lz4-c @@ -73,7 +73,7 @@ index 6e23e920a4..afb9447d5e 100644 pkg-config python diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc -index 0ca18eed51..047247b651 100644 +index a3a162616e..6580476d38 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -558,7 +558,7 @@ Status CrossContainerMoveNotImplemented(const AzureLocation& src, diff --git a/recipe/patches/0004-GH-48260-C-Python-R-Move-S3-bucket-references-to-new.patch b/recipe/patches/0004-GH-48260-C-Python-R-Move-S3-bucket-references-to-new.patch deleted file mode 100644 index 2d7be6868..000000000 --- a/recipe/patches/0004-GH-48260-C-Python-R-Move-S3-bucket-references-to-new.patch +++ /dev/null @@ -1,407 +0,0 @@ -From c4f7a9859aaebc9e7a94fea125e10c7d54d56a30 Mon Sep 17 00:00:00 2001 -From: Nic Crane -Date: Fri, 28 Nov 2025 10:00:28 +0000 -Subject: [PATCH 4/7] GH-48260: [C++][Python][R] Move S3 bucket references to - new bucket as Voltron Data ones will be removed soon (#48261) -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -No more VD, no more VD S3 bucket! - -Move references to S3 bucket to the new Arrow one, update a few references to regions and things. - -Yeah, for the most part. - -No -* GitHub Issue: #48260 - -Authored-by: Nic Crane -Signed-off-by: Raúl Cumplido ---- - cpp/src/arrow/filesystem/s3fs_test.cc | 2 +- - docs/source/python/dataset.rst | 8 ++++---- - python/pyarrow/_s3fs.pyx | 4 ++-- - python/pyarrow/tests/test_fs.py | 22 +++++++++++----------- - r/R/filesystem.R | 6 +++--- - r/man/gs_bucket.Rd | 2 +- - r/man/s3_bucket.Rd | 4 ++-- - r/tests/testthat/test-filesystem.R | 14 +++++++------- - r/vignettes/arrow.Rmd | 2 +- - r/vignettes/dataset.Rmd | 4 ++-- - r/vignettes/fs.Rmd | 24 ++++++++++++------------ - r/vignettes/python.Rmd | 2 +- - 12 files changed, 47 insertions(+), 47 deletions(-) - -diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc -index b5578b4f74..3eb898ea71 100644 ---- a/cpp/src/arrow/filesystem/s3fs_test.cc -+++ b/cpp/src/arrow/filesystem/s3fs_test.cc -@@ -420,7 +420,7 @@ TEST_F(S3OptionsTest, FromAssumeRole) { - class S3RegionResolutionTest : public AwsTestMixin {}; - - TEST_F(S3RegionResolutionTest, PublicBucket) { -- ASSERT_OK_AND_EQ("us-east-2", ResolveS3BucketRegion("voltrondata-labs-datasets")); -+ ASSERT_OK_AND_EQ("us-east-1", ResolveS3BucketRegion("arrow-datasets")); - - // Taken from a registry of open S3-hosted datasets - // at https://github.com/awslabs/open-data-registry -diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst -index 00469fd57b..de4ff7be4c 100644 ---- a/docs/source/python/dataset.rst -+++ b/docs/source/python/dataset.rst -@@ -350,7 +350,7 @@ specifying a S3 path: - - .. code-block:: python - -- dataset = ds.dataset("s3://voltrondata-labs-datasets/nyc-taxi/") -+ dataset = ds.dataset("s3://arrow-datasets/nyc-taxi/") - - Typically, you will want to customize the connection parameters, and then - a file system object can be created and passed to the ``filesystem`` keyword: -@@ -359,8 +359,8 @@ a file system object can be created and passed to the ``filesystem`` keyword: - - from pyarrow import fs - -- s3 = fs.S3FileSystem(region="us-east-2") -- dataset = ds.dataset("voltrondata-labs-datasets/nyc-taxi/", filesystem=s3) -+ s3 = fs.S3FileSystem(region="us-east-1") -+ dataset = ds.dataset("arrow-datasets/nyc-taxi/", filesystem=s3) - - The currently available classes are :class:`~pyarrow.fs.S3FileSystem` and - :class:`~pyarrow.fs.HadoopFileSystem`. See the :ref:`filesystem` docs for more -@@ -381,7 +381,7 @@ useful for testing or benchmarking. - - # By default, MinIO will listen for unencrypted HTTP traffic. - minio = fs.S3FileSystem(scheme="http", endpoint_override="localhost:9000") -- dataset = ds.dataset("voltrondata-labs-datasets/nyc-taxi/", filesystem=minio) -+ dataset = ds.dataset("arrow-datasets/nyc-taxi/", filesystem=minio) - - - Working with Parquet Datasets -diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx -index b01e0b6ae4..6317bd3785 100644 ---- a/python/pyarrow/_s3fs.pyx -+++ b/python/pyarrow/_s3fs.pyx -@@ -91,8 +91,8 @@ def resolve_s3_region(bucket): - - Examples - -------- -- >>> fs.resolve_s3_region('voltrondata-labs-datasets') -- 'us-east-2' -+ >>> fs.resolve_s3_region('arrow-datasets') -+ 'us-east-1' - """ - cdef: - c_string c_bucket -diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py -index a3b10f4cbe..0c7f1d9665 100644 ---- a/python/pyarrow/tests/test_fs.py -+++ b/python/pyarrow/tests/test_fs.py -@@ -1460,20 +1460,20 @@ def test_s3fs_wrong_region(): - # anonymous=True incase CI/etc has invalid credentials - fs = S3FileSystem(region='eu-north-1', anonymous=True) - -- msg = ("When getting information for bucket 'voltrondata-labs-datasets': " -+ msg = ("When getting information for bucket 'arrow-datasets': " - r"AWS Error UNKNOWN \(HTTP status 301\) during HeadBucket " - "operation: No response body. Looks like the configured region is " -- "'eu-north-1' while the bucket is located in 'us-east-2'." -+ "'eu-north-1' while the bucket is located in 'us-east-1'." - "|NETWORK_CONNECTION") - with pytest.raises(OSError, match=msg) as exc: -- fs.get_file_info("voltrondata-labs-datasets") -+ fs.get_file_info("arrow-datasets") - - # Sometimes fails on unrelated network error, so next call would also fail. - if 'NETWORK_CONNECTION' in str(exc.value): - return - -- fs = S3FileSystem(region='us-east-2', anonymous=True) -- fs.get_file_info("voltrondata-labs-datasets") -+ fs = S3FileSystem(region='us-east-1', anonymous=True) -+ fs.get_file_info("arrow-datasets") - - - @pytest.mark.azure -@@ -1911,15 +1911,15 @@ def test_s3_real_aws(): - fs = S3FileSystem(anonymous=True) - assert fs.region == default_region - -- fs = S3FileSystem(anonymous=True, region='us-east-2') -+ fs = S3FileSystem(anonymous=True, region='us-east-1') - entries = fs.get_file_info(FileSelector( -- 'voltrondata-labs-datasets/nyc-taxi')) -+ 'arrow-datasets/nyc-taxi')) - assert len(entries) > 0 -- key = 'voltrondata-labs-datasets/nyc-taxi/year=2019/month=6/part-0.parquet' -+ key = 'arrow-datasets/nyc-taxi/year=2019/month=6/part-0.parquet' - with fs.open_input_stream(key) as f: - md = f.metadata() - assert 'Content-Type' in md -- assert md['Last-Modified'] == b'2022-07-12T23:32:00Z' -+ assert md['Last-Modified'] == b'2025-11-26T10:28:55Z' - # For some reason, the header value is quoted - # (both with AWS and Minio) - assert md['ETag'] == b'"4c6a76826a695c6ac61592bc30cda3df-16"' -@@ -1962,7 +1962,7 @@ def test_s3_real_aws_region_selection(): - @pytest.mark.s3 - def test_resolve_s3_region(): - from pyarrow.fs import resolve_s3_region -- assert resolve_s3_region('voltrondata-labs-datasets') == 'us-east-2' -+ assert resolve_s3_region('arrow-datasets') == 'us-east-1' - assert resolve_s3_region('mf-nwp-models') == 'eu-west-1' - - with pytest.raises(ValueError, match="Not a valid bucket name"): -@@ -2119,7 +2119,7 @@ def test_s3_finalize_region_resolver(): - with pytest.raises(ValueError, match="S3 .* finalized"): - resolve_s3_region('mf-nwp-models') - with pytest.raises(ValueError, match="S3 .* finalized"): -- resolve_s3_region('voltrondata-labs-datasets') -+ resolve_s3_region('arrow-datasets') - """ - subprocess.check_call([sys.executable, "-c", code]) - -diff --git a/r/R/filesystem.R b/r/R/filesystem.R -index 233e1981aa..ca219611a8 100644 ---- a/r/R/filesystem.R -+++ b/r/R/filesystem.R -@@ -488,13 +488,13 @@ default_s3_options <- list( - #' relative path. Note that this function's success does not guarantee that you - #' are authorized to access the bucket's contents. - #' @examplesIf FALSE --#' bucket <- s3_bucket("voltrondata-labs-datasets") -+#' bucket <- s3_bucket("arrow-datasets") - #' - #' @examplesIf FALSE - #' # Turn on debug logging. The following line of code should be run in a fresh - #' # R session prior to any calls to `s3_bucket()` (or other S3 functions) - #' Sys.setenv("ARROW_S3_LOG_LEVEL" = "DEBUG") --#' bucket <- s3_bucket("voltrondata-labs-datasets") -+#' bucket <- s3_bucket("arrow-datasets") - #' - #' @export - s3_bucket <- function(bucket, ...) { -@@ -530,7 +530,7 @@ s3_bucket <- function(bucket, ...) { - #' relative path. Note that this function's success does not guarantee that you - #' are authorized to access the bucket's contents. - #' @examplesIf FALSE --#' bucket <- gs_bucket("voltrondata-labs-datasets") -+#' bucket <- gs_bucket("arrow-datasets") - #' @export - gs_bucket <- function(bucket, ...) { - assert_that(is.string(bucket)) -diff --git a/r/man/gs_bucket.Rd b/r/man/gs_bucket.Rd -index 7dc39a42c3..44b2efb7d9 100644 ---- a/r/man/gs_bucket.Rd -+++ b/r/man/gs_bucket.Rd -@@ -22,6 +22,6 @@ that holds onto its relative path - } - \examples{ - \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} --bucket <- gs_bucket("voltrondata-labs-datasets") -+bucket <- gs_bucket("arrow-datasets") - \dontshow{\}) # examplesIf} - } -diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd -index bffcfa5c38..66a552ccbc 100644 ---- a/r/man/s3_bucket.Rd -+++ b/r/man/s3_bucket.Rd -@@ -30,12 +30,12 @@ be useful to increase the log level. See the Notes section in - } - \examples{ - \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} --bucket <- s3_bucket("voltrondata-labs-datasets") -+bucket <- s3_bucket("arrow-datasets") - \dontshow{\}) # examplesIf} - \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} - # Turn on debug logging. The following line of code should be run in a fresh - # R session prior to any calls to `s3_bucket()` (or other S3 functions) - Sys.setenv("ARROW_S3_LOG_LEVEL"="DEBUG") --bucket <- s3_bucket("voltrondata-labs-datasets") -+bucket <- s3_bucket("arrow-datasets") - \dontshow{\}) # examplesIf} - } -diff --git a/r/tests/testthat/test-filesystem.R b/r/tests/testthat/test-filesystem.R -index 34095acc25..cf17c6b74e 100644 ---- a/r/tests/testthat/test-filesystem.R -+++ b/r/tests/testthat/test-filesystem.R -@@ -146,20 +146,20 @@ test_that("FileSystem$from_uri", { - skip_on_cran() - skip_if_not_available("s3") - skip_if_offline() -- fs_and_path <- FileSystem$from_uri("s3://voltrondata-labs-datasets") -+ fs_and_path <- FileSystem$from_uri("s3://arrow-datasets") - expect_r6_class(fs_and_path$fs, "S3FileSystem") -- expect_identical(fs_and_path$fs$region, "us-east-2") -+ expect_identical(fs_and_path$fs$region, "us-east-1") - }) - - test_that("SubTreeFileSystem$create() with URI", { - skip_on_cran() - skip_if_not_available("s3") - skip_if_offline() -- fs <- SubTreeFileSystem$create("s3://voltrondata-labs-datasets") -+ fs <- SubTreeFileSystem$create("s3://arrow-datasets") - expect_r6_class(fs, "SubTreeFileSystem") - expect_identical( - capture.output(print(fs)), -- "SubTreeFileSystem: s3://voltrondata-labs-datasets/" -+ "SubTreeFileSystem: s3://arrow-datasets/" - ) - }) - -@@ -193,12 +193,12 @@ test_that("gs_bucket", { - skip_on_cran() - skip_if_not_available("gcs") - skip_if_offline() -- bucket <- gs_bucket("voltrondata-labs-datasets") -+ bucket <- gs_bucket("arrow-datasets") - expect_r6_class(bucket, "SubTreeFileSystem") - expect_r6_class(bucket$base_fs, "GcsFileSystem") - expect_identical( - capture.output(print(bucket)), -- "SubTreeFileSystem: gs://voltrondata-labs-datasets/" -+ "SubTreeFileSystem: gs://arrow-datasets/" - ) -- expect_identical(bucket$base_path, "voltrondata-labs-datasets/") -+ expect_identical(bucket$base_path, "arrow-datasets/") - }) -diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd -index be31f3a0ac..d8460415bd 100644 ---- a/r/vignettes/arrow.Rmd -+++ b/r/vignettes/arrow.Rmd -@@ -178,7 +178,7 @@ To learn more about analyzing Arrow data, see the [data wrangling article](./dat - Another use for the arrow R package is to read, write, and analyze data sets stored remotely on cloud services. The package currently supports both Amazon Simple Storage Service (S3) and Google Cloud Storage (GCS). The example below illustrates how you can use `s3_bucket()` to refer to a an S3 bucket, and use `open_dataset()` to connect to the data set stored there: - - ```{r, eval=FALSE} --bucket <- s3_bucket("voltrondata-labs-datasets/nyc-taxi") -+bucket <- s3_bucket("arrow-datasets/nyc-taxi") - nyc_taxi <- open_dataset(bucket) - ``` - -diff --git a/r/vignettes/dataset.Rmd b/r/vignettes/dataset.Rmd -index bf8c00a5b6..085113033c 100644 ---- a/r/vignettes/dataset.Rmd -+++ b/r/vignettes/dataset.Rmd -@@ -22,13 +22,13 @@ This multi-file data set is comprised of 158 distinct Parquet files, each corres - If you have Amazon S3 support enabled in arrow (true for most users; see links at the end of this article if you need to troubleshoot this), you can connect to a copy of the "tiny taxi data" stored on S3 with this command: - - ```r --bucket <- s3_bucket("voltrondata-labs-datasets/nyc-taxi-tiny") -+bucket <- s3_bucket("arrow-datasets/nyc-taxi-tiny") - ``` - - Alternatively you could connect to a copy of the data on Google Cloud Storage (GCS) using the following command: - - ```r --bucket <- gs_bucket("voltrondata-labs-datasets/nyc-taxi-tiny", anonymous = TRUE) -+bucket <- gs_bucket("arrow-datasets/nyc-taxi-tiny", anonymous = TRUE) - ``` - - If you want to use the full data set, replace `nyc-taxi-tiny` with `nyc-taxi` in the code above. Apart from size -- and with it the cost in time, bandwidth usage, and CPU cycles -- there is no difference in the two versions of the data: you can test your code using the tiny taxi data and then check how it scales using the full data set. -diff --git a/r/vignettes/fs.Rmd b/r/vignettes/fs.Rmd -index 07476877c5..ed3b1bddb0 100644 ---- a/r/vignettes/fs.Rmd -+++ b/r/vignettes/fs.Rmd -@@ -39,16 +39,16 @@ and pass the result to file readers and writers (`read_parquet()`, `write_feathe - - Often the reason users work with cloud storage in real world analysis is to access large data sets. An example of this is discussed in the [datasets article](./dataset.html), but new users may prefer to work with a much smaller data set while learning how the arrow cloud storage interface works. To that end, the examples in this article rely on a multi-file Parquet dataset that stores a copy of the `diamonds` data made available through the [`ggplot2`](https://ggplot2.tidyverse.org/) package, documented in `help("diamonds", package = "ggplot2")`. The cloud storage version of this data set consists of 5 Parquet files totaling less than 1MB in size. - --The diamonds data set is hosted on both S3 and GCS, in a bucket named `voltrondata-labs-datasets`. To create an S3FileSystem object that refers to that bucket, use the following command: -+The diamonds data set is hosted on both S3 and GCS, in a bucket named `arrow-datasets`. To create an S3FileSystem object that refers to that bucket, use the following command: - - ```r --bucket <- s3_bucket("voltrondata-labs-datasets") -+bucket <- s3_bucket("arrow-datasets") - ``` - - To do this for the GCS version of the data, the command is as follows: - - ```r --bucket <- gs_bucket("voltrondata-labs-datasets", anonymous = TRUE) -+bucket <- gs_bucket("arrow-datasets", anonymous = TRUE) - ``` - - Note that `anonymous = TRUE` is required for GCS if credentials have not been configured. -@@ -126,7 +126,7 @@ df <- read_parquet(june2019$path("part-0.parquet")) - `SubTreeFileSystem` can also be made from a URI: - - ```r --june2019 <- SubTreeFileSystem$create("s3://voltrondata-labs-datasets/nyc-taxi/year=2019/month=6") -+june2019 <- SubTreeFileSystem$create("s3://arrow-datasets/nyc-taxi/year=2019/month=6") - ``` - --> - -@@ -150,8 +150,8 @@ gs://anonymous@bucket/path - For example, the Parquet file storing the "good cut" diamonds that we downloaded earlier in the article is available on both S3 and CGS. The relevant URIs are as follows: - - ```r --uri <- "s3://voltrondata-labs-datasets/diamonds/cut=Good/part-0.parquet" --uri <- "gs://anonymous@voltrondata-labs-datasets/diamonds/cut=Good/part-0.parquet" -+uri <- "s3://arrow-datasets/diamonds/cut=Good/part-0.parquet" -+uri <- "gs://anonymous@arrow-datasets/diamonds/cut=Good/part-0.parquet" - ``` - - Note that "anonymous" is required on GCS for public buckets. Regardless of which version you use, you can pass this URI to `read_parquet()` as if the file were stored locally: -@@ -165,7 +165,7 @@ that are passed down to configure the underlying file system. They are separated - by `&`. For example, - - ``` --s3://voltrondata-labs-datasets/?endpoint_override=https%3A%2F%2Fstorage.googleapis.com&allow_bucket_creation=true -+s3://arrow-datasets/?endpoint_override=https%3A%2F%2Fstorage.googleapis.com&allow_bucket_creation=true - ``` - - is equivalent to: -@@ -175,7 +175,7 @@ bucket <- S3FileSystem$create( - endpoint_override="https://storage.googleapis.com", - allow_bucket_creation=TRUE - ) --bucket$path("voltrondata-labs-datasets/") -+bucket$path("arrow-datasets/") - ``` - - Both tell the `S3FileSystem` object that it should allow the creation of new buckets -@@ -198,7 +198,7 @@ a request may spend retrying before returning an error. The current default is - 15 minutes, so in many interactive contexts it's nice to set a lower value: - - ``` --gs://anonymous@voltrondata-labs-datasets/diamonds/?retry_limit_seconds=10 -+gs://anonymous@arrow-datasets/diamonds/?retry_limit_seconds=10 - ``` - - ## Authentication -@@ -237,9 +237,9 @@ If you haven't configured credentials, then to access *public* buckets, you - must pass `anonymous = TRUE` or `anonymous` as the user in a URI: - - ```r --bucket <- gs_bucket("voltrondata-labs-datasets", anonymous = TRUE) -+bucket <- gs_bucket("arrow-datasets", anonymous = TRUE) - fs <- GcsFileSystem$create(anonymous = TRUE) --df <- read_parquet("gs://anonymous@voltrondata-labs-datasets/diamonds/cut=Good/part-0.parquet") -+df <- read_parquet("gs://anonymous@arrow-datasets/diamonds/cut=Good/part-0.parquet") - ``` - -